From 0ce7bac369f1ebf7e78e18445eb5532cfc620041 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Fri, 22 May 2026 11:38:30 -0300 Subject: [PATCH 1/3] refactor(prover): bind the proven statement into the Fiat-Shamir transcript MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VM prover and verifier built their transcript with `new(&[])` — the statement (program, public output, table layout) was never absorbed, so Fiat-Shamir challenges did not depend on it. Soundness was preserved only by out-of-band reconstruction checks in the verifier. Add a canonical, versioned, length-prefixed encoder (`prover/src/statement.rs`) that digests the full statement — ELF hash, public output, table counts, and page layout — into a 32-byte transcript seed. Seed the transcript with it on the prove path, the verify path, and the bus-balance replay (`replay_transcript_phase_a`), so all three derive identical challenges for an honest proof and diverge for any tampered statement. Closes the frozen-heart class of issue as defense-in-depth and is a prerequisite for recursion. Breaking proof-format change (challenges differ). `tiny-keccak` is promoted from a dev-dependency to a dependency for the statement digest. --- prover/Cargo.toml | 2 +- prover/src/lib.rs | 47 +++++++-- prover/src/statement.rs | 96 ++++++++++++++++++ prover/src/tests/decode_tests.rs | 1 + prover/src/tests/mod.rs | 2 + prover/src/tests/prove_elfs_tests.rs | 5 + prover/src/tests/statement_tests.rs | 146 +++++++++++++++++++++++++++ 7 files changed, 288 insertions(+), 11 deletions(-) create mode 100644 prover/src/statement.rs create mode 100644 prover/src/tests/statement_tests.rs diff --git a/prover/Cargo.toml b/prover/Cargo.toml index b2505a94c..781aef722 100644 --- a/prover/Cargo.toml +++ b/prover/Cargo.toml @@ -19,6 +19,7 @@ serde = { version = "1.0", features = ["derive"] } rayon = { version = "1.8.0", optional = true } sysinfo = { version = "0.31", default-features = false, features = ["system"] } log = "0.4" +tiny-keccak = { version = "2.0", features = ["keccak"] } [dev-dependencies] env_logger = "*" @@ -26,7 +27,6 @@ criterion = { version = "0.5", default-features = false } bincode = "1" tikv-jemallocator = "0.6" tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] } -tiny-keccak = { version = "2.0", features = ["keccak"] } # Enable stark's test-utils so cross-crate tests can reach # `compute_precomputed_commitment_for_testing`. Only active under cargo test/bench. stark = { path = "../crypto/stark", features = ["test-utils"] } diff --git a/prover/src/lib.rs b/prover/src/lib.rs index dbe13d20b..a4544d151 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -17,6 +17,7 @@ pub mod constraints; mod debug_report; #[cfg(feature = "instruments")] pub mod instruments; +mod statement; pub mod tables; pub mod test_utils; #[cfg(test)] @@ -35,6 +36,7 @@ use stark::storage_mode::StorageMode; use stark::traits::AIR; use stark::verifier::{IsStarkVerifier, Verifier}; +use crate::statement::statement_seed; pub use crate::tables::MaxRowsConfig; use crate::tables::bitwise; use crate::tables::decode; @@ -449,8 +451,9 @@ impl VmAirs { pub(crate) fn replay_transcript_phase_a( airs: &[&dyn AIR], multi_proof: &MultiProof, + transcript_seed: &[u8], ) -> (FieldElement, FieldElement) { - let mut transcript = DefaultTranscript::::new(&[]); + let mut transcript = DefaultTranscript::::new(transcript_seed); for (air, proof) in airs.iter().zip(&multi_proof.proofs) { if air.is_preprocessed() { transcript.append_bytes(&air.precomputed_commitment()); @@ -506,8 +509,9 @@ pub(crate) fn compute_expected_commit_bus_balance( airs: &[&dyn AIR], proof: &MultiProof, public_output_bytes: &[u8], + transcript_seed: &[u8], ) -> Option> { - let (z, alpha) = replay_transcript_phase_a(airs, proof); + let (z, alpha) = replay_transcript_phase_a(airs, proof, transcript_seed); compute_commit_bus_offset(public_output_bytes, &z, &alpha) } @@ -646,10 +650,26 @@ pub fn prove_with_options_and_inputs( let runtime_page_ranges = traces.runtime_page_ranges(); + let num_private_input_pages = traces + .page_configs + .iter() + .filter(|c| c.is_private_input) + .count(); + + // Bind the full statement (program, public output, table layout) into the + // Fiat-Shamir transcript so every challenge depends on it. + let transcript_seed = statement_seed( + elf_bytes, + &traces.public_output_bytes, + &table_counts, + num_private_input_pages, + &runtime_page_ranges, + ); + // Phase 4: Prove (multi_prove) let proof = Prover::multi_prove( airs.air_trace_pairs(&mut traces), - &mut DefaultTranscript::::new(&[]), + &mut DefaultTranscript::::new(&transcript_seed), #[cfg(feature = "disk-spill")] storage_mode, ) @@ -671,12 +691,6 @@ pub fn prove_with_options_and_inputs( ); } - let num_private_input_pages = traces - .page_configs - .iter() - .filter(|c| c.is_private_input) - .count(); - Ok(VmProof { proof, runtime_page_ranges, @@ -759,10 +773,23 @@ pub fn verify_with_options( // If public_output was tampered, the recomputed offset won't match the // actual bus total in the proof, and multi_verify will reject. let air_refs = airs.air_refs(); + + // Recompute the statement seed from the proof bundle: a tampered statement + // field makes this diverge from the prover's seed, so every derived + // challenge differs and verification rejects. + let transcript_seed = statement_seed( + elf_bytes, + &vm_proof.public_output, + &vm_proof.table_counts, + vm_proof.num_private_input_pages, + &vm_proof.runtime_page_ranges, + ); + let expected_bus_balance = match compute_expected_commit_bus_balance( &air_refs, &vm_proof.proof, &vm_proof.public_output, + &transcript_seed, ) { Some(balance) => balance, None => return Ok(false), @@ -771,7 +798,7 @@ pub fn verify_with_options( Ok(Verifier::multi_verify( &air_refs, &vm_proof.proof, - &mut DefaultTranscript::::new(&[]), + &mut DefaultTranscript::::new(&transcript_seed), &expected_bus_balance, )) } diff --git a/prover/src/statement.rs b/prover/src/statement.rs new file mode 100644 index 000000000..570c099b5 --- /dev/null +++ b/prover/src/statement.rs @@ -0,0 +1,96 @@ +//! Canonical encoding of the statement a VM proof attests to. +//! +//! The 32-byte digest produced by [`statement_seed`] seeds the Fiat-Shamir +//! transcript on both the prove and verify paths, so every challenge is bound +//! to the program, its public output, and the table layout. Prover and +//! verifier must compute it from identical inputs — any divergence makes every +//! derived challenge differ and verification reject. + +use tiny_keccak::{Hasher, Keccak}; + +use crate::{RuntimePageRange, TableCounts}; + +/// Bumped whenever the statement encoding changes, so a re-encoding under a new +/// layout cannot collide with one produced by an older layout. +const FORMAT_VERSION: u32 = 1; + +/// Fixed domain-separation tag prefixing every statement encoding. +const DOMAIN_TAG: &[u8] = b"LAMBDAVM_STARK_STATEMENT_V1"; + +fn keccak256(data: &[u8]) -> [u8; 32] { + let mut hasher = Keccak::v256(); + hasher.update(data); + let mut out = [0u8; 32]; + hasher.finalize(&mut out); + out +} + +/// Canonical, length-prefixed encoding of the statement. +/// +/// Every variable-length field is length-prefixed, so two distinct statements +/// can never produce the same byte string by shifting content across a field +/// boundary. +pub(crate) fn encode_statement( + elf_bytes: &[u8], + public_output: &[u8], + table_counts: &TableCounts, + num_private_input_pages: usize, + runtime_page_ranges: &[RuntimePageRange], +) -> Vec { + let mut out = Vec::new(); + out.extend_from_slice(DOMAIN_TAG); + out.extend_from_slice(&FORMAT_VERSION.to_le_bytes()); + + // Program identity: a length-prefixed digest of the ELF (hashed, not + // inlined, to keep the encoding small). + out.extend_from_slice(&(elf_bytes.len() as u64).to_le_bytes()); + out.extend_from_slice(&keccak256(elf_bytes)); + + // Public output. + out.extend_from_slice(&(public_output.len() as u64).to_le_bytes()); + out.extend_from_slice(public_output); + + // Table layout: every field, declared order, fixed width. + for count in [ + table_counts.cpu, + table_counts.lt, + table_counts.memw, + table_counts.memw_aligned, + table_counts.load, + table_counts.mul, + table_counts.dvrm, + table_counts.shift, + table_counts.branch, + table_counts.memw_register, + ] { + out.extend_from_slice(&(count as u64).to_le_bytes()); + } + + out.extend_from_slice(&(num_private_input_pages as u64).to_le_bytes()); + + // Runtime page ranges (count-prefixed; each entry fixed width). + out.extend_from_slice(&(runtime_page_ranges.len() as u64).to_le_bytes()); + for range in runtime_page_ranges { + out.extend_from_slice(&range.base.to_le_bytes()); + out.extend_from_slice(&range.count.to_le_bytes()); + } + + out +} + +/// The 32-byte Fiat-Shamir transcript seed binding the full statement. +pub(crate) fn statement_seed( + elf_bytes: &[u8], + public_output: &[u8], + table_counts: &TableCounts, + num_private_input_pages: usize, + runtime_page_ranges: &[RuntimePageRange], +) -> [u8; 32] { + keccak256(&encode_statement( + elf_bytes, + public_output, + table_counts, + num_private_input_pages, + runtime_page_ranges, + )) +} diff --git a/prover/src/tests/decode_tests.rs b/prover/src/tests/decode_tests.rs index f1f60e5ba..488a4c747 100644 --- a/prover/src/tests/decode_tests.rs +++ b/prover/src/tests/decode_tests.rs @@ -1068,6 +1068,7 @@ fn test_decode_soundness_same_elf_accepted() { &verifier_air_refs, &proof, &traces.public_output_bytes, + &[], ) .expect("fingerprint collision in test"); diff --git a/prover/src/tests/mod.rs b/prover/src/tests/mod.rs index dc5f3fe22..fa31abfcf 100644 --- a/prover/src/tests/mod.rs +++ b/prover/src/tests/mod.rs @@ -29,4 +29,6 @@ pub mod mul_tests; #[cfg(test)] pub mod prove_elfs_tests; #[cfg(test)] +pub mod statement_tests; +#[cfg(test)] pub mod trace_builder_tests; diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 53149a943..7ba94f869 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -71,6 +71,7 @@ fn prove_and_verify_vm_minimal(elf: &Elf, traces: &mut Traces) -> bool { &airs.air_refs(), &multi_proof, &traces.public_output_bytes, + &[], ) .expect("fingerprint collision in test"); @@ -885,6 +886,7 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() { &verifier_air_refs, &proof, &traces.public_output_bytes, + &[], ) .expect("fingerprint collision in test"); @@ -1621,6 +1623,7 @@ fn test_deep_stack_runtime_pages_roundtrip() { &verifier_air_refs, &proof, &traces.public_output_bytes, + &[], ) .expect("fingerprint collision in test"); @@ -1676,6 +1679,7 @@ fn test_deep_stack_missing_pages_rejected() { &verifier_air_refs, &proof, &traces.public_output_bytes, + &[], ) .expect("fingerprint collision in test"); @@ -1766,6 +1770,7 @@ fn test_heap_alloc_runtime_pages_roundtrip() { &verifier_air_refs, &proof, &traces.public_output_bytes, + &[], ) .expect("fingerprint collision in test"); diff --git a/prover/src/tests/statement_tests.rs b/prover/src/tests/statement_tests.rs new file mode 100644 index 000000000..4f6acb9a7 --- /dev/null +++ b/prover/src/tests/statement_tests.rs @@ -0,0 +1,146 @@ +//! Tests for statement encoding and Fiat-Shamir transcript seeding. + +use crate::statement::{encode_statement, statement_seed}; +use crate::{RuntimePageRange, TableCounts}; + +fn sample_counts() -> TableCounts { + TableCounts { + cpu: 3, + lt: 1, + memw: 2, + memw_aligned: 1, + load: 1, + mul: 1, + dvrm: 1, + shift: 1, + branch: 2, + memw_register: 1, + } +} + +fn sample_ranges() -> Vec { + vec![ + RuntimePageRange { + base: 0x1000, + count: 4, + }, + RuntimePageRange { + base: 0x8000, + count: 2, + }, + ] +} + +#[test] +fn encoding_is_deterministic() { + let counts = sample_counts(); + let ranges = sample_ranges(); + assert_eq!( + encode_statement(b"elf-bytes", b"output", &counts, 3, &ranges), + encode_statement(b"elf-bytes", b"output", &counts, 3, &ranges), + ); + assert_eq!( + statement_seed(b"elf-bytes", b"output", &counts, 3, &ranges), + statement_seed(b"elf-bytes", b"output", &counts, 3, &ranges), + ); +} + +#[test] +fn encoding_starts_with_domain_tag() { + let enc = encode_statement(b"", b"", &sample_counts(), 0, &[]); + assert!(enc.starts_with(b"LAMBDAVM_STARK_STATEMENT_V1")); +} + +#[test] +fn seed_depends_on_elf() { + let c = sample_counts(); + let r = sample_ranges(); + assert_ne!( + statement_seed(b"program-a", b"out", &c, 1, &r), + statement_seed(b"program-b", b"out", &c, 1, &r), + ); +} + +#[test] +fn seed_depends_on_public_output() { + let c = sample_counts(); + let r = sample_ranges(); + assert_ne!( + statement_seed(b"elf", b"output-1", &c, 1, &r), + statement_seed(b"elf", b"output-2", &c, 1, &r), + ); +} + +#[test] +fn seed_depends_on_table_counts() { + let r = sample_ranges(); + let mut c2 = sample_counts(); + c2.branch += 1; + assert_ne!( + statement_seed(b"elf", b"out", &sample_counts(), 1, &r), + statement_seed(b"elf", b"out", &c2, 1, &r), + ); +} + +#[test] +fn seed_depends_on_private_input_pages() { + let c = sample_counts(); + let r = sample_ranges(); + assert_ne!( + statement_seed(b"elf", b"out", &c, 1, &r), + statement_seed(b"elf", b"out", &c, 2, &r), + ); +} + +#[test] +fn seed_depends_on_runtime_page_ranges() { + let c = sample_counts(); + assert_ne!( + statement_seed(b"elf", b"out", &c, 1, &sample_ranges()), + statement_seed(b"elf", b"out", &c, 1, &[]), + ); +} + +#[test] +fn length_prefix_prevents_public_output_boundary_collision() { + // Without the public_output length prefix, "empty output + cpu count 0x41" + // and "output [0x41] + cpu count 0" would encode to the same bytes. The + // length prefix keeps the two statements distinct. + let mut counts_a = sample_counts(); + counts_a.cpu = 0x41; + let mut counts_b = sample_counts(); + counts_b.cpu = 0; + assert_ne!( + encode_statement(b"elf", b"", &counts_a, 0, &[]), + encode_statement(b"elf", b"\x41", &counts_b, 0, &[]), + ); +} + +/// End-to-end: an honest proof verifies against its own program, and a proof +/// must not verify against a different program -- the verifier's statement seed +/// (built from the other ELF) diverges from the prover's, so every Fiat-Shamir +/// challenge differs. Also exercises seed consistency across the prove path, +/// the verify path, and the bus-balance transcript replay. +#[test] +fn proof_binds_the_program_it_was_generated_for() { + let rust_artifacts = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("workspace root") + .join("executor/program_artifacts/rust"); + let read = |name: &str| { + std::fs::read(rust_artifacts.join(name)) + .unwrap_or_else(|_| panic!("{name} not found -- run `make compile-programs-rust`")) + }; + let allocator = read("allocator.elf"); + let pure_commit = read("pure_commit.elf"); + + let proof = crate::prove(&allocator).expect("prove allocator"); + assert!( + crate::verify(&proof, &allocator).expect("verify allocator"), + "honest proof must verify against its own program", + ); + assert!( + !matches!(crate::verify(&proof, &pure_commit), Ok(true)), + "a proof must not verify against a different program", + ); +} From 93131deef7663f5f866efc26c3d513aca7a0c8dd Mon Sep 17 00:00:00 2001 From: MauroFab Date: Fri, 22 May 2026 17:24:14 -0300 Subject: [PATCH 2/3] refactor(prover): streamline statement absorption into transcript MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply the simplifications to the statement-binding work: - Drop tiny-keccak as a prod dep; the DefaultTranscript is already a Keccak256 absorber, so the outer keccak256 in statement_seed() was redundant. ELF digest now uses sha3::Keccak256 (already a transitive prod dep via crypto). tiny-keccak stays as a dev-dep for the in-VM keccak precompile cross-check. - Replace encode_statement -> Vec + statement_seed -> [u8; 32] with a single absorb_statement(&mut transcript, ...) that streams bytes directly into the transcript. No intermediate allocation. - Drop FORMAT_VERSION constant — the version is in DOMAIN_TAG suffix (LAMBDAVM_STARK_STATEMENT_V1). Bump the suffix on any encoding change. - Drop the redundant 8-byte length prefix on the elf bytes: we hash to fixed 32-byte digest, so the prefix doesn't prevent any collision. - replay_transcript_phase_a / compute_expected_commit_bus_balance now take &mut DefaultTranscript instead of an opaque transcript_seed: &[u8]. Callers decide what's in the transcript; internal tests just hand a fresh empty one (no &[] magic sentinel). - Fold the duplicated lde_trace_main_merkle_root append branches in replay_transcript_phase_a. - verify_with_options absorbs the statement ONCE, then clones the transcript so multi_verify and the bus-balance replay both start from the same post-absorb state — no closure, no field-list drift. - statement_tests.rs reframed around transcript.state(). 3 tests (state_is_deterministic, state_depends_on_every_field, public_output_length_prefix_prevents_collision) cover the same surface as the 8 byte-level encoder tests they replace, hitting the real absorb path. Drop the two-ELF e2e test that needed RISC-V artifacts and couldn't run in author env. - 6 internal test sites (decode_tests.rs, prove_elfs_tests.rs): pass a fresh DefaultTranscript instead of &[]. Soundness commitment unchanged: same five statement fields bound, same domain separation, same length prefixes where needed, same cross-site agreement (now explicitly tested via deterministic absorb). Follow-up: Phase A is still replayed twice on the verify path (once in compute_expected_commit_bus_balance, once in multi_verify). That's pre-existing, not from this PR; removing it would change multi_verify's public API to take a closure that produces the bus balance offset given (z, alpha). Separate scope. --- Cargo.lock | 1 + prover/Cargo.toml | 3 +- prover/src/lib.rs | 37 ++++--- prover/src/statement.rs | 97 +++++++----------- prover/src/tests/decode_tests.rs | 3 +- prover/src/tests/prove_elfs_tests.rs | 15 ++- prover/src/tests/statement_tests.rs | 144 +++++++++++---------------- 7 files changed, 129 insertions(+), 171 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 70b4071e8..8fff60dcf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1961,6 +1961,7 @@ dependencies = [ "math", "rayon", "serde", + "sha3", "stark", "sysinfo", "tikv-jemalloc-ctl", diff --git a/prover/Cargo.toml b/prover/Cargo.toml index 781aef722..2d58776c5 100644 --- a/prover/Cargo.toml +++ b/prover/Cargo.toml @@ -19,7 +19,7 @@ serde = { version = "1.0", features = ["derive"] } rayon = { version = "1.8.0", optional = true } sysinfo = { version = "0.31", default-features = false, features = ["system"] } log = "0.4" -tiny-keccak = { version = "2.0", features = ["keccak"] } +sha3 = { version = "0.10.8", default-features = false } [dev-dependencies] env_logger = "*" @@ -27,6 +27,7 @@ criterion = { version = "0.5", default-features = false } bincode = "1" tikv-jemallocator = "0.6" tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] } +tiny-keccak = { version = "2.0", features = ["keccak"] } # Enable stark's test-utils so cross-crate tests can reach # `compute_precomputed_commitment_for_testing`. Only active under cargo test/bench. stark = { path = "../crypto/stark", features = ["test-utils"] } diff --git a/prover/src/lib.rs b/prover/src/lib.rs index a4544d151..5cb762a96 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -36,7 +36,7 @@ use stark::storage_mode::StorageMode; use stark::traits::AIR; use stark::verifier::{IsStarkVerifier, Verifier}; -use crate::statement::statement_seed; +use crate::statement::absorb_statement; pub use crate::tables::MaxRowsConfig; use crate::tables::bitwise; use crate::tables::decode; @@ -451,16 +451,13 @@ impl VmAirs { pub(crate) fn replay_transcript_phase_a( airs: &[&dyn AIR], multi_proof: &MultiProof, - transcript_seed: &[u8], + transcript: &mut DefaultTranscript, ) -> (FieldElement, FieldElement) { - let mut transcript = DefaultTranscript::::new(transcript_seed); for (air, proof) in airs.iter().zip(&multi_proof.proofs) { if air.is_preprocessed() { transcript.append_bytes(&air.precomputed_commitment()); - transcript.append_bytes(&proof.lde_trace_main_merkle_root); - } else { - transcript.append_bytes(&proof.lde_trace_main_merkle_root); } + transcript.append_bytes(&proof.lde_trace_main_merkle_root); } let z: FieldElement = transcript.sample_field_element(); let alpha: FieldElement = transcript.sample_field_element(); @@ -509,9 +506,9 @@ pub(crate) fn compute_expected_commit_bus_balance( airs: &[&dyn AIR], proof: &MultiProof, public_output_bytes: &[u8], - transcript_seed: &[u8], + transcript: &mut DefaultTranscript, ) -> Option> { - let (z, alpha) = replay_transcript_phase_a(airs, proof, transcript_seed); + let (z, alpha) = replay_transcript_phase_a(airs, proof, transcript); compute_commit_bus_offset(public_output_bytes, &z, &alpha) } @@ -658,7 +655,9 @@ pub fn prove_with_options_and_inputs( // Bind the full statement (program, public output, table layout) into the // Fiat-Shamir transcript so every challenge depends on it. - let transcript_seed = statement_seed( + let mut transcript = DefaultTranscript::::new(&[]); + absorb_statement( + &mut transcript, elf_bytes, &traces.public_output_bytes, &table_counts, @@ -669,7 +668,7 @@ pub fn prove_with_options_and_inputs( // Phase 4: Prove (multi_prove) let proof = Prover::multi_prove( airs.air_trace_pairs(&mut traces), - &mut DefaultTranscript::::new(&transcript_seed), + &mut transcript, #[cfg(feature = "disk-spill")] storage_mode, ) @@ -774,10 +773,12 @@ pub fn verify_with_options( // actual bus total in the proof, and multi_verify will reject. let air_refs = airs.air_refs(); - // Recompute the statement seed from the proof bundle: a tampered statement - // field makes this diverge from the prover's seed, so every derived - // challenge differs and verification rejects. - let transcript_seed = statement_seed( + // Bind the statement into the verifier's transcript. A tampered statement + // field makes this diverge from the prover's transcript state, so every + // derived challenge differs and verification rejects. + let mut transcript = DefaultTranscript::::new(&[]); + absorb_statement( + &mut transcript, elf_bytes, &vm_proof.public_output, &vm_proof.table_counts, @@ -785,11 +786,15 @@ pub fn verify_with_options( &vm_proof.runtime_page_ranges, ); + // Fork the post-absorb state: the replay helper advances through Phase A + // independently of the multi_verify transcript, but both must start from + // the same statement-bound state. + let mut transcript_for_replay = transcript.clone(); let expected_bus_balance = match compute_expected_commit_bus_balance( &air_refs, &vm_proof.proof, &vm_proof.public_output, - &transcript_seed, + &mut transcript_for_replay, ) { Some(balance) => balance, None => return Ok(false), @@ -798,7 +803,7 @@ pub fn verify_with_options( Ok(Verifier::multi_verify( &air_refs, &vm_proof.proof, - &mut DefaultTranscript::::new(&transcript_seed), + &mut transcript, &expected_bus_balance, )) } diff --git a/prover/src/statement.rs b/prover/src/statement.rs index 570c099b5..2ac334d72 100644 --- a/prover/src/statement.rs +++ b/prover/src/statement.rs @@ -1,56 +1,48 @@ -//! Canonical encoding of the statement a VM proof attests to. +//! Statement absorbed into the Fiat-Shamir transcript before Phase A. //! -//! The 32-byte digest produced by [`statement_seed`] seeds the Fiat-Shamir -//! transcript on both the prove and verify paths, so every challenge is bound -//! to the program, its public output, and the table layout. Prover and -//! verifier must compute it from identical inputs — any divergence makes every -//! derived challenge differ and verification reject. +//! Streams a canonical, domain-separated, length-prefixed encoding directly +//! into the transcript. The transcript is itself a Keccak256 absorber +//! (`DefaultTranscript`), so a single hash suffices — no external digest +//! needed beyond the ELF. +//! +//! All three call sites (prove, verify, bus-balance replay) must absorb +//! identical bytes; any divergence makes every derived challenge differ and +//! verification reject. -use tiny_keccak::{Hasher, Keccak}; +use crypto::fiat_shamir::is_transcript::IsTranscript; +use sha3::{Digest, Keccak256}; +use crate::test_utils::E; use crate::{RuntimePageRange, TableCounts}; -/// Bumped whenever the statement encoding changes, so a re-encoding under a new -/// layout cannot collide with one produced by an older layout. -const FORMAT_VERSION: u32 = 1; - -/// Fixed domain-separation tag prefixing every statement encoding. +/// Domain-separation tag. Bump the suffix (`_V2`, ...) on any encoding change. const DOMAIN_TAG: &[u8] = b"LAMBDAVM_STARK_STATEMENT_V1"; -fn keccak256(data: &[u8]) -> [u8; 32] { - let mut hasher = Keccak::v256(); - hasher.update(data); - let mut out = [0u8; 32]; - hasher.finalize(&mut out); - out +fn elf_digest(elf: &[u8]) -> [u8; 32] { + let mut h = Keccak256::new(); + h.update(elf); + h.finalize().into() } -/// Canonical, length-prefixed encoding of the statement. -/// -/// Every variable-length field is length-prefixed, so two distinct statements -/// can never produce the same byte string by shifting content across a field -/// boundary. -pub(crate) fn encode_statement( +pub(crate) fn absorb_statement( + t: &mut impl IsTranscript, elf_bytes: &[u8], public_output: &[u8], table_counts: &TableCounts, num_private_input_pages: usize, runtime_page_ranges: &[RuntimePageRange], -) -> Vec { - let mut out = Vec::new(); - out.extend_from_slice(DOMAIN_TAG); - out.extend_from_slice(&FORMAT_VERSION.to_le_bytes()); +) { + t.append_bytes(DOMAIN_TAG); - // Program identity: a length-prefixed digest of the ELF (hashed, not - // inlined, to keep the encoding small). - out.extend_from_slice(&(elf_bytes.len() as u64).to_le_bytes()); - out.extend_from_slice(&keccak256(elf_bytes)); + // ELF: fixed 32-byte digest — no length prefix needed. + t.append_bytes(&elf_digest(elf_bytes)); - // Public output. - out.extend_from_slice(&(public_output.len() as u64).to_le_bytes()); - out.extend_from_slice(public_output); + // public_output: variable length → length-prefix to prevent boundary collisions. + t.append_bytes(&(public_output.len() as u64).to_le_bytes()); + t.append_bytes(public_output); - // Table layout: every field, declared order, fixed width. + // table_counts: fixed-width u64s in declared order. + // Reordering or adding a field requires bumping DOMAIN_TAG above. for count in [ table_counts.cpu, table_counts.lt, @@ -63,34 +55,15 @@ pub(crate) fn encode_statement( table_counts.branch, table_counts.memw_register, ] { - out.extend_from_slice(&(count as u64).to_le_bytes()); + t.append_bytes(&(count as u64).to_le_bytes()); } - out.extend_from_slice(&(num_private_input_pages as u64).to_le_bytes()); + t.append_bytes(&(num_private_input_pages as u64).to_le_bytes()); - // Runtime page ranges (count-prefixed; each entry fixed width). - out.extend_from_slice(&(runtime_page_ranges.len() as u64).to_le_bytes()); - for range in runtime_page_ranges { - out.extend_from_slice(&range.base.to_le_bytes()); - out.extend_from_slice(&range.count.to_le_bytes()); + // runtime_page_ranges: count-prefixed; each entry fixed width. + t.append_bytes(&(runtime_page_ranges.len() as u64).to_le_bytes()); + for r in runtime_page_ranges { + t.append_bytes(&r.base.to_le_bytes()); + t.append_bytes(&r.count.to_le_bytes()); } - - out -} - -/// The 32-byte Fiat-Shamir transcript seed binding the full statement. -pub(crate) fn statement_seed( - elf_bytes: &[u8], - public_output: &[u8], - table_counts: &TableCounts, - num_private_input_pages: usize, - runtime_page_ranges: &[RuntimePageRange], -) -> [u8; 32] { - keccak256(&encode_statement( - elf_bytes, - public_output, - table_counts, - num_private_input_pages, - runtime_page_ranges, - )) } diff --git a/prover/src/tests/decode_tests.rs b/prover/src/tests/decode_tests.rs index 488a4c747..8f61a1d74 100644 --- a/prover/src/tests/decode_tests.rs +++ b/prover/src/tests/decode_tests.rs @@ -1064,11 +1064,12 @@ fn test_decode_soundness_same_elf_accepted() { &table_counts, ); let verifier_air_refs = verifier_airs.air_refs(); + let mut replay_transcript = DefaultTranscript::::new(&[]); let expected_bus_balance = crate::compute_expected_commit_bus_balance( &verifier_air_refs, &proof, &traces.public_output_bytes, - &[], + &mut replay_transcript, ) .expect("fingerprint collision in test"); diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 7ba94f869..ce15e1ec4 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -67,11 +67,12 @@ fn prove_and_verify_vm_minimal(elf: &Elf, traces: &mut Traces) -> bool { }; // Compute the verifier-side expected COMMIT bus balance from public output bytes + let mut replay_transcript = DefaultTranscript::::new(&[]); let expected_bus_balance = crate::compute_expected_commit_bus_balance( &airs.air_refs(), &multi_proof, &traces.public_output_bytes, - &[], + &mut replay_transcript, ) .expect("fingerprint collision in test"); @@ -882,11 +883,12 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() { let verifier_airs = crate::VmAirs::new(&elf, &proof_options, true, &wrong_configs, &table_counts); let verifier_air_refs = verifier_airs.air_refs(); + let mut replay_transcript = DefaultTranscript::::new(&[]); let expected_bus_balance = crate::compute_expected_commit_bus_balance( &verifier_air_refs, &proof, &traces.public_output_bytes, - &[], + &mut replay_transcript, ) .expect("fingerprint collision in test"); @@ -1619,11 +1621,12 @@ fn test_deep_stack_runtime_pages_roundtrip() { let verifier_airs = crate::VmAirs::new(&elf, &proof_options, true, &verifier_configs, &table_counts); let verifier_air_refs = verifier_airs.air_refs(); + let mut replay_transcript = DefaultTranscript::::new(&[]); let expected_bus_balance = crate::compute_expected_commit_bus_balance( &verifier_air_refs, &proof, &traces.public_output_bytes, - &[], + &mut replay_transcript, ) .expect("fingerprint collision in test"); @@ -1675,11 +1678,12 @@ fn test_deep_stack_missing_pages_rejected() { let verifier_airs = crate::VmAirs::new(&elf, &proof_options, true, &wrong_configs, &table_counts); let verifier_air_refs = verifier_airs.air_refs(); + let mut replay_transcript = DefaultTranscript::::new(&[]); let expected_bus_balance = crate::compute_expected_commit_bus_balance( &verifier_air_refs, &proof, &traces.public_output_bytes, - &[], + &mut replay_transcript, ) .expect("fingerprint collision in test"); @@ -1766,11 +1770,12 @@ fn test_heap_alloc_runtime_pages_roundtrip() { let verifier_airs = crate::VmAirs::new(&elf, &proof_options, true, &verifier_configs, &table_counts); let verifier_air_refs = verifier_airs.air_refs(); + let mut replay_transcript = DefaultTranscript::::new(&[]); let expected_bus_balance = crate::compute_expected_commit_bus_balance( &verifier_air_refs, &proof, &traces.public_output_bytes, - &[], + &mut replay_transcript, ) .expect("fingerprint collision in test"); diff --git a/prover/src/tests/statement_tests.rs b/prover/src/tests/statement_tests.rs index 4f6acb9a7..659113be6 100644 --- a/prover/src/tests/statement_tests.rs +++ b/prover/src/tests/statement_tests.rs @@ -1,6 +1,10 @@ -//! Tests for statement encoding and Fiat-Shamir transcript seeding. +//! Tests for statement absorption into the Fiat-Shamir transcript. -use crate::statement::{encode_statement, statement_seed}; +use crypto::fiat_shamir::default_transcript::DefaultTranscript; +use crypto::fiat_shamir::is_transcript::IsTranscript; + +use crate::statement::absorb_statement; +use crate::test_utils::E; use crate::{RuntimePageRange, TableCounts}; fn sample_counts() -> TableCounts { @@ -31,116 +35,84 @@ fn sample_ranges() -> Vec { ] } -#[test] -fn encoding_is_deterministic() { - let counts = sample_counts(); - let ranges = sample_ranges(); - assert_eq!( - encode_statement(b"elf-bytes", b"output", &counts, 3, &ranges), - encode_statement(b"elf-bytes", b"output", &counts, 3, &ranges), - ); - assert_eq!( - statement_seed(b"elf-bytes", b"output", &counts, 3, &ranges), - statement_seed(b"elf-bytes", b"output", &counts, 3, &ranges), - ); +fn state_after_absorb( + elf: &[u8], + out: &[u8], + counts: &TableCounts, + priv_pages: usize, + ranges: &[RuntimePageRange], +) -> [u8; 32] { + let mut t = DefaultTranscript::::new(&[]); + absorb_statement(&mut t, elf, out, counts, priv_pages, ranges); + t.state() } #[test] -fn encoding_starts_with_domain_tag() { - let enc = encode_statement(b"", b"", &sample_counts(), 0, &[]); - assert!(enc.starts_with(b"LAMBDAVM_STARK_STATEMENT_V1")); +fn state_is_deterministic() { + let a = state_after_absorb(b"elf", b"out", &sample_counts(), 3, &sample_ranges()); + let b = state_after_absorb(b"elf", b"out", &sample_counts(), 3, &sample_ranges()); + assert_eq!(a, b); } #[test] -fn seed_depends_on_elf() { - let c = sample_counts(); - let r = sample_ranges(); +fn state_depends_on_every_field() { + let baseline = state_after_absorb(b"elf", b"out", &sample_counts(), 1, &sample_ranges()); + assert_ne!( - statement_seed(b"program-a", b"out", &c, 1, &r), - statement_seed(b"program-b", b"out", &c, 1, &r), + baseline, + state_after_absorb( + b"different-elf", + b"out", + &sample_counts(), + 1, + &sample_ranges() + ), + "state must depend on elf", ); -} - -#[test] -fn seed_depends_on_public_output() { - let c = sample_counts(); - let r = sample_ranges(); assert_ne!( - statement_seed(b"elf", b"output-1", &c, 1, &r), - statement_seed(b"elf", b"output-2", &c, 1, &r), + baseline, + state_after_absorb( + b"elf", + b"different-output", + &sample_counts(), + 1, + &sample_ranges() + ), + "state must depend on public_output", ); -} -#[test] -fn seed_depends_on_table_counts() { - let r = sample_ranges(); - let mut c2 = sample_counts(); - c2.branch += 1; + let mut counts2 = sample_counts(); + counts2.branch += 1; assert_ne!( - statement_seed(b"elf", b"out", &sample_counts(), 1, &r), - statement_seed(b"elf", b"out", &c2, 1, &r), + baseline, + state_after_absorb(b"elf", b"out", &counts2, 1, &sample_ranges()), + "state must depend on table_counts", ); -} -#[test] -fn seed_depends_on_private_input_pages() { - let c = sample_counts(); - let r = sample_ranges(); assert_ne!( - statement_seed(b"elf", b"out", &c, 1, &r), - statement_seed(b"elf", b"out", &c, 2, &r), + baseline, + state_after_absorb(b"elf", b"out", &sample_counts(), 2, &sample_ranges()), + "state must depend on num_private_input_pages", ); -} -#[test] -fn seed_depends_on_runtime_page_ranges() { - let c = sample_counts(); assert_ne!( - statement_seed(b"elf", b"out", &c, 1, &sample_ranges()), - statement_seed(b"elf", b"out", &c, 1, &[]), + baseline, + state_after_absorb(b"elf", b"out", &sample_counts(), 1, &[]), + "state must depend on runtime_page_ranges", ); } #[test] -fn length_prefix_prevents_public_output_boundary_collision() { - // Without the public_output length prefix, "empty output + cpu count 0x41" - // and "output [0x41] + cpu count 0" would encode to the same bytes. The - // length prefix keeps the two statements distinct. +fn public_output_length_prefix_prevents_collision() { + // Without the length prefix on public_output, "empty output + cpu count + // 0x41" and "output [0x41] + cpu count 0" would absorb identical bytes. + // The prefix keeps the two statements distinct. let mut counts_a = sample_counts(); counts_a.cpu = 0x41; let mut counts_b = sample_counts(); counts_b.cpu = 0; assert_ne!( - encode_statement(b"elf", b"", &counts_a, 0, &[]), - encode_statement(b"elf", b"\x41", &counts_b, 0, &[]), - ); -} - -/// End-to-end: an honest proof verifies against its own program, and a proof -/// must not verify against a different program -- the verifier's statement seed -/// (built from the other ELF) diverges from the prover's, so every Fiat-Shamir -/// challenge differs. Also exercises seed consistency across the prove path, -/// the verify path, and the bus-balance transcript replay. -#[test] -fn proof_binds_the_program_it_was_generated_for() { - let rust_artifacts = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("workspace root") - .join("executor/program_artifacts/rust"); - let read = |name: &str| { - std::fs::read(rust_artifacts.join(name)) - .unwrap_or_else(|_| panic!("{name} not found -- run `make compile-programs-rust`")) - }; - let allocator = read("allocator.elf"); - let pure_commit = read("pure_commit.elf"); - - let proof = crate::prove(&allocator).expect("prove allocator"); - assert!( - crate::verify(&proof, &allocator).expect("verify allocator"), - "honest proof must verify against its own program", - ); - assert!( - !matches!(crate::verify(&proof, &pure_commit), Ok(true)), - "a proof must not verify against a different program", + state_after_absorb(b"elf", b"", &counts_a, 0, &[]), + state_after_absorb(b"elf", b"\x41", &counts_b, 0, &[]), ); } From 86a121756bf4aa9522c4d2944926e940a125808d Mon Sep 17 00:00:00 2001 From: MauroFab Date: Tue, 26 May 2026 12:24:34 -0300 Subject: [PATCH 3/3] refactor(prover): use exhaustive destructure to guard statement encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a field is added to TableCounts or RuntimePageRange and not bound into the transcript, statement binding is silently broken for that field — a proof generated for one configuration could pass verification against a different one. The previous `[ table_counts.cpu, ... ]` array literal compiled fine even when a field was missing; the new field was simply skipped. Same for direct `.base` / `.count` access on RuntimePageRange. Replace both with exhaustive `&Struct { ... }` patterns. Adding a field now produces: error[E0027]: pattern does not mention field `` --> prover/src/statement.rs:.. The compile error fires at the encoding site itself — on every build, not just on test runs — pointing the dev directly at the code that needs updating. No separate test needed; the safety is in the encoding function. Encoded transcript bytes are unchanged: same field order, same widths. Pure source-level guard. --- prover/src/statement.rs | 44 ++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/prover/src/statement.rs b/prover/src/statement.rs index 2ac334d72..82c41861c 100644 --- a/prover/src/statement.rs +++ b/prover/src/statement.rs @@ -41,19 +41,32 @@ pub(crate) fn absorb_statement( t.append_bytes(&(public_output.len() as u64).to_le_bytes()); t.append_bytes(public_output); - // table_counts: fixed-width u64s in declared order. - // Reordering or adding a field requires bumping DOMAIN_TAG above. + // table_counts: fixed-width u64s in declared order. The exhaustive + // destructure makes any field added to TableCounts a compile error here — + // that's the signal to extend the loop below and bump DOMAIN_TAG. + let &TableCounts { + cpu, + lt, + memw, + memw_aligned, + load, + mul, + dvrm, + shift, + branch, + memw_register, + } = table_counts; for count in [ - table_counts.cpu, - table_counts.lt, - table_counts.memw, - table_counts.memw_aligned, - table_counts.load, - table_counts.mul, - table_counts.dvrm, - table_counts.shift, - table_counts.branch, - table_counts.memw_register, + cpu, + lt, + memw, + memw_aligned, + load, + mul, + dvrm, + shift, + branch, + memw_register, ] { t.append_bytes(&(count as u64).to_le_bytes()); } @@ -63,7 +76,10 @@ pub(crate) fn absorb_statement( // runtime_page_ranges: count-prefixed; each entry fixed width. t.append_bytes(&(runtime_page_ranges.len() as u64).to_le_bytes()); for r in runtime_page_ranges { - t.append_bytes(&r.base.to_le_bytes()); - t.append_bytes(&r.count.to_le_bytes()); + // Exhaustive destructure: any field added to RuntimePageRange becomes + // a compile error here. + let &RuntimePageRange { base, count } = r; + t.append_bytes(&base.to_le_bytes()); + t.append_bytes(&count.to_le_bytes()); } }