diff --git a/prover/Cargo.toml b/prover/Cargo.toml index b2505a94..781aef72 100644 --- a/prover/Cargo.toml +++ b/prover/Cargo.toml @@ -19,6 +19,7 @@ serde = { version = "1.0", features = ["derive"] } rayon = { version = "1.8.0", optional = true } sysinfo = { version = "0.31", default-features = false, features = ["system"] } log = "0.4" +tiny-keccak = { version = "2.0", features = ["keccak"] } [dev-dependencies] env_logger = "*" @@ -26,7 +27,6 @@ criterion = { version = "0.5", default-features = false } bincode = "1" tikv-jemallocator = "0.6" tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] } -tiny-keccak = { version = "2.0", features = ["keccak"] } # Enable stark's test-utils so cross-crate tests can reach # `compute_precomputed_commitment_for_testing`. Only active under cargo test/bench. stark = { path = "../crypto/stark", features = ["test-utils"] } diff --git a/prover/src/lib.rs b/prover/src/lib.rs index dbe13d20..a4544d15 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -17,6 +17,7 @@ pub mod constraints; mod debug_report; #[cfg(feature = "instruments")] pub mod instruments; +mod statement; pub mod tables; pub mod test_utils; #[cfg(test)] @@ -35,6 +36,7 @@ use stark::storage_mode::StorageMode; use stark::traits::AIR; use stark::verifier::{IsStarkVerifier, Verifier}; +use crate::statement::statement_seed; pub use crate::tables::MaxRowsConfig; use crate::tables::bitwise; use crate::tables::decode; @@ -449,8 +451,9 @@ impl VmAirs { pub(crate) fn replay_transcript_phase_a( airs: &[&dyn AIR], multi_proof: &MultiProof, + transcript_seed: &[u8], ) -> (FieldElement, FieldElement) { - let mut transcript = DefaultTranscript::::new(&[]); + let mut transcript = DefaultTranscript::::new(transcript_seed); for (air, proof) in airs.iter().zip(&multi_proof.proofs) { if air.is_preprocessed() { transcript.append_bytes(&air.precomputed_commitment()); @@ -506,8 +509,9 @@ pub(crate) fn compute_expected_commit_bus_balance( airs: &[&dyn AIR], proof: &MultiProof, public_output_bytes: &[u8], + transcript_seed: &[u8], ) -> Option> { - let (z, alpha) = replay_transcript_phase_a(airs, proof); + let (z, alpha) = replay_transcript_phase_a(airs, proof, transcript_seed); compute_commit_bus_offset(public_output_bytes, &z, &alpha) } @@ -646,10 +650,26 @@ pub fn prove_with_options_and_inputs( let runtime_page_ranges = traces.runtime_page_ranges(); + let num_private_input_pages = traces + .page_configs + .iter() + .filter(|c| c.is_private_input) + .count(); + + // Bind the full statement (program, public output, table layout) into the + // Fiat-Shamir transcript so every challenge depends on it. + let transcript_seed = statement_seed( + elf_bytes, + &traces.public_output_bytes, + &table_counts, + num_private_input_pages, + &runtime_page_ranges, + ); + // Phase 4: Prove (multi_prove) let proof = Prover::multi_prove( airs.air_trace_pairs(&mut traces), - &mut DefaultTranscript::::new(&[]), + &mut DefaultTranscript::::new(&transcript_seed), #[cfg(feature = "disk-spill")] storage_mode, ) @@ -671,12 +691,6 @@ pub fn prove_with_options_and_inputs( ); } - let num_private_input_pages = traces - .page_configs - .iter() - .filter(|c| c.is_private_input) - .count(); - Ok(VmProof { proof, runtime_page_ranges, @@ -759,10 +773,23 @@ pub fn verify_with_options( // If public_output was tampered, the recomputed offset won't match the // actual bus total in the proof, and multi_verify will reject. let air_refs = airs.air_refs(); + + // Recompute the statement seed from the proof bundle: a tampered statement + // field makes this diverge from the prover's seed, so every derived + // challenge differs and verification rejects. + let transcript_seed = statement_seed( + elf_bytes, + &vm_proof.public_output, + &vm_proof.table_counts, + vm_proof.num_private_input_pages, + &vm_proof.runtime_page_ranges, + ); + let expected_bus_balance = match compute_expected_commit_bus_balance( &air_refs, &vm_proof.proof, &vm_proof.public_output, + &transcript_seed, ) { Some(balance) => balance, None => return Ok(false), @@ -771,7 +798,7 @@ pub fn verify_with_options( Ok(Verifier::multi_verify( &air_refs, &vm_proof.proof, - &mut DefaultTranscript::::new(&[]), + &mut DefaultTranscript::::new(&transcript_seed), &expected_bus_balance, )) } diff --git a/prover/src/statement.rs b/prover/src/statement.rs new file mode 100644 index 00000000..570c099b --- /dev/null +++ b/prover/src/statement.rs @@ -0,0 +1,96 @@ +//! Canonical encoding of the statement a VM proof attests to. +//! +//! The 32-byte digest produced by [`statement_seed`] seeds the Fiat-Shamir +//! transcript on both the prove and verify paths, so every challenge is bound +//! to the program, its public output, and the table layout. Prover and +//! verifier must compute it from identical inputs — any divergence makes every +//! derived challenge differ and verification reject. + +use tiny_keccak::{Hasher, Keccak}; + +use crate::{RuntimePageRange, TableCounts}; + +/// Bumped whenever the statement encoding changes, so a re-encoding under a new +/// layout cannot collide with one produced by an older layout. +const FORMAT_VERSION: u32 = 1; + +/// Fixed domain-separation tag prefixing every statement encoding. +const DOMAIN_TAG: &[u8] = b"LAMBDAVM_STARK_STATEMENT_V1"; + +fn keccak256(data: &[u8]) -> [u8; 32] { + let mut hasher = Keccak::v256(); + hasher.update(data); + let mut out = [0u8; 32]; + hasher.finalize(&mut out); + out +} + +/// Canonical, length-prefixed encoding of the statement. +/// +/// Every variable-length field is length-prefixed, so two distinct statements +/// can never produce the same byte string by shifting content across a field +/// boundary. +pub(crate) fn encode_statement( + elf_bytes: &[u8], + public_output: &[u8], + table_counts: &TableCounts, + num_private_input_pages: usize, + runtime_page_ranges: &[RuntimePageRange], +) -> Vec { + let mut out = Vec::new(); + out.extend_from_slice(DOMAIN_TAG); + out.extend_from_slice(&FORMAT_VERSION.to_le_bytes()); + + // Program identity: a length-prefixed digest of the ELF (hashed, not + // inlined, to keep the encoding small). + out.extend_from_slice(&(elf_bytes.len() as u64).to_le_bytes()); + out.extend_from_slice(&keccak256(elf_bytes)); + + // Public output. + out.extend_from_slice(&(public_output.len() as u64).to_le_bytes()); + out.extend_from_slice(public_output); + + // Table layout: every field, declared order, fixed width. + for count in [ + table_counts.cpu, + table_counts.lt, + table_counts.memw, + table_counts.memw_aligned, + table_counts.load, + table_counts.mul, + table_counts.dvrm, + table_counts.shift, + table_counts.branch, + table_counts.memw_register, + ] { + out.extend_from_slice(&(count as u64).to_le_bytes()); + } + + out.extend_from_slice(&(num_private_input_pages as u64).to_le_bytes()); + + // Runtime page ranges (count-prefixed; each entry fixed width). + out.extend_from_slice(&(runtime_page_ranges.len() as u64).to_le_bytes()); + for range in runtime_page_ranges { + out.extend_from_slice(&range.base.to_le_bytes()); + out.extend_from_slice(&range.count.to_le_bytes()); + } + + out +} + +/// The 32-byte Fiat-Shamir transcript seed binding the full statement. +pub(crate) fn statement_seed( + elf_bytes: &[u8], + public_output: &[u8], + table_counts: &TableCounts, + num_private_input_pages: usize, + runtime_page_ranges: &[RuntimePageRange], +) -> [u8; 32] { + keccak256(&encode_statement( + elf_bytes, + public_output, + table_counts, + num_private_input_pages, + runtime_page_ranges, + )) +} diff --git a/prover/src/tests/decode_tests.rs b/prover/src/tests/decode_tests.rs index f1f60e5b..488a4c74 100644 --- a/prover/src/tests/decode_tests.rs +++ b/prover/src/tests/decode_tests.rs @@ -1068,6 +1068,7 @@ fn test_decode_soundness_same_elf_accepted() { &verifier_air_refs, &proof, &traces.public_output_bytes, + &[], ) .expect("fingerprint collision in test"); diff --git a/prover/src/tests/mod.rs b/prover/src/tests/mod.rs index dc5f3fe2..fa31abfc 100644 --- a/prover/src/tests/mod.rs +++ b/prover/src/tests/mod.rs @@ -29,4 +29,6 @@ pub mod mul_tests; #[cfg(test)] pub mod prove_elfs_tests; #[cfg(test)] +pub mod statement_tests; +#[cfg(test)] pub mod trace_builder_tests; diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 53149a94..7ba94f86 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -71,6 +71,7 @@ fn prove_and_verify_vm_minimal(elf: &Elf, traces: &mut Traces) -> bool { &airs.air_refs(), &multi_proof, &traces.public_output_bytes, + &[], ) .expect("fingerprint collision in test"); @@ -885,6 +886,7 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() { &verifier_air_refs, &proof, &traces.public_output_bytes, + &[], ) .expect("fingerprint collision in test"); @@ -1621,6 +1623,7 @@ fn test_deep_stack_runtime_pages_roundtrip() { &verifier_air_refs, &proof, &traces.public_output_bytes, + &[], ) .expect("fingerprint collision in test"); @@ -1676,6 +1679,7 @@ fn test_deep_stack_missing_pages_rejected() { &verifier_air_refs, &proof, &traces.public_output_bytes, + &[], ) .expect("fingerprint collision in test"); @@ -1766,6 +1770,7 @@ fn test_heap_alloc_runtime_pages_roundtrip() { &verifier_air_refs, &proof, &traces.public_output_bytes, + &[], ) .expect("fingerprint collision in test"); diff --git a/prover/src/tests/statement_tests.rs b/prover/src/tests/statement_tests.rs new file mode 100644 index 00000000..4f6acb9a --- /dev/null +++ b/prover/src/tests/statement_tests.rs @@ -0,0 +1,146 @@ +//! Tests for statement encoding and Fiat-Shamir transcript seeding. + +use crate::statement::{encode_statement, statement_seed}; +use crate::{RuntimePageRange, TableCounts}; + +fn sample_counts() -> TableCounts { + TableCounts { + cpu: 3, + lt: 1, + memw: 2, + memw_aligned: 1, + load: 1, + mul: 1, + dvrm: 1, + shift: 1, + branch: 2, + memw_register: 1, + } +} + +fn sample_ranges() -> Vec { + vec![ + RuntimePageRange { + base: 0x1000, + count: 4, + }, + RuntimePageRange { + base: 0x8000, + count: 2, + }, + ] +} + +#[test] +fn encoding_is_deterministic() { + let counts = sample_counts(); + let ranges = sample_ranges(); + assert_eq!( + encode_statement(b"elf-bytes", b"output", &counts, 3, &ranges), + encode_statement(b"elf-bytes", b"output", &counts, 3, &ranges), + ); + assert_eq!( + statement_seed(b"elf-bytes", b"output", &counts, 3, &ranges), + statement_seed(b"elf-bytes", b"output", &counts, 3, &ranges), + ); +} + +#[test] +fn encoding_starts_with_domain_tag() { + let enc = encode_statement(b"", b"", &sample_counts(), 0, &[]); + assert!(enc.starts_with(b"LAMBDAVM_STARK_STATEMENT_V1")); +} + +#[test] +fn seed_depends_on_elf() { + let c = sample_counts(); + let r = sample_ranges(); + assert_ne!( + statement_seed(b"program-a", b"out", &c, 1, &r), + statement_seed(b"program-b", b"out", &c, 1, &r), + ); +} + +#[test] +fn seed_depends_on_public_output() { + let c = sample_counts(); + let r = sample_ranges(); + assert_ne!( + statement_seed(b"elf", b"output-1", &c, 1, &r), + statement_seed(b"elf", b"output-2", &c, 1, &r), + ); +} + +#[test] +fn seed_depends_on_table_counts() { + let r = sample_ranges(); + let mut c2 = sample_counts(); + c2.branch += 1; + assert_ne!( + statement_seed(b"elf", b"out", &sample_counts(), 1, &r), + statement_seed(b"elf", b"out", &c2, 1, &r), + ); +} + +#[test] +fn seed_depends_on_private_input_pages() { + let c = sample_counts(); + let r = sample_ranges(); + assert_ne!( + statement_seed(b"elf", b"out", &c, 1, &r), + statement_seed(b"elf", b"out", &c, 2, &r), + ); +} + +#[test] +fn seed_depends_on_runtime_page_ranges() { + let c = sample_counts(); + assert_ne!( + statement_seed(b"elf", b"out", &c, 1, &sample_ranges()), + statement_seed(b"elf", b"out", &c, 1, &[]), + ); +} + +#[test] +fn length_prefix_prevents_public_output_boundary_collision() { + // Without the public_output length prefix, "empty output + cpu count 0x41" + // and "output [0x41] + cpu count 0" would encode to the same bytes. The + // length prefix keeps the two statements distinct. + let mut counts_a = sample_counts(); + counts_a.cpu = 0x41; + let mut counts_b = sample_counts(); + counts_b.cpu = 0; + assert_ne!( + encode_statement(b"elf", b"", &counts_a, 0, &[]), + encode_statement(b"elf", b"\x41", &counts_b, 0, &[]), + ); +} + +/// End-to-end: an honest proof verifies against its own program, and a proof +/// must not verify against a different program -- the verifier's statement seed +/// (built from the other ELF) diverges from the prover's, so every Fiat-Shamir +/// challenge differs. Also exercises seed consistency across the prove path, +/// the verify path, and the bus-balance transcript replay. +#[test] +fn proof_binds_the_program_it_was_generated_for() { + let rust_artifacts = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("workspace root") + .join("executor/program_artifacts/rust"); + let read = |name: &str| { + std::fs::read(rust_artifacts.join(name)) + .unwrap_or_else(|_| panic!("{name} not found -- run `make compile-programs-rust`")) + }; + let allocator = read("allocator.elf"); + let pure_commit = read("pure_commit.elf"); + + let proof = crate::prove(&allocator).expect("prove allocator"); + assert!( + crate::verify(&proof, &allocator).expect("verify allocator"), + "honest proof must verify against its own program", + ); + assert!( + !matches!(crate::verify(&proof, &pure_commit), Ok(true)), + "a proof must not verify against a different program", + ); +}