From 0ce7bac369f1ebf7e78e18445eb5532cfc620041 Mon Sep 17 00:00:00 2001
From: diegokingston <dkingston@fi.uba.ar>
Date: Fri, 22 May 2026 11:38:30 -0300
Subject: [PATCH 1/3] refactor(prover): bind the proven statement into the
 Fiat-Shamir transcript
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The VM prover and verifier built their transcript with `new(&[])` — the
statement (program, public output, table layout) was never absorbed, so
Fiat-Shamir challenges did not depend on it. Soundness was preserved only
by out-of-band reconstruction checks in the verifier.

Add a canonical, versioned, length-prefixed encoder (`prover/src/statement.rs`)
that digests the full statement — ELF hash, public output, table counts, and
page layout — into a 32-byte transcript seed. Seed the transcript with it on
the prove path, the verify path, and the bus-balance replay
(`replay_transcript_phase_a`), so all three derive identical challenges for an
honest proof and diverge for any tampered statement.

Closes the frozen-heart class of issue as defense-in-depth and is a
prerequisite for recursion. Breaking proof-format change (challenges differ).

`tiny-keccak` is promoted from a dev-dependency to a dependency for the
statement digest.
---
 prover/Cargo.toml                    |   2 +-
 prover/src/lib.rs                    |  47 +++++++--
 prover/src/statement.rs              |  96 ++++++++++++++++++
 prover/src/tests/decode_tests.rs     |   1 +
 prover/src/tests/mod.rs              |   2 +
 prover/src/tests/prove_elfs_tests.rs |   5 +
 prover/src/tests/statement_tests.rs  | 146 +++++++++++++++++++++++++++
 7 files changed, 288 insertions(+), 11 deletions(-)
 create mode 100644 prover/src/statement.rs
 create mode 100644 prover/src/tests/statement_tests.rs
diff --git a/prover/Cargo.toml b/prover/Cargo.toml
index b2505a94c..781aef722 100644
--- a/prover/Cargo.toml
+++ b/prover/Cargo.toml
@@ -19,6 +19,7 @@ serde = { version = "1.0", features = ["derive"] }
 rayon = { version = "1.8.0", optional = true }
 sysinfo = { version = "0.31", default-features = false, features = ["system"] }
 log = "0.4"
+tiny-keccak = { version = "2.0", features = ["keccak"] }
 
 [dev-dependencies]
 env_logger = "*"
@@ -26,7 +27,6 @@ criterion = { version = "0.5", default-features = false }
 bincode = "1"
 tikv-jemallocator = "0.6"
 tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
-tiny-keccak = { version = "2.0", features = ["keccak"] }
 # Enable stark's test-utils so cross-crate tests can reach
 # `compute_precomputed_commitment_for_testing`. Only active under cargo test/bench.
 stark = { path = "../crypto/stark", features = ["test-utils"] }
diff --git a/prover/src/lib.rs b/prover/src/lib.rs
index dbe13d20b..a4544d151 100644
--- a/prover/src/lib.rs
+++ b/prover/src/lib.rs
@@ -17,6 +17,7 @@ pub mod constraints;
 mod debug_report;
 #[cfg(feature = "instruments")]
 pub mod instruments;
+mod statement;
 pub mod tables;
 pub mod test_utils;
 #[cfg(test)]
@@ -35,6 +36,7 @@ use stark::storage_mode::StorageMode;
 use stark::traits::AIR;
 use stark::verifier::{IsStarkVerifier, Verifier};
 
+use crate::statement::statement_seed;
 pub use crate::tables::MaxRowsConfig;
 use crate::tables::bitwise;
 use crate::tables::decode;
@@ -449,8 +451,9 @@ impl VmAirs {
 pub(crate) fn replay_transcript_phase_a(
     airs: &[&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>],
     multi_proof: &MultiProof<F, E, ()>,
+    transcript_seed: &[u8],
 ) -> (FieldElement<E>, FieldElement<E>) {
-    let mut transcript = DefaultTranscript::<E>::new(&[]);
+    let mut transcript = DefaultTranscript::<E>::new(transcript_seed);
     for (air, proof) in airs.iter().zip(&multi_proof.proofs) {
         if air.is_preprocessed() {
             transcript.append_bytes(&air.precomputed_commitment());
@@ -506,8 +509,9 @@ pub(crate) fn compute_expected_commit_bus_balance(
     airs: &[&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>],
     proof: &MultiProof<F, E, ()>,
     public_output_bytes: &[u8],
+    transcript_seed: &[u8],
 ) -> Option<FieldElement<E>> {
-    let (z, alpha) = replay_transcript_phase_a(airs, proof);
+    let (z, alpha) = replay_transcript_phase_a(airs, proof, transcript_seed);
     compute_commit_bus_offset(public_output_bytes, &z, &alpha)
 }
 
@@ -646,10 +650,26 @@ pub fn prove_with_options_and_inputs(
 
     let runtime_page_ranges = traces.runtime_page_ranges();
 
+    let num_private_input_pages = traces
+        .page_configs
+        .iter()
+        .filter(|c| c.is_private_input)
+        .count();
+
+    // Bind the full statement (program, public output, table layout) into the
+    // Fiat-Shamir transcript so every challenge depends on it.
+    let transcript_seed = statement_seed(
+        elf_bytes,
+        &traces.public_output_bytes,
+        &table_counts,
+        num_private_input_pages,
+        &runtime_page_ranges,
+    );
+
     // Phase 4: Prove (multi_prove)
     let proof = Prover::multi_prove(
         airs.air_trace_pairs(&mut traces),
-        &mut DefaultTranscript::<E>::new(&[]),
+        &mut DefaultTranscript::<E>::new(&transcript_seed),
         #[cfg(feature = "disk-spill")]
         storage_mode,
     )
@@ -671,12 +691,6 @@ pub fn prove_with_options_and_inputs(
         );
     }
 
-    let num_private_input_pages = traces
-        .page_configs
-        .iter()
-        .filter(|c| c.is_private_input)
-        .count();
-
     Ok(VmProof {
         proof,
         runtime_page_ranges,
@@ -759,10 +773,23 @@ pub fn verify_with_options(
     // If public_output was tampered, the recomputed offset won't match the
     // actual bus total in the proof, and multi_verify will reject.
     let air_refs = airs.air_refs();
+
+    // Recompute the statement seed from the proof bundle: a tampered statement
+    // field makes this diverge from the prover's seed, so every derived
+    // challenge differs and verification rejects.
+    let transcript_seed = statement_seed(
+        elf_bytes,
+        &vm_proof.public_output,
+        &vm_proof.table_counts,
+        vm_proof.num_private_input_pages,
+        &vm_proof.runtime_page_ranges,
+    );
+
     let expected_bus_balance = match compute_expected_commit_bus_balance(
         &air_refs,
         &vm_proof.proof,
         &vm_proof.public_output,
+        &transcript_seed,
     ) {
         Some(balance) => balance,
         None => return Ok(false),
@@ -771,7 +798,7 @@ pub fn verify_with_options(
     Ok(Verifier::multi_verify(
         &air_refs,
         &vm_proof.proof,
-        &mut DefaultTranscript::<E>::new(&[]),
+        &mut DefaultTranscript::<E>::new(&transcript_seed),
         &expected_bus_balance,
     ))
 }
diff --git a/prover/src/statement.rs b/prover/src/statement.rs
new file mode 100644
index 000000000..570c099b5
--- /dev/null
+++ b/prover/src/statement.rs
@@ -0,0 +1,96 @@
+//! Canonical encoding of the statement a VM proof attests to.
+//!
+//! The 32-byte digest produced by [`statement_seed`] seeds the Fiat-Shamir
+//! transcript on both the prove and verify paths, so every challenge is bound
+//! to the program, its public output, and the table layout. Prover and
+//! verifier must compute it from identical inputs — any divergence makes every
+//! derived challenge differ and verification reject.
+
+use tiny_keccak::{Hasher, Keccak};
+
+use crate::{RuntimePageRange, TableCounts};
+
+/// Bumped whenever the statement encoding changes, so a re-encoding under a new
+/// layout cannot collide with one produced by an older layout.
+const FORMAT_VERSION: u32 = 1;
+
+/// Fixed domain-separation tag prefixing every statement encoding.
+const DOMAIN_TAG: &[u8] = b"LAMBDAVM_STARK_STATEMENT_V1";
+
+fn keccak256(data: &[u8]) -> [u8; 32] {
+    let mut hasher = Keccak::v256();
+    hasher.update(data);
+    let mut out = [0u8; 32];
+    hasher.finalize(&mut out);
+    out
+}
+
+/// Canonical, length-prefixed encoding of the statement.
+///
+/// Every variable-length field is length-prefixed, so two distinct statements
+/// can never produce the same byte string by shifting content across a field
+/// boundary.
+pub(crate) fn encode_statement(
+    elf_bytes: &[u8],
+    public_output: &[u8],
+    table_counts: &TableCounts,
+    num_private_input_pages: usize,
+    runtime_page_ranges: &[RuntimePageRange],
+) -> Vec<u8> {
+    let mut out = Vec::new();
+    out.extend_from_slice(DOMAIN_TAG);
+    out.extend_from_slice(&FORMAT_VERSION.to_le_bytes());
+
+    // Program identity: a length-prefixed digest of the ELF (hashed, not
+    // inlined, to keep the encoding small).
+    out.extend_from_slice(&(elf_bytes.len() as u64).to_le_bytes());
+    out.extend_from_slice(&keccak256(elf_bytes));
+
+    // Public output.
+    out.extend_from_slice(&(public_output.len() as u64).to_le_bytes());
+    out.extend_from_slice(public_output);
+
+    // Table layout: every field, declared order, fixed width.
+    for count in [
+        table_counts.cpu,
+        table_counts.lt,
+        table_counts.memw,
+        table_counts.memw_aligned,
+        table_counts.load,
+        table_counts.mul,
+        table_counts.dvrm,
+        table_counts.shift,
+        table_counts.branch,
+        table_counts.memw_register,
+    ] {
+        out.extend_from_slice(&(count as u64).to_le_bytes());
+    }
+
+    out.extend_from_slice(&(num_private_input_pages as u64).to_le_bytes());
+
+    // Runtime page ranges (count-prefixed; each entry fixed width).
+    out.extend_from_slice(&(runtime_page_ranges.len() as u64).to_le_bytes());
+    for range in runtime_page_ranges {
+        out.extend_from_slice(&range.base.to_le_bytes());
+        out.extend_from_slice(&range.count.to_le_bytes());
+    }
+
+    out
+}
+
+/// The 32-byte Fiat-Shamir transcript seed binding the full statement.
+pub(crate) fn statement_seed(
+    elf_bytes: &[u8],
+    public_output: &[u8],
+    table_counts: &TableCounts,
+    num_private_input_pages: usize,
+    runtime_page_ranges: &[RuntimePageRange],
+) -> [u8; 32] {
+    keccak256(&encode_statement(
+        elf_bytes,
+        public_output,
+        table_counts,
+        num_private_input_pages,
+        runtime_page_ranges,
+    ))
+}
diff --git a/prover/src/tests/decode_tests.rs b/prover/src/tests/decode_tests.rs
index f1f60e5ba..488a4c747 100644
--- a/prover/src/tests/decode_tests.rs
+++ b/prover/src/tests/decode_tests.rs
@@ -1068,6 +1068,7 @@ fn test_decode_soundness_same_elf_accepted() {
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
+        &[],
     )
     .expect("fingerprint collision in test");
 
diff --git a/prover/src/tests/mod.rs b/prover/src/tests/mod.rs
index dc5f3fe22..fa31abfcf 100644
--- a/prover/src/tests/mod.rs
+++ b/prover/src/tests/mod.rs
@@ -29,4 +29,6 @@ pub mod mul_tests;
 #[cfg(test)]
 pub mod prove_elfs_tests;
 #[cfg(test)]
+pub mod statement_tests;
+#[cfg(test)]
 pub mod trace_builder_tests;
diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs
index 53149a943..7ba94f869 100644
--- a/prover/src/tests/prove_elfs_tests.rs
+++ b/prover/src/tests/prove_elfs_tests.rs
@@ -71,6 +71,7 @@ fn prove_and_verify_vm_minimal(elf: &Elf, traces: &mut Traces) -> bool {
         &airs.air_refs(),
         &multi_proof,
         &traces.public_output_bytes,
+        &[],
     )
     .expect("fingerprint collision in test");
 
@@ -885,6 +886,7 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() {
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
+        &[],
     )
     .expect("fingerprint collision in test");
 
@@ -1621,6 +1623,7 @@ fn test_deep_stack_runtime_pages_roundtrip() {
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
+        &[],
     )
     .expect("fingerprint collision in test");
 
@@ -1676,6 +1679,7 @@ fn test_deep_stack_missing_pages_rejected() {
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
+        &[],
     )
     .expect("fingerprint collision in test");
 
@@ -1766,6 +1770,7 @@ fn test_heap_alloc_runtime_pages_roundtrip() {
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
+        &[],
     )
     .expect("fingerprint collision in test");
 
diff --git a/prover/src/tests/statement_tests.rs b/prover/src/tests/statement_tests.rs
new file mode 100644
index 000000000..4f6acb9a7
--- /dev/null
+++ b/prover/src/tests/statement_tests.rs
@@ -0,0 +1,146 @@
+//! Tests for statement encoding and Fiat-Shamir transcript seeding.
+
+use crate::statement::{encode_statement, statement_seed};
+use crate::{RuntimePageRange, TableCounts};
+
+fn sample_counts() -> TableCounts {
+    TableCounts {
+        cpu: 3,
+        lt: 1,
+        memw: 2,
+        memw_aligned: 1,
+        load: 1,
+        mul: 1,
+        dvrm: 1,
+        shift: 1,
+        branch: 2,
+        memw_register: 1,
+    }
+}
+
+fn sample_ranges() -> Vec<RuntimePageRange> {
+    vec![
+        RuntimePageRange {
+            base: 0x1000,
+            count: 4,
+        },
+        RuntimePageRange {
+            base: 0x8000,
+            count: 2,
+        },
+    ]
+}
+
+#[test]
+fn encoding_is_deterministic() {
+    let counts = sample_counts();
+    let ranges = sample_ranges();
+    assert_eq!(
+        encode_statement(b"elf-bytes", b"output", &counts, 3, &ranges),
+        encode_statement(b"elf-bytes", b"output", &counts, 3, &ranges),
+    );
+    assert_eq!(
+        statement_seed(b"elf-bytes", b"output", &counts, 3, &ranges),
+        statement_seed(b"elf-bytes", b"output", &counts, 3, &ranges),
+    );
+}
+
+#[test]
+fn encoding_starts_with_domain_tag() {
+    let enc = encode_statement(b"", b"", &sample_counts(), 0, &[]);
+    assert!(enc.starts_with(b"LAMBDAVM_STARK_STATEMENT_V1"));
+}
+
+#[test]
+fn seed_depends_on_elf() {
+    let c = sample_counts();
+    let r = sample_ranges();
+    assert_ne!(
+        statement_seed(b"program-a", b"out", &c, 1, &r),
+        statement_seed(b"program-b", b"out", &c, 1, &r),
+    );
+}
+
+#[test]
+fn seed_depends_on_public_output() {
+    let c = sample_counts();
+    let r = sample_ranges();
+    assert_ne!(
+        statement_seed(b"elf", b"output-1", &c, 1, &r),
+        statement_seed(b"elf", b"output-2", &c, 1, &r),
+    );
+}
+
+#[test]
+fn seed_depends_on_table_counts() {
+    let r = sample_ranges();
+    let mut c2 = sample_counts();
+    c2.branch += 1;
+    assert_ne!(
+        statement_seed(b"elf", b"out", &sample_counts(), 1, &r),
+        statement_seed(b"elf", b"out", &c2, 1, &r),
+    );
+}
+
+#[test]
+fn seed_depends_on_private_input_pages() {
+    let c = sample_counts();
+    let r = sample_ranges();
+    assert_ne!(
+        statement_seed(b"elf", b"out", &c, 1, &r),
+        statement_seed(b"elf", b"out", &c, 2, &r),
+    );
+}
+
+#[test]
+fn seed_depends_on_runtime_page_ranges() {
+    let c = sample_counts();
+    assert_ne!(
+        statement_seed(b"elf", b"out", &c, 1, &sample_ranges()),
+        statement_seed(b"elf", b"out", &c, 1, &[]),
+    );
+}
+
+#[test]
+fn length_prefix_prevents_public_output_boundary_collision() {
+    // Without the public_output length prefix, "empty output + cpu count 0x41"
+    // and "output [0x41] + cpu count 0" would encode to the same bytes. The
+    // length prefix keeps the two statements distinct.
+    let mut counts_a = sample_counts();
+    counts_a.cpu = 0x41;
+    let mut counts_b = sample_counts();
+    counts_b.cpu = 0;
+    assert_ne!(
+        encode_statement(b"elf", b"", &counts_a, 0, &[]),
+        encode_statement(b"elf", b"\x41", &counts_b, 0, &[]),
+    );
+}
+
+/// End-to-end: an honest proof verifies against its own program, and a proof
+/// must not verify against a different program -- the verifier's statement seed
+/// (built from the other ELF) diverges from the prover's, so every Fiat-Shamir
+/// challenge differs. Also exercises seed consistency across the prove path,
+/// the verify path, and the bus-balance transcript replay.
+#[test]
+fn proof_binds_the_program_it_was_generated_for() {
+    let rust_artifacts = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .parent()
+        .expect("workspace root")
+        .join("executor/program_artifacts/rust");
+    let read = |name: &str| {
+        std::fs::read(rust_artifacts.join(name))
+            .unwrap_or_else(|_| panic!("{name} not found -- run `make compile-programs-rust`"))
+    };
+    let allocator = read("allocator.elf");
+    let pure_commit = read("pure_commit.elf");
+
+    let proof = crate::prove(&allocator).expect("prove allocator");
+    assert!(
+        crate::verify(&proof, &allocator).expect("verify allocator"),
+        "honest proof must verify against its own program",
+    );
+    assert!(
+        !matches!(crate::verify(&proof, &pure_commit), Ok(true)),
+        "a proof must not verify against a different program",
+    );
+}

From 93131deef7663f5f866efc26c3d513aca7a0c8dd Mon Sep 17 00:00:00 2001
From: MauroFab <maurotoscano2@gmail.com>
Date: Fri, 22 May 2026 17:24:14 -0300
Subject: [PATCH 2/3] refactor(prover): streamline statement absorption into
 transcript
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apply the simplifications to the statement-binding work:

- Drop tiny-keccak as a prod dep; the DefaultTranscript is already a
  Keccak256 absorber, so the outer keccak256 in statement_seed() was
  redundant. ELF digest now uses sha3::Keccak256 (already a transitive
  prod dep via crypto). tiny-keccak stays as a dev-dep for the in-VM
  keccak precompile cross-check.

- Replace encode_statement -> Vec<u8> + statement_seed -> [u8; 32] with
  a single absorb_statement(&mut transcript, ...) that streams bytes
  directly into the transcript. No intermediate allocation.

- Drop FORMAT_VERSION constant — the version is in DOMAIN_TAG suffix
  (LAMBDAVM_STARK_STATEMENT_V1). Bump the suffix on any encoding change.

- Drop the redundant 8-byte length prefix on the elf bytes: we hash to
  fixed 32-byte digest, so the prefix doesn't prevent any collision.

- replay_transcript_phase_a / compute_expected_commit_bus_balance now
  take &mut DefaultTranscript<E> instead of an opaque transcript_seed:
  &[u8]. Callers decide what's in the transcript; internal tests just
  hand a fresh empty one (no &[] magic sentinel).

- Fold the duplicated lde_trace_main_merkle_root append branches in
  replay_transcript_phase_a.

- verify_with_options absorbs the statement ONCE, then clones the
  transcript so multi_verify and the bus-balance replay both start
  from the same post-absorb state — no closure, no field-list drift.

- statement_tests.rs reframed around transcript.state(). 3 tests
  (state_is_deterministic, state_depends_on_every_field,
  public_output_length_prefix_prevents_collision) cover the same
  surface as the 8 byte-level encoder tests they replace, hitting the
  real absorb path. Drop the two-ELF e2e test that needed RISC-V
  artifacts and couldn't run in author env.

- 6 internal test sites (decode_tests.rs, prove_elfs_tests.rs): pass
  a fresh DefaultTranscript instead of &[].

Soundness commitment unchanged: same five statement fields bound, same
domain separation, same length prefixes where needed, same cross-site
agreement (now explicitly tested via deterministic absorb).

Follow-up: Phase A is still replayed twice on the verify path (once in
compute_expected_commit_bus_balance, once in multi_verify). That's
pre-existing, not from this PR; removing it would change multi_verify's
public API to take a closure that produces the bus balance offset given
(z, alpha). Separate scope.
---
 Cargo.lock                           |   1 +
 prover/Cargo.toml                    |   3 +-
 prover/src/lib.rs                    |  37 ++++---
 prover/src/statement.rs              |  97 +++++++-----------
 prover/src/tests/decode_tests.rs     |   3 +-
 prover/src/tests/prove_elfs_tests.rs |  15 ++-
 prover/src/tests/statement_tests.rs  | 144 +++++++++++----------------
 7 files changed, 129 insertions(+), 171 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 70b4071e8..8fff60dcf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1961,6 +1961,7 @@ dependencies = [
  "math",
  "rayon",
  "serde",
+ "sha3",
  "stark",
  "sysinfo",
  "tikv-jemalloc-ctl",
diff --git a/prover/Cargo.toml b/prover/Cargo.toml
index 781aef722..2d58776c5 100644
--- a/prover/Cargo.toml
+++ b/prover/Cargo.toml
@@ -19,7 +19,7 @@ serde = { version = "1.0", features = ["derive"] }
 rayon = { version = "1.8.0", optional = true }
 sysinfo = { version = "0.31", default-features = false, features = ["system"] }
 log = "0.4"
-tiny-keccak = { version = "2.0", features = ["keccak"] }
+sha3 = { version = "0.10.8", default-features = false }
 
 [dev-dependencies]
 env_logger = "*"
@@ -27,6 +27,7 @@ criterion = { version = "0.5", default-features = false }
 bincode = "1"
 tikv-jemallocator = "0.6"
 tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
+tiny-keccak = { version = "2.0", features = ["keccak"] }
 # Enable stark's test-utils so cross-crate tests can reach
 # `compute_precomputed_commitment_for_testing`. Only active under cargo test/bench.
 stark = { path = "../crypto/stark", features = ["test-utils"] }
diff --git a/prover/src/lib.rs b/prover/src/lib.rs
index a4544d151..5cb762a96 100644
--- a/prover/src/lib.rs
+++ b/prover/src/lib.rs
@@ -36,7 +36,7 @@ use stark::storage_mode::StorageMode;
 use stark::traits::AIR;
 use stark::verifier::{IsStarkVerifier, Verifier};
 
-use crate::statement::statement_seed;
+use crate::statement::absorb_statement;
 pub use crate::tables::MaxRowsConfig;
 use crate::tables::bitwise;
 use crate::tables::decode;
@@ -451,16 +451,13 @@ impl VmAirs {
 pub(crate) fn replay_transcript_phase_a(
     airs: &[&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>],
     multi_proof: &MultiProof<F, E, ()>,
-    transcript_seed: &[u8],
+    transcript: &mut DefaultTranscript<E>,
 ) -> (FieldElement<E>, FieldElement<E>) {
-    let mut transcript = DefaultTranscript::<E>::new(transcript_seed);
     for (air, proof) in airs.iter().zip(&multi_proof.proofs) {
         if air.is_preprocessed() {
             transcript.append_bytes(&air.precomputed_commitment());
-            transcript.append_bytes(&proof.lde_trace_main_merkle_root);
-        } else {
-            transcript.append_bytes(&proof.lde_trace_main_merkle_root);
         }
+        transcript.append_bytes(&proof.lde_trace_main_merkle_root);
     }
     let z: FieldElement<E> = transcript.sample_field_element();
     let alpha: FieldElement<E> = transcript.sample_field_element();
@@ -509,9 +506,9 @@ pub(crate) fn compute_expected_commit_bus_balance(
     airs: &[&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>],
     proof: &MultiProof<F, E, ()>,
     public_output_bytes: &[u8],
-    transcript_seed: &[u8],
+    transcript: &mut DefaultTranscript<E>,
 ) -> Option<FieldElement<E>> {
-    let (z, alpha) = replay_transcript_phase_a(airs, proof, transcript_seed);
+    let (z, alpha) = replay_transcript_phase_a(airs, proof, transcript);
     compute_commit_bus_offset(public_output_bytes, &z, &alpha)
 }
 
@@ -658,7 +655,9 @@ pub fn prove_with_options_and_inputs(
 
     // Bind the full statement (program, public output, table layout) into the
     // Fiat-Shamir transcript so every challenge depends on it.
-    let transcript_seed = statement_seed(
+    let mut transcript = DefaultTranscript::<E>::new(&[]);
+    absorb_statement(
+        &mut transcript,
         elf_bytes,
         &traces.public_output_bytes,
         &table_counts,
@@ -669,7 +668,7 @@ pub fn prove_with_options_and_inputs(
     // Phase 4: Prove (multi_prove)
     let proof = Prover::multi_prove(
         airs.air_trace_pairs(&mut traces),
-        &mut DefaultTranscript::<E>::new(&transcript_seed),
+        &mut transcript,
         #[cfg(feature = "disk-spill")]
         storage_mode,
     )
@@ -774,10 +773,12 @@ pub fn verify_with_options(
     // actual bus total in the proof, and multi_verify will reject.
     let air_refs = airs.air_refs();
 
-    // Recompute the statement seed from the proof bundle: a tampered statement
-    // field makes this diverge from the prover's seed, so every derived
-    // challenge differs and verification rejects.
-    let transcript_seed = statement_seed(
+    // Bind the statement into the verifier's transcript. A tampered statement
+    // field makes this diverge from the prover's transcript state, so every
+    // derived challenge differs and verification rejects.
+    let mut transcript = DefaultTranscript::<E>::new(&[]);
+    absorb_statement(
+        &mut transcript,
         elf_bytes,
         &vm_proof.public_output,
         &vm_proof.table_counts,
@@ -785,11 +786,15 @@ pub fn verify_with_options(
         &vm_proof.runtime_page_ranges,
     );
 
+    // Fork the post-absorb state: the replay helper advances through Phase A
+    // independently of the multi_verify transcript, but both must start from
+    // the same statement-bound state.
+    let mut transcript_for_replay = transcript.clone();
     let expected_bus_balance = match compute_expected_commit_bus_balance(
         &air_refs,
         &vm_proof.proof,
         &vm_proof.public_output,
-        &transcript_seed,
+        &mut transcript_for_replay,
     ) {
         Some(balance) => balance,
         None => return Ok(false),
@@ -798,7 +803,7 @@ pub fn verify_with_options(
     Ok(Verifier::multi_verify(
         &air_refs,
         &vm_proof.proof,
-        &mut DefaultTranscript::<E>::new(&transcript_seed),
+        &mut transcript,
         &expected_bus_balance,
     ))
 }
diff --git a/prover/src/statement.rs b/prover/src/statement.rs
index 570c099b5..2ac334d72 100644
--- a/prover/src/statement.rs
+++ b/prover/src/statement.rs
@@ -1,56 +1,48 @@
-//! Canonical encoding of the statement a VM proof attests to.
+//! Statement absorbed into the Fiat-Shamir transcript before Phase A.
 //!
-//! The 32-byte digest produced by [`statement_seed`] seeds the Fiat-Shamir
-//! transcript on both the prove and verify paths, so every challenge is bound
-//! to the program, its public output, and the table layout. Prover and
-//! verifier must compute it from identical inputs — any divergence makes every
-//! derived challenge differ and verification reject.
+//! Streams a canonical, domain-separated, length-prefixed encoding directly
+//! into the transcript. The transcript is itself a Keccak256 absorber
+//! (`DefaultTranscript`), so a single hash suffices — no external digest
+//! needed beyond the ELF.
+//!
+//! All three call sites (prove, verify, bus-balance replay) must absorb
+//! identical bytes; any divergence makes every derived challenge differ and
+//! verification reject.
 
-use tiny_keccak::{Hasher, Keccak};
+use crypto::fiat_shamir::is_transcript::IsTranscript;
+use sha3::{Digest, Keccak256};
 
+use crate::test_utils::E;
 use crate::{RuntimePageRange, TableCounts};
 
-/// Bumped whenever the statement encoding changes, so a re-encoding under a new
-/// layout cannot collide with one produced by an older layout.
-const FORMAT_VERSION: u32 = 1;
-
-/// Fixed domain-separation tag prefixing every statement encoding.
+/// Domain-separation tag. Bump the suffix (`_V2`, ...) on any encoding change.
 const DOMAIN_TAG: &[u8] = b"LAMBDAVM_STARK_STATEMENT_V1";
 
-fn keccak256(data: &[u8]) -> [u8; 32] {
-    let mut hasher = Keccak::v256();
-    hasher.update(data);
-    let mut out = [0u8; 32];
-    hasher.finalize(&mut out);
-    out
+fn elf_digest(elf: &[u8]) -> [u8; 32] {
+    let mut h = Keccak256::new();
+    h.update(elf);
+    h.finalize().into()
 }
 
-/// Canonical, length-prefixed encoding of the statement.
-///
-/// Every variable-length field is length-prefixed, so two distinct statements
-/// can never produce the same byte string by shifting content across a field
-/// boundary.
-pub(crate) fn encode_statement(
+pub(crate) fn absorb_statement(
+    t: &mut impl IsTranscript<E>,
     elf_bytes: &[u8],
     public_output: &[u8],
     table_counts: &TableCounts,
     num_private_input_pages: usize,
     runtime_page_ranges: &[RuntimePageRange],
-) -> Vec<u8> {
-    let mut out = Vec::new();
-    out.extend_from_slice(DOMAIN_TAG);
-    out.extend_from_slice(&FORMAT_VERSION.to_le_bytes());
+) {
+    t.append_bytes(DOMAIN_TAG);
 
-    // Program identity: a length-prefixed digest of the ELF (hashed, not
-    // inlined, to keep the encoding small).
-    out.extend_from_slice(&(elf_bytes.len() as u64).to_le_bytes());
-    out.extend_from_slice(&keccak256(elf_bytes));
+    // ELF: fixed 32-byte digest — no length prefix needed.
+    t.append_bytes(&elf_digest(elf_bytes));
 
-    // Public output.
-    out.extend_from_slice(&(public_output.len() as u64).to_le_bytes());
-    out.extend_from_slice(public_output);
+    // public_output: variable length → length-prefix to prevent boundary collisions.
+    t.append_bytes(&(public_output.len() as u64).to_le_bytes());
+    t.append_bytes(public_output);
 
-    // Table layout: every field, declared order, fixed width.
+    // table_counts: fixed-width u64s in declared order.
+    // Reordering or adding a field requires bumping DOMAIN_TAG above.
     for count in [
         table_counts.cpu,
         table_counts.lt,
@@ -63,34 +55,15 @@ pub(crate) fn encode_statement(
         table_counts.branch,
         table_counts.memw_register,
     ] {
-        out.extend_from_slice(&(count as u64).to_le_bytes());
+        t.append_bytes(&(count as u64).to_le_bytes());
     }
 
-    out.extend_from_slice(&(num_private_input_pages as u64).to_le_bytes());
+    t.append_bytes(&(num_private_input_pages as u64).to_le_bytes());
 
-    // Runtime page ranges (count-prefixed; each entry fixed width).
-    out.extend_from_slice(&(runtime_page_ranges.len() as u64).to_le_bytes());
-    for range in runtime_page_ranges {
-        out.extend_from_slice(&range.base.to_le_bytes());
-        out.extend_from_slice(&range.count.to_le_bytes());
+    // runtime_page_ranges: count-prefixed; each entry fixed width.
+    t.append_bytes(&(runtime_page_ranges.len() as u64).to_le_bytes());
+    for r in runtime_page_ranges {
+        t.append_bytes(&r.base.to_le_bytes());
+        t.append_bytes(&r.count.to_le_bytes());
     }
-
-    out
-}
-
-/// The 32-byte Fiat-Shamir transcript seed binding the full statement.
-pub(crate) fn statement_seed(
-    elf_bytes: &[u8],
-    public_output: &[u8],
-    table_counts: &TableCounts,
-    num_private_input_pages: usize,
-    runtime_page_ranges: &[RuntimePageRange],
-) -> [u8; 32] {
-    keccak256(&encode_statement(
-        elf_bytes,
-        public_output,
-        table_counts,
-        num_private_input_pages,
-        runtime_page_ranges,
-    ))
 }
diff --git a/prover/src/tests/decode_tests.rs b/prover/src/tests/decode_tests.rs
index 488a4c747..8f61a1d74 100644
--- a/prover/src/tests/decode_tests.rs
+++ b/prover/src/tests/decode_tests.rs
@@ -1064,11 +1064,12 @@ fn test_decode_soundness_same_elf_accepted() {
         &table_counts,
     );
     let verifier_air_refs = verifier_airs.air_refs();
+    let mut replay_transcript = DefaultTranscript::<E>::new(&[]);
     let expected_bus_balance = crate::compute_expected_commit_bus_balance(
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
-        &[],
+        &mut replay_transcript,
     )
     .expect("fingerprint collision in test");
 
diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs
index 7ba94f869..ce15e1ec4 100644
--- a/prover/src/tests/prove_elfs_tests.rs
+++ b/prover/src/tests/prove_elfs_tests.rs
@@ -67,11 +67,12 @@ fn prove_and_verify_vm_minimal(elf: &Elf, traces: &mut Traces) -> bool {
     };
 
     // Compute the verifier-side expected COMMIT bus balance from public output bytes
+    let mut replay_transcript = DefaultTranscript::<E>::new(&[]);
     let expected_bus_balance = crate::compute_expected_commit_bus_balance(
         &airs.air_refs(),
         &multi_proof,
         &traces.public_output_bytes,
-        &[],
+        &mut replay_transcript,
     )
     .expect("fingerprint collision in test");
 
@@ -882,11 +883,12 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() {
     let verifier_airs =
         crate::VmAirs::new(&elf, &proof_options, true, &wrong_configs, &table_counts);
     let verifier_air_refs = verifier_airs.air_refs();
+    let mut replay_transcript = DefaultTranscript::<E>::new(&[]);
     let expected_bus_balance = crate::compute_expected_commit_bus_balance(
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
-        &[],
+        &mut replay_transcript,
     )
     .expect("fingerprint collision in test");
 
@@ -1619,11 +1621,12 @@ fn test_deep_stack_runtime_pages_roundtrip() {
     let verifier_airs =
         crate::VmAirs::new(&elf, &proof_options, true, &verifier_configs, &table_counts);
     let verifier_air_refs = verifier_airs.air_refs();
+    let mut replay_transcript = DefaultTranscript::<E>::new(&[]);
     let expected_bus_balance = crate::compute_expected_commit_bus_balance(
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
-        &[],
+        &mut replay_transcript,
     )
     .expect("fingerprint collision in test");
 
@@ -1675,11 +1678,12 @@ fn test_deep_stack_missing_pages_rejected() {
     let verifier_airs =
         crate::VmAirs::new(&elf, &proof_options, true, &wrong_configs, &table_counts);
     let verifier_air_refs = verifier_airs.air_refs();
+    let mut replay_transcript = DefaultTranscript::<E>::new(&[]);
     let expected_bus_balance = crate::compute_expected_commit_bus_balance(
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
-        &[],
+        &mut replay_transcript,
     )
     .expect("fingerprint collision in test");
 
@@ -1766,11 +1770,12 @@ fn test_heap_alloc_runtime_pages_roundtrip() {
     let verifier_airs =
         crate::VmAirs::new(&elf, &proof_options, true, &verifier_configs, &table_counts);
     let verifier_air_refs = verifier_airs.air_refs();
+    let mut replay_transcript = DefaultTranscript::<E>::new(&[]);
     let expected_bus_balance = crate::compute_expected_commit_bus_balance(
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
-        &[],
+        &mut replay_transcript,
     )
     .expect("fingerprint collision in test");
 
diff --git a/prover/src/tests/statement_tests.rs b/prover/src/tests/statement_tests.rs
index 4f6acb9a7..659113be6 100644
--- a/prover/src/tests/statement_tests.rs
+++ b/prover/src/tests/statement_tests.rs
@@ -1,6 +1,10 @@
-//! Tests for statement encoding and Fiat-Shamir transcript seeding.
+//! Tests for statement absorption into the Fiat-Shamir transcript.
 
-use crate::statement::{encode_statement, statement_seed};
+use crypto::fiat_shamir::default_transcript::DefaultTranscript;
+use crypto::fiat_shamir::is_transcript::IsTranscript;
+
+use crate::statement::absorb_statement;
+use crate::test_utils::E;
 use crate::{RuntimePageRange, TableCounts};
 
 fn sample_counts() -> TableCounts {
@@ -31,116 +35,84 @@ fn sample_ranges() -> Vec<RuntimePageRange> {
     ]
 }
 
-#[test]
-fn encoding_is_deterministic() {
-    let counts = sample_counts();
-    let ranges = sample_ranges();
-    assert_eq!(
-        encode_statement(b"elf-bytes", b"output", &counts, 3, &ranges),
-        encode_statement(b"elf-bytes", b"output", &counts, 3, &ranges),
-    );
-    assert_eq!(
-        statement_seed(b"elf-bytes", b"output", &counts, 3, &ranges),
-        statement_seed(b"elf-bytes", b"output", &counts, 3, &ranges),
-    );
+fn state_after_absorb(
+    elf: &[u8],
+    out: &[u8],
+    counts: &TableCounts,
+    priv_pages: usize,
+    ranges: &[RuntimePageRange],
+) -> [u8; 32] {
+    let mut t = DefaultTranscript::<E>::new(&[]);
+    absorb_statement(&mut t, elf, out, counts, priv_pages, ranges);
+    t.state()
 }
 
 #[test]
-fn encoding_starts_with_domain_tag() {
-    let enc = encode_statement(b"", b"", &sample_counts(), 0, &[]);
-    assert!(enc.starts_with(b"LAMBDAVM_STARK_STATEMENT_V1"));
+fn state_is_deterministic() {
+    let a = state_after_absorb(b"elf", b"out", &sample_counts(), 3, &sample_ranges());
+    let b = state_after_absorb(b"elf", b"out", &sample_counts(), 3, &sample_ranges());
+    assert_eq!(a, b);
 }
 
 #[test]
-fn seed_depends_on_elf() {
-    let c = sample_counts();
-    let r = sample_ranges();
+fn state_depends_on_every_field() {
+    let baseline = state_after_absorb(b"elf", b"out", &sample_counts(), 1, &sample_ranges());
+
     assert_ne!(
-        statement_seed(b"program-a", b"out", &c, 1, &r),
-        statement_seed(b"program-b", b"out", &c, 1, &r),
+        baseline,
+        state_after_absorb(
+            b"different-elf",
+            b"out",
+            &sample_counts(),
+            1,
+            &sample_ranges()
+        ),
+        "state must depend on elf",
     );
-}
-
-#[test]
-fn seed_depends_on_public_output() {
-    let c = sample_counts();
-    let r = sample_ranges();
     assert_ne!(
-        statement_seed(b"elf", b"output-1", &c, 1, &r),
-        statement_seed(b"elf", b"output-2", &c, 1, &r),
+        baseline,
+        state_after_absorb(
+            b"elf",
+            b"different-output",
+            &sample_counts(),
+            1,
+            &sample_ranges()
+        ),
+        "state must depend on public_output",
     );
-}
 
-#[test]
-fn seed_depends_on_table_counts() {
-    let r = sample_ranges();
-    let mut c2 = sample_counts();
-    c2.branch += 1;
+    let mut counts2 = sample_counts();
+    counts2.branch += 1;
     assert_ne!(
-        statement_seed(b"elf", b"out", &sample_counts(), 1, &r),
-        statement_seed(b"elf", b"out", &c2, 1, &r),
+        baseline,
+        state_after_absorb(b"elf", b"out", &counts2, 1, &sample_ranges()),
+        "state must depend on table_counts",
     );
-}
 
-#[test]
-fn seed_depends_on_private_input_pages() {
-    let c = sample_counts();
-    let r = sample_ranges();
     assert_ne!(
-        statement_seed(b"elf", b"out", &c, 1, &r),
-        statement_seed(b"elf", b"out", &c, 2, &r),
+        baseline,
+        state_after_absorb(b"elf", b"out", &sample_counts(), 2, &sample_ranges()),
+        "state must depend on num_private_input_pages",
     );
-}
 
-#[test]
-fn seed_depends_on_runtime_page_ranges() {
-    let c = sample_counts();
     assert_ne!(
-        statement_seed(b"elf", b"out", &c, 1, &sample_ranges()),
-        statement_seed(b"elf", b"out", &c, 1, &[]),
+        baseline,
+        state_after_absorb(b"elf", b"out", &sample_counts(), 1, &[]),
+        "state must depend on runtime_page_ranges",
     );
 }
 
 #[test]
-fn length_prefix_prevents_public_output_boundary_collision() {
-    // Without the public_output length prefix, "empty output + cpu count 0x41"
-    // and "output [0x41] + cpu count 0" would encode to the same bytes. The
-    // length prefix keeps the two statements distinct.
+fn public_output_length_prefix_prevents_collision() {
+    // Without the length prefix on public_output, "empty output + cpu count
+    // 0x41" and "output [0x41] + cpu count 0" would absorb identical bytes.
+    // The prefix keeps the two statements distinct.
     let mut counts_a = sample_counts();
     counts_a.cpu = 0x41;
     let mut counts_b = sample_counts();
     counts_b.cpu = 0;
     assert_ne!(
-        encode_statement(b"elf", b"", &counts_a, 0, &[]),
-        encode_statement(b"elf", b"\x41", &counts_b, 0, &[]),
-    );
-}
-
-/// End-to-end: an honest proof verifies against its own program, and a proof
-/// must not verify against a different program -- the verifier's statement seed
-/// (built from the other ELF) diverges from the prover's, so every Fiat-Shamir
-/// challenge differs. Also exercises seed consistency across the prove path,
-/// the verify path, and the bus-balance transcript replay.
-#[test]
-fn proof_binds_the_program_it_was_generated_for() {
-    let rust_artifacts = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
-        .parent()
-        .expect("workspace root")
-        .join("executor/program_artifacts/rust");
-    let read = |name: &str| {
-        std::fs::read(rust_artifacts.join(name))
-            .unwrap_or_else(|_| panic!("{name} not found -- run `make compile-programs-rust`"))
-    };
-    let allocator = read("allocator.elf");
-    let pure_commit = read("pure_commit.elf");
-
-    let proof = crate::prove(&allocator).expect("prove allocator");
-    assert!(
-        crate::verify(&proof, &allocator).expect("verify allocator"),
-        "honest proof must verify against its own program",
-    );
-    assert!(
-        !matches!(crate::verify(&proof, &pure_commit), Ok(true)),
-        "a proof must not verify against a different program",
+        state_after_absorb(b"elf", b"", &counts_a, 0, &[]),
+        state_after_absorb(b"elf", b"\x41", &counts_b, 0, &[]),
     );
 }

From 86a121756bf4aa9522c4d2944926e940a125808d Mon Sep 17 00:00:00 2001
From: MauroFab <maurotoscano2@gmail.com>
Date: Tue, 26 May 2026 12:24:34 -0300
Subject: [PATCH 3/3] refactor(prover): use exhaustive destructure to guard
 statement encoding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a field is added to TableCounts or RuntimePageRange and not bound into
the transcript, statement binding is silently broken for that field — a
proof generated for one configuration could pass verification against a
different one.

The previous `[ table_counts.cpu, ... ]` array literal compiled fine even
when a field was missing; the new field was simply skipped. Same for
direct `.base` / `.count` access on RuntimePageRange.

Replace both with exhaustive `&Struct { ... }` patterns. Adding a field
now produces:

    error[E0027]: pattern does not mention field `<name>`
      --> prover/src/statement.rs:..

The compile error fires at the encoding site itself — on every build,
not just on test runs — pointing the dev directly at the code that
needs updating. No separate test needed; the safety is in the encoding
function.

Encoded transcript bytes are unchanged: same field order, same widths.
Pure source-level guard.
---
 prover/src/statement.rs | 44 ++++++++++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/prover/src/statement.rs b/prover/src/statement.rs
index 2ac334d72..82c41861c 100644
--- a/prover/src/statement.rs
+++ b/prover/src/statement.rs
@@ -41,19 +41,32 @@ pub(crate) fn absorb_statement(
     t.append_bytes(&(public_output.len() as u64).to_le_bytes());
     t.append_bytes(public_output);
 
-    // table_counts: fixed-width u64s in declared order.
-    // Reordering or adding a field requires bumping DOMAIN_TAG above.
+    // table_counts: fixed-width u64s in declared order. The exhaustive
+    // destructure makes any field added to TableCounts a compile error here —
+    // that's the signal to extend the loop below and bump DOMAIN_TAG.
+    let &TableCounts {
+        cpu,
+        lt,
+        memw,
+        memw_aligned,
+        load,
+        mul,
+        dvrm,
+        shift,
+        branch,
+        memw_register,
+    } = table_counts;
     for count in [
-        table_counts.cpu,
-        table_counts.lt,
-        table_counts.memw,
-        table_counts.memw_aligned,
-        table_counts.load,
-        table_counts.mul,
-        table_counts.dvrm,
-        table_counts.shift,
-        table_counts.branch,
-        table_counts.memw_register,
+        cpu,
+        lt,
+        memw,
+        memw_aligned,
+        load,
+        mul,
+        dvrm,
+        shift,
+        branch,
+        memw_register,
     ] {
         t.append_bytes(&(count as u64).to_le_bytes());
     }
@@ -63,7 +76,10 @@ pub(crate) fn absorb_statement(
     // runtime_page_ranges: count-prefixed; each entry fixed width.
     t.append_bytes(&(runtime_page_ranges.len() as u64).to_le_bytes());
     for r in runtime_page_ranges {
-        t.append_bytes(&r.base.to_le_bytes());
-        t.append_bytes(&r.count.to_le_bytes());
+        // Exhaustive destructure: any field added to RuntimePageRange becomes
+        // a compile error here.
+        let &RuntimePageRange { base, count } = r;
+        t.append_bytes(&base.to_le_bytes());
+        t.append_bytes(&count.to_le_bytes());
     }
 }