From 22f99d9132186ff58c7ea4b98ac39dc0a6cccb64 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Thu, 7 May 2026 15:34:56 -0300 Subject: [PATCH 1/8] Append public output across commit ECALLs and raise the total cap to 64 KB --- executor/src/vm/memory.rs | 84 ++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/executor/src/vm/memory.rs b/executor/src/vm/memory.rs index b1f047ee1..b30e8443a 100644 --- a/executor/src/vm/memory.rs +++ b/executor/src/vm/memory.rs @@ -38,9 +38,10 @@ impl BuildHasher for U64BuildHasher { pub type U64HashMap = HashMap; -// TODO: Correctly define this -const MAX_PUBLIC_OUTPUT_COMMIT_SIZE: u64 = 1024; -const PUBLIC_OUTPUT_START_INDEX: u64 = 0; +/// Total cap on public output bytes across all `commit_public_output` calls. +/// The COMMIT AIR concatenates calls via the running `x254` index, so this +/// is enforced as a running-total budget rather than a per-call limit. +const MAX_PUBLIC_OUTPUT_TOTAL_SIZE: u64 = 64 * 1024; /// Maximum size of the private input memory region (in bytes). pub const MAX_PRIVATE_INPUT_SIZE: u64 = 6700000; /// Fixed high address where private input is mapped. Guest programs can read @@ -50,19 +51,26 @@ pub const MAX_PRIVATE_INPUT_SIZE: u64 = 6700000; pub const PRIVATE_INPUT_START_INDEX: u64 = 0xFF000000; #[derive(Default, Debug)] -pub struct Memory(U64HashMap<[u8; 4]>); +pub struct Memory { + cells: U64HashMap<[u8; 4]>, + /// Bytes committed to public output via `commit_public_output`. The + /// COMMIT AIR doesn't write to a fixed memory region (it streams bytes + /// onto the Commit bus by `index`), so this buffer is purely the + /// executor's view used by `read_return_value` and CLI display. 
+ public_output: Vec, +} impl Memory { pub fn load_byte(&self, address: u64) -> u8 { let aligned_address = address - address % 4; - let value = self.0.get(&aligned_address).cloned().unwrap_or_default(); + let value = self.cells.get(&aligned_address).cloned().unwrap_or_default(); value[(address % 4) as usize] } pub fn store_byte(&mut self, address: u64, value: u8) { let aligned_address = address - address % 4; let entry = self - .0 + .cells .entry(aligned_address) .or_insert_with(|| [0, 0, 0, 0]); entry[(address % 4) as usize] = value; @@ -72,7 +80,7 @@ impl Memory { if !address.is_multiple_of(4) { return Err(MemoryError::UnalignedAccess); } - let bytes = self.0.get(&address).cloned().unwrap_or_default(); + let bytes = self.cells.get(&address).cloned().unwrap_or_default(); Ok(u32::from_le_bytes(bytes)) } @@ -81,7 +89,7 @@ impl Memory { return Err(MemoryError::UnalignedAccess); } let bytes = value.to_le_bytes(); - self.0.insert(address, bytes); + self.cells.insert(address, bytes); Ok(()) } @@ -90,8 +98,8 @@ impl Memory { if !address.is_multiple_of(8) { return Err(MemoryError::UnalignedAccess); } - let low_bytes = self.0.get(&address).cloned().unwrap_or_default(); - let high_bytes = self.0.get(&(address + 4)).cloned().unwrap_or_default(); + let low_bytes = self.cells.get(&address).cloned().unwrap_or_default(); + let high_bytes = self.cells.get(&(address + 4)).cloned().unwrap_or_default(); let low = u32::from_le_bytes(low_bytes) as u64; let high = u32::from_le_bytes(high_bytes) as u64; Ok(low | (high << 32)) @@ -104,8 +112,8 @@ impl Memory { } let low = (value & 0xFFFFFFFF) as u32; let high = (value >> 32) as u32; - self.0.insert(address, low.to_le_bytes()); - self.0.insert(address + 4, high.to_le_bytes()); + self.cells.insert(address, low.to_le_bytes()); + self.cells.insert(address + 4, high.to_le_bytes()); Ok(()) } @@ -117,7 +125,7 @@ impl Memory { ); } let aligned_address = address - address % 4; - let bytes = 
self.0.get(&aligned_address).cloned().unwrap_or_default(); + let bytes = self.cells.get(&aligned_address).cloned().unwrap_or_default(); let value = &bytes[(address % 4) as usize..(address % 4) as usize + 2]; Ok(u16::from_le_bytes( value.try_into().map_err(|_| MemoryError::LoadHalf)?, @@ -130,7 +138,7 @@ impl Memory { } let aligned_address = address - address % 4; let entry = self - .0 + .cells .entry(aligned_address) .or_insert_with(|| [0, 0, 0, 0]); let bytes = value.to_le_bytes(); @@ -139,19 +147,25 @@ impl Memory { Ok(()) } + /// Append `length` bytes from guest memory starting at `address` to the + /// public output. The COMMIT AIR concatenates calls via the running + /// `x254` index, and the trace builder accumulates `commit_ops` into + /// `VmProof.public_output`; this method maintains the executor's view + /// of the same byte stream so `read_return_value` matches. pub fn commit_public_output(&mut self, address: u64, length: u64) -> Result<(), MemoryError> { - if length > MAX_PUBLIC_OUTPUT_COMMIT_SIZE { + let new_total = (self.public_output.len() as u64) + .checked_add(length) + .ok_or(MemoryError::CommitSizeExceeded)?; + if new_total > MAX_PUBLIC_OUTPUT_TOTAL_SIZE { return Err(MemoryError::CommitSizeExceeded); } - self.store_word(PUBLIC_OUTPUT_START_INDEX, length as u32)?; - let inputs = self.load_bytes(address, length); - self.set_bytes_aligned(PUBLIC_OUTPUT_START_INDEX + 4, &inputs)?; + let bytes = self.load_bytes(address, length); + self.public_output.extend_from_slice(&bytes); Ok(()) } pub fn read_return_value(&self) -> Result, MemoryError> { - let size = self.load_word(PUBLIC_OUTPUT_START_INDEX)?; - Ok(self.load_bytes(PUBLIC_OUTPUT_START_INDEX + 4, size as u64)) + Ok(self.public_output.clone()) } /// Pre-loads private input bytes at `PRIVATE_INPUT_START_INDEX` as a @@ -174,7 +188,7 @@ impl Memory { let end = addr + len; while addr < end { let aligned = addr - (addr % 4); - let bytes = self.0.get(&aligned).cloned().unwrap_or_default(); + let bytes = 
self.cells.get(&aligned).cloned().unwrap_or_default(); let offset = (addr % 4) as usize; let take = std::cmp::min(4 - offset, (end - addr) as usize); result.extend_from_slice(&bytes[offset..offset + take]); @@ -192,7 +206,7 @@ impl Memory { for chunk in inputs.chunks(4) { let mut bytes = [0u8; 4]; bytes[..chunk.len()].copy_from_slice(chunk); - self.0.insert(addr, bytes); + self.cells.insert(addr, bytes); addr += 4; } Ok(()) @@ -234,7 +248,7 @@ mod tests { } #[test] - fn test_commit_public_output_overwrites() { + fn test_commit_public_output_appends() { let mut memory = Memory::default(); memory.store_byte(0x100, b'a'); memory.store_byte(0x101, b'b'); @@ -248,19 +262,33 @@ mod tests { .commit_public_output(0x104, 2) .expect("second commit should succeed"); - // Overwrite semantics: second commit replaces first + // Append semantics: calls concatenate (EF zkVM IO interface). assert_eq!( memory .read_return_value() .expect("public output should be readable"), - b"cd".to_vec() + b"abcd".to_vec() ); } #[test] - fn test_commit_public_output_size_exceeded() { + fn test_commit_public_output_total_cap() { let mut memory = Memory::default(); - let err = memory.commit_public_output(0x100, 1025); - assert!(err.is_err()); + // Seed enough source bytes for two 32 KB writes. + let chunk = vec![0xAB; 32 * 1024]; + memory + .set_bytes_aligned(0x1_0000, &chunk) + .expect("seed should succeed"); + + memory + .commit_public_output(0x1_0000, 32 * 1024) + .expect("first 32 KB commit should succeed"); + memory + .commit_public_output(0x1_0000, 32 * 1024) + .expect("second 32 KB commit should succeed (total = 64 KB)"); + + // One more byte exceeds the 64 KB total cap. 
+ let err = memory.commit_public_output(0x1_0000, 1).unwrap_err(); + assert!(matches!(err, super::MemoryError::CommitSizeExceeded)); } } From bfd99129e4271803eaf8a4cf70df91be80eac0a4 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Thu, 7 May 2026 15:44:12 -0300 Subject: [PATCH 2/8] Add the EF zkVM IO interface --- .../rust/ef_io_demo/.cargo/config.toml | 5 + executor/programs/rust/ef_io_demo/Cargo.toml | 9 ++ executor/programs/rust/ef_io_demo/src/main.rs | 22 +++++ executor/tests/rust.rs | 14 +++ prover/src/tests/prove_elfs_tests.rs | 25 +++++ syscalls/src/ef_io.rs | 93 +++++++++++++++++++ syscalls/src/lib.rs | 1 + syscalls/src/syscalls.rs | 2 +- 8 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 executor/programs/rust/ef_io_demo/.cargo/config.toml create mode 100644 executor/programs/rust/ef_io_demo/Cargo.toml create mode 100644 executor/programs/rust/ef_io_demo/src/main.rs create mode 100644 syscalls/src/ef_io.rs diff --git a/executor/programs/rust/ef_io_demo/.cargo/config.toml b/executor/programs/rust/ef_io_demo/.cargo/config.toml new file mode 100644 index 000000000..ca99a3f45 --- /dev/null +++ b/executor/programs/rust/ef_io_demo/.cargo/config.toml @@ -0,0 +1,5 @@ +[target.riscv64im-lambda-vm-elf] +rustflags = [ + "--cfg", "getrandom_backend=\"custom\"", + "-C", "passes=lower-atomic" +] diff --git a/executor/programs/rust/ef_io_demo/Cargo.toml b/executor/programs/rust/ef_io_demo/Cargo.toml new file mode 100644 index 000000000..f1c6f812a --- /dev/null +++ b/executor/programs/rust/ef_io_demo/Cargo.toml @@ -0,0 +1,9 @@ +[workspace] + +[package] +name = "ef_io_demo" +version = "0.1.0" +edition = "2024" + +[dependencies] +lambda-vm-syscalls = { path = "../../../../syscalls" } diff --git a/executor/programs/rust/ef_io_demo/src/main.rs b/executor/programs/rust/ef_io_demo/src/main.rs new file mode 100644 index 000000000..ef0690398 --- /dev/null +++ b/executor/programs/rust/ef_io_demo/src/main.rs @@ -0,0 +1,22 @@ +// Demo guest exercising the EF zkVM 
IO interface (`read_input` / `write_output`). +// +// Reads the private input via the EF zero-copy `read_input` shim, then emits it +// back as the public output in TWO `write_output` calls (split in halves) to +// exercise the multi-call concatenation requirement of the EF spec. +use lambda_vm_syscalls as syscalls; + +pub fn main() { + let mut buf_ptr: *const u8 = core::ptr::null(); + let mut buf_size: usize = 0; + unsafe { + syscalls::ef_io::read_input(&mut buf_ptr, &mut buf_size); + } + + if buf_size > 0 { + let half = buf_size / 2; + unsafe { + syscalls::ef_io::write_output(buf_ptr, half); + syscalls::ef_io::write_output(buf_ptr.add(half), buf_size - half); + } + } +} diff --git a/executor/tests/rust.rs b/executor/tests/rust.rs index fab183571..b15530a63 100644 --- a/executor/tests/rust.rs +++ b/executor/tests/rust.rs @@ -160,6 +160,20 @@ fn test_commit() { ); } +#[test] +fn test_ef_io_demo_concatenates_writes() { + // Demo guest reads its private input via EF `read_input`, then emits it + // back as the public output via TWO `write_output` calls (split in halves). + // The COMMIT AIR concatenates the two calls; the executor's + // `commit_public_output` appends in the same order. + let input: Vec = b"hello world!".to_vec(); + run_program_and_check_public_output( + "./program_artifacts/rust/ef_io_demo.elf", + input.clone(), + input, + ); +} + #[test] fn test_commit_sum() { run_program_and_check_public_output( diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 7e0fbc181..769b14407 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -1999,6 +1999,31 @@ fn test_prove_private_input_different_values() { assert_eq!(proof.public_output, input[4..12].to_vec()); } +/// End-to-end: EF zkVM IO interface — demo guest reads its private input via +/// `read_input` and emits it back through TWO `write_output` calls. 
The +/// COMMIT AIR's running `x254` index concatenates them; the resulting proof's +/// `public_output` must equal the original input. +#[test] +fn test_prove_ef_io_demo_concatenates() { + let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("workspace root") + .to_path_buf(); + let elf_bytes = + std::fs::read(workspace_root.join("executor/program_artifacts/rust/ef_io_demo.elf")) + .expect("ef_io_demo.elf not found — run `make compile-programs-rust`"); + let input: &[u8] = b"hello world!"; + let proof = crate::prove_with_inputs(&elf_bytes, input).expect("prove should succeed"); + assert!( + crate::verify(&proof, &elf_bytes).expect("verify should not error"), + "ef_io_demo should verify" + ); + assert_eq!( + proof.public_output, input, + "two write_output calls must concatenate" + ); +} + /// End-to-end: Rust std program with private input. #[test] fn test_prove_commit_sum() { diff --git a/syscalls/src/ef_io.rs b/syscalls/src/ef_io.rs new file mode 100644 index 000000000..7bd30bc11 --- /dev/null +++ b/syscalls/src/ef_io.rs @@ -0,0 +1,93 @@ +//! EF zkVM IO interface: +//! +//! Two C-callable functions that match the EF standard so portable applications +//! compile unchanged across zkVMs: +//! +//! - `read_input`: returns a zero-copy pointer + size to the private input. +//! - `write_output`: appends bytes to the public output. Multiple calls +//! concatenate. +//! +//! On Lambda VM these map to: +//! - `read_input` → memory-mapped private input region at `0xFF000000` +//! (4-byte LE length prefix at base, data at `+4`). +//! - `write_output` → ECALL #64 (Commit). The trace builder maintains a +//! running commitment index in synthetic register `x254`, so multiple +//! ECALLs naturally concatenate at the proof level. + +#[cfg(target_arch = "riscv64")] +use core::arch::asm; + +#[cfg(target_arch = "riscv64")] +use crate::syscalls::SyscallNumbers; + +/// Memory-mapped private input region start address. 
+/// Must match `executor::vm::memory::PRIVATE_INPUT_START_INDEX`. +#[cfg(target_arch = "riscv64")] +const PRIVATE_INPUT_LEN_ADDR: usize = 0xFF000000; +#[cfg(target_arch = "riscv64")] +const PRIVATE_INPUT_DATA_ADDR: usize = 0xFF000004; + +/// EF IO: return a zero-copy pointer and size for the private input. +/// +/// Per the spec this function is idempotent, callable multiple times, and +/// cannot fail. If `buf_size` is 0, the value of `buf_ptr` is unspecified. +/// +/// # Safety +/// +/// `buf_ptr` and `buf_size` must be valid, writable pointers. +#[cfg(target_arch = "riscv64")] +#[unsafe(no_mangle)] +pub unsafe extern "C" fn read_input(buf_ptr: *mut *const u8, buf_size: *mut usize) { + let len_ptr = PRIVATE_INPUT_LEN_ADDR as *const u32; + let len = unsafe { core::ptr::read_volatile(len_ptr) } as usize; + unsafe { + *buf_ptr = PRIVATE_INPUT_DATA_ADDR as *const u8; + *buf_size = len; + } +} + +/// EF IO: append `size` bytes from `output` to the public output. +/// +/// Multiple calls concatenate. Per the spec this function cannot fail; in +/// practice the executor enforces a total-output cap (see +/// `MAX_PUBLIC_OUTPUT_TOTAL_SIZE` in `executor::vm::memory`). Exceeding it +/// causes the executor to return an error and abort proving — not a graceful +/// failure mode at the C boundary, but consistent with "cannot fail" for +/// well-formed programs that stay under the limit. +/// +/// # Safety +/// +/// `output` must point to `size` readable bytes within guest memory. +#[cfg(target_arch = "riscv64")] +#[unsafe(no_mangle)] +pub unsafe extern "C" fn write_output(output: *const u8, size: usize) { + unsafe { + asm!( + "ecall", + in("a0") 1usize, // fd = 1 (stdout) — required by the COMMIT chip + in("a1") output, + in("a2") size, + in("a7") SyscallNumbers::Commit as usize, + ); + } +} + +#[cfg(not(target_arch = "riscv64"))] +/// # Safety +/// +/// Host-side stub. Lambda VM's IO interface is only implemented for the +/// `riscv64` guest target. 
+#[unsafe(no_mangle)] +pub unsafe extern "C" fn read_input(_buf_ptr: *mut *const u8, _buf_size: *mut usize) { + unimplemented!("read_input is only implemented for riscv64 targets"); +} + +#[cfg(not(target_arch = "riscv64"))] +/// # Safety +/// +/// Host-side stub. Lambda VM's IO interface is only implemented for the +/// `riscv64` guest target. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn write_output(_output: *const u8, _size: usize) { + unimplemented!("write_output is only implemented for riscv64 targets"); +} diff --git a/syscalls/src/lib.rs b/syscalls/src/lib.rs index 378257d18..79a420181 100644 --- a/syscalls/src/lib.rs +++ b/syscalls/src/lib.rs @@ -1,4 +1,5 @@ pub mod allocator; +pub mod ef_io; pub mod entrypoint; pub mod random; pub mod syscalls; diff --git a/syscalls/src/syscalls.rs b/syscalls/src/syscalls.rs index ae0315ff5..91b0fb5eb 100644 --- a/syscalls/src/syscalls.rs +++ b/syscalls/src/syscalls.rs @@ -9,7 +9,7 @@ use core::arch::asm; const PRIVATE_INPUT_START: usize = 0xFF000000; #[cfg(target_arch = "riscv64")] -enum SyscallNumbers { +pub enum SyscallNumbers { Print = 1, Panic = 2, Commit = 64, From c29f3802668e8728fe867f9a023e7fc4cc0c8bfe Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Thu, 7 May 2026 16:01:30 -0300 Subject: [PATCH 3/8] lint --- executor/src/vm/memory.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/executor/src/vm/memory.rs b/executor/src/vm/memory.rs index b30e8443a..d0721c641 100644 --- a/executor/src/vm/memory.rs +++ b/executor/src/vm/memory.rs @@ -63,7 +63,11 @@ pub struct Memory { impl Memory { pub fn load_byte(&self, address: u64) -> u8 { let aligned_address = address - address % 4; - let value = self.cells.get(&aligned_address).cloned().unwrap_or_default(); + let value = self + .cells + .get(&aligned_address) + .cloned() + .unwrap_or_default(); value[(address % 4) as usize] } @@ -125,7 +129,11 @@ impl Memory { ); } let aligned_address = address - address % 4; - let bytes = 
self.cells.get(&aligned_address).cloned().unwrap_or_default(); + let bytes = self + .cells + .get(&aligned_address) + .cloned() + .unwrap_or_default(); let value = &bytes[(address % 4) as usize..(address % 4) as usize + 2]; Ok(u16::from_le_bytes( value.try_into().map_err(|_| MemoryError::LoadHalf)?, From 9f297e85944c8d8d37e2469d657e175d649c9442 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Fri, 8 May 2026 11:26:15 -0300 Subject: [PATCH 4/8] Raise public output cap to 1 MB, guard load_bytes overflow, drop no_mangle from host stubs --- executor/src/vm/memory.rs | 20 +++++++++++--------- syscalls/src/ef_io.rs | 20 ++++++++------------ 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/executor/src/vm/memory.rs b/executor/src/vm/memory.rs index d0721c641..d98e2e761 100644 --- a/executor/src/vm/memory.rs +++ b/executor/src/vm/memory.rs @@ -41,7 +41,7 @@ pub type U64HashMap = HashMap; /// Total cap on public output bytes across all `commit_public_output` calls. /// The COMMIT AIR concatenates calls via the running `x254` index, so this /// is enforced as a running-total budget rather than a per-call limit. -const MAX_PUBLIC_OUTPUT_TOTAL_SIZE: u64 = 64 * 1024; +const MAX_PUBLIC_OUTPUT_TOTAL_SIZE: u64 = 1024 * 1024; /// Maximum size of the private input memory region (in bytes). pub const MAX_PRIVATE_INPUT_SIZE: u64 = 6700000; /// Fixed high address where private input is mapped. Guest programs can read @@ -193,7 +193,9 @@ impl Memory { pub fn load_bytes(&self, mut addr: u64, len: u64) -> Vec { let mut result = Vec::with_capacity(len as usize); - let end = addr + len; + let end = addr + .checked_add(len) + .expect("load_bytes: address range exceeds u64::MAX"); while addr < end { let aligned = addr - (addr % 4); let bytes = self.cells.get(&aligned).cloned().unwrap_or_default(); @@ -282,20 +284,20 @@ mod tests { #[test] fn test_commit_public_output_total_cap() { let mut memory = Memory::default(); - // Seed enough source bytes for two 32 KB writes. 
- let chunk = vec![0xAB; 32 * 1024]; + // Seed enough source bytes for two 512 KB writes. + let chunk = vec![0xAB; 512 * 1024]; memory .set_bytes_aligned(0x1_0000, &chunk) .expect("seed should succeed"); memory - .commit_public_output(0x1_0000, 32 * 1024) - .expect("first 32 KB commit should succeed"); + .commit_public_output(0x1_0000, 512 * 1024) + .expect("first 512 KB commit should succeed"); memory - .commit_public_output(0x1_0000, 32 * 1024) - .expect("second 32 KB commit should succeed (total = 64 KB)"); + .commit_public_output(0x1_0000, 512 * 1024) + .expect("second 512 KB commit should succeed (total = 1 MB)"); - // One more byte exceeds the 64 KB total cap. + // One more byte exceeds the 1 MB total cap. let err = memory.commit_public_output(0x1_0000, 1).unwrap_err(); assert!(matches!(err, super::MemoryError::CommitSizeExceeded)); } diff --git a/syscalls/src/ef_io.rs b/syscalls/src/ef_io.rs index 7bd30bc11..4f3cabb2e 100644 --- a/syscalls/src/ef_io.rs +++ b/syscalls/src/ef_io.rs @@ -72,22 +72,18 @@ pub unsafe extern "C" fn write_output(output: *const u8, size: usize) { } } +/// Host-side stub — Lambda VM's IO interface is only implemented for the +/// `riscv64` guest target. Not exported with C linkage on host so the +/// generic name doesn't collide with C dependencies in test builds. #[cfg(not(target_arch = "riscv64"))] -/// # Safety -/// -/// Host-side stub. Lambda VM's IO interface is only implemented for the -/// `riscv64` guest target. -#[unsafe(no_mangle)] -pub unsafe extern "C" fn read_input(_buf_ptr: *mut *const u8, _buf_size: *mut usize) { +pub fn read_input(_buf_ptr: *mut *const u8, _buf_size: *mut usize) { unimplemented!("read_input is only implemented for riscv64 targets"); } +/// Host-side stub — Lambda VM's IO interface is only implemented for the +/// `riscv64` guest target. Not exported with C linkage on host so the +/// generic name doesn't collide with C dependencies in test builds. 
#[cfg(not(target_arch = "riscv64"))] -/// # Safety -/// -/// Host-side stub. Lambda VM's IO interface is only implemented for the -/// `riscv64` guest target. -#[unsafe(no_mangle)] -pub unsafe extern "C" fn write_output(_output: *const u8, _size: usize) { +pub fn write_output(_output: *const u8, _size: usize) { unimplemented!("write_output is only implemented for riscv64 targets"); } From 18aa9077cdd3756c50b36d77c8f716f713661db8 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 12 May 2026 15:38:01 -0300 Subject: [PATCH 5/8] Return Result from load_bytes to drop the guest-reachable panic in Print/Panic/Commit ECALLs --- executor/src/vm/instruction/execution.rs | 4 +-- executor/src/vm/memory.rs | 46 ++++++++++++++++++++---- syscalls/src/ef_io.rs | 15 +++----- syscalls/src/syscalls.rs | 2 +- 4 files changed, 47 insertions(+), 20 deletions(-) diff --git a/executor/src/vm/instruction/execution.rs b/executor/src/vm/instruction/execution.rs index a5222557a..a8cffe6e5 100644 --- a/executor/src/vm/instruction/execution.rs +++ b/executor/src/vm/instruction/execution.rs @@ -295,7 +295,7 @@ impl Instruction { // It is not the correct implementation of ecall/ebreak let pointer = registers.read(10)?; let len = registers.read(11)?; - let bytes = memory.load_bytes(pointer, len); + let bytes = memory.load_bytes(pointer, len)?; let value = str::from_utf8(&bytes).map_err(|_| ExecutionError::IncorrectMessage)?; println!("PRINT VM: {}", value); @@ -304,7 +304,7 @@ impl Instruction { // panic let pointer = registers.read(10)?; let len = registers.read(11)?; - let bytes = memory.load_bytes(pointer, len); + let bytes = memory.load_bytes(pointer, len)?; let value = str::from_utf8(&bytes).map_err(|_| ExecutionError::IncorrectMessage)?; return Err(ExecutionError::Panic(value.to_owned())); diff --git a/executor/src/vm/memory.rs b/executor/src/vm/memory.rs index d98e2e761..f7c58dc51 100644 --- a/executor/src/vm/memory.rs +++ b/executor/src/vm/memory.rs @@ -41,7 +41,7 @@ pub type 
U64HashMap = HashMap; /// Total cap on public output bytes across all `commit_public_output` calls. /// The COMMIT AIR concatenates calls via the running `x254` index, so this /// is enforced as a running-total budget rather than a per-call limit. -const MAX_PUBLIC_OUTPUT_TOTAL_SIZE: u64 = 1024 * 1024; +pub const MAX_PUBLIC_OUTPUT_TOTAL_SIZE: u64 = 1024 * 1024; /// Maximum size of the private input memory region (in bytes). pub const MAX_PRIVATE_INPUT_SIZE: u64 = 6700000; /// Fixed high address where private input is mapped. Guest programs can read @@ -167,7 +167,7 @@ impl Memory { if new_total > MAX_PUBLIC_OUTPUT_TOTAL_SIZE { return Err(MemoryError::CommitSizeExceeded); } - let bytes = self.load_bytes(address, length); + let bytes = self.load_bytes(address, length)?; self.public_output.extend_from_slice(&bytes); Ok(()) } @@ -191,11 +191,9 @@ impl Memory { Ok(()) } - pub fn load_bytes(&self, mut addr: u64, len: u64) -> Vec { + pub fn load_bytes(&self, mut addr: u64, len: u64) -> Result, MemoryError> { let mut result = Vec::with_capacity(len as usize); - let end = addr - .checked_add(len) - .expect("load_bytes: address range exceeds u64::MAX"); + let end = addr.checked_add(len).ok_or(MemoryError::AddressOverflow)?; while addr < end { let aligned = addr - (addr % 4); let bytes = self.cells.get(&aligned).cloned().unwrap_or_default(); @@ -204,7 +202,7 @@ impl Memory { result.extend_from_slice(&bytes[offset..offset + take]); addr += take as u64; } - result + Ok(result) } /// Helper method to store a given input at an aligned address. 
It may also overwrite existing bytes with zero if inputs is not divisible by 4 @@ -233,6 +231,8 @@ pub enum MemoryError { CommitSizeExceeded, #[error("Private input size exceeded")] PrivateInputSizeExceeded, + #[error("Address range exceeds u64::MAX")] + AddressOverflow, } #[cfg(test)] @@ -281,6 +281,38 @@ mod tests { ); } + #[test] + fn test_commit_public_output_empty_is_ok() { + let mut memory = Memory::default(); + memory + .commit_public_output(0, 0) + .expect("zero-length commit should succeed"); + assert!( + memory + .read_return_value() + .expect("public output should be readable") + .is_empty() + ); + } + + #[test] + fn test_commit_public_output_address_overflow() { + let mut memory = Memory::default(); + let err = memory + .commit_public_output(u64::MAX, 2) + .expect_err("address overflow must error, not panic"); + assert!(matches!(err, super::MemoryError::AddressOverflow)); + } + + #[test] + fn test_load_bytes_overflow_errors() { + let memory = Memory::default(); + let err = memory + .load_bytes(u64::MAX, 2) + .expect_err("address overflow must error, not panic"); + assert!(matches!(err, super::MemoryError::AddressOverflow)); + } + #[test] fn test_commit_public_output_total_cap() { let mut memory = Memory::default(); diff --git a/syscalls/src/ef_io.rs b/syscalls/src/ef_io.rs index 4f3cabb2e..96e2afcd4 100644 --- a/syscalls/src/ef_io.rs +++ b/syscalls/src/ef_io.rs @@ -18,19 +18,14 @@ use core::arch::asm; #[cfg(target_arch = "riscv64")] -use crate::syscalls::SyscallNumbers; - -/// Memory-mapped private input region start address. -/// Must match `executor::vm::memory::PRIVATE_INPUT_START_INDEX`. -#[cfg(target_arch = "riscv64")] -const PRIVATE_INPUT_LEN_ADDR: usize = 0xFF000000; -#[cfg(target_arch = "riscv64")] -const PRIVATE_INPUT_DATA_ADDR: usize = 0xFF000004; +use crate::syscalls::{PRIVATE_INPUT_START, SyscallNumbers}; /// EF IO: return a zero-copy pointer and size for the private input. 
/// /// Per the spec this function is idempotent, callable multiple times, and /// cannot fail. If `buf_size` is 0, the value of `buf_ptr` is unspecified. +/// Privacy of the input is the guest's responsibility; the VM does not +/// enforce it. /// /// # Safety /// @@ -38,10 +33,10 @@ const PRIVATE_INPUT_DATA_ADDR: usize = 0xFF000004; #[cfg(target_arch = "riscv64")] #[unsafe(no_mangle)] pub unsafe extern "C" fn read_input(buf_ptr: *mut *const u8, buf_size: *mut usize) { - let len_ptr = PRIVATE_INPUT_LEN_ADDR as *const u32; + let len_ptr = PRIVATE_INPUT_START as *const u32; let len = unsafe { core::ptr::read_volatile(len_ptr) } as usize; unsafe { - *buf_ptr = PRIVATE_INPUT_DATA_ADDR as *const u8; + *buf_ptr = (PRIVATE_INPUT_START + 4) as *const u8; *buf_size = len; } } diff --git a/syscalls/src/syscalls.rs b/syscalls/src/syscalls.rs index 91b0fb5eb..86d16bd6b 100644 --- a/syscalls/src/syscalls.rs +++ b/syscalls/src/syscalls.rs @@ -6,7 +6,7 @@ use core::arch::asm; /// The host pre-loads the input; the guest reads directly (no ecall). /// Must match `executor::vm::memory::PRIVATE_INPUT_START_INDEX`. 
#[cfg(target_arch = "riscv64")] -const PRIVATE_INPUT_START: usize = 0xFF000000; +pub const PRIVATE_INPUT_START: usize = 0xFF000000; #[cfg(target_arch = "riscv64")] pub enum SyscallNumbers { From 0509fa6fd441430bae3bb146b22742d900838d29 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 12 May 2026 17:32:25 -0300 Subject: [PATCH 6/8] address comments --- executor/src/vm/memory.rs | 20 +++++++++++++++++++- syscalls/src/ef_io.rs | 4 ++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/executor/src/vm/memory.rs b/executor/src/vm/memory.rs index f7c58dc51..8247d28ca 100644 --- a/executor/src/vm/memory.rs +++ b/executor/src/vm/memory.rs @@ -192,8 +192,11 @@ impl Memory { } pub fn load_bytes(&self, mut addr: u64, len: u64) -> Result, MemoryError> { - let mut result = Vec::with_capacity(len as usize); let end = addr.checked_add(len).ok_or(MemoryError::AddressOverflow)?; + let mut result = Vec::new(); + result + .try_reserve_exact(len as usize) + .map_err(|_| MemoryError::AllocationFailed)?; while addr < end { let aligned = addr - (addr % 4); let bytes = self.cells.get(&aligned).cloned().unwrap_or_default(); @@ -233,6 +236,8 @@ pub enum MemoryError { PrivateInputSizeExceeded, #[error("Address range exceeds u64::MAX")] AddressOverflow, + #[error("Failed to allocate memory for load_bytes")] + AllocationFailed, } #[cfg(test)] @@ -304,6 +309,19 @@ mod tests { assert!(matches!(err, super::MemoryError::AddressOverflow)); } + #[test] + fn test_load_bytes_huge_len_returns_alloc_error() { + let memory = Memory::default(); + // A multi-petabyte allocation request from a guest must fail cleanly, + // not abort the host process via OOM. `addr=0` and `len=1<<50` keep + // `checked_add` happy so the path reaches the allocation. 
+ let huge = 1u64 << 50; + let err = memory + .load_bytes(0, huge) + .expect_err("huge alloc must error, not abort"); + assert!(matches!(err, super::MemoryError::AllocationFailed)); + } + #[test] fn test_load_bytes_overflow_errors() { let memory = Memory::default(); diff --git a/syscalls/src/ef_io.rs b/syscalls/src/ef_io.rs index 96e2afcd4..dabf7818d 100644 --- a/syscalls/src/ef_io.rs +++ b/syscalls/src/ef_io.rs @@ -33,9 +33,9 @@ use crate::syscalls::{PRIVATE_INPUT_START, SyscallNumbers}; #[cfg(target_arch = "riscv64")] #[unsafe(no_mangle)] pub unsafe extern "C" fn read_input(buf_ptr: *mut *const u8, buf_size: *mut usize) { - let len_ptr = PRIVATE_INPUT_START as *const u32; - let len = unsafe { core::ptr::read_volatile(len_ptr) } as usize; unsafe { + let len_ptr = PRIVATE_INPUT_START as *const u32; + let len = core::ptr::read_volatile(len_ptr) as usize; *buf_ptr = (PRIVATE_INPUT_START + 4) as *const u8; *buf_size = len; } From 3d14e565eabd45b6a318f92b6afba9c0f6513036 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Wed, 13 May 2026 12:09:43 -0300 Subject: [PATCH 7/8] Use usize::try_from for len in load_bytes instead of an implicit cast. 
--- Cargo.lock | 1 + executor/src/vm/memory.rs | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index f6eea84d6..4870fb5cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1946,6 +1946,7 @@ dependencies = [ "rayon", "serde", "stark", + "tiny-keccak", ] [[package]] diff --git a/executor/src/vm/memory.rs b/executor/src/vm/memory.rs index 8247d28ca..140b62ebe 100644 --- a/executor/src/vm/memory.rs +++ b/executor/src/vm/memory.rs @@ -193,9 +193,10 @@ impl Memory { pub fn load_bytes(&self, mut addr: u64, len: u64) -> Result, MemoryError> { let end = addr.checked_add(len).ok_or(MemoryError::AddressOverflow)?; + let len_usize = usize::try_from(len).map_err(|_| MemoryError::AllocationFailed)?; let mut result = Vec::new(); result - .try_reserve_exact(len as usize) + .try_reserve_exact(len_usize) .map_err(|_| MemoryError::AllocationFailed)?; while addr < end { let aligned = addr - (addr % 4); From b02891bff97ef6313b3474e7e892ba61794992ff Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Mon, 18 May 2026 10:13:00 -0300 Subject: [PATCH 8/8] Guard private input length cast and cross-check public_output before returning VmProof --- executor/src/vm/memory.rs | 4 +++- prover/src/lib.rs | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/executor/src/vm/memory.rs b/executor/src/vm/memory.rs index 140b62ebe..b78c98d44 100644 --- a/executor/src/vm/memory.rs +++ b/executor/src/vm/memory.rs @@ -186,7 +186,9 @@ impl Memory { if inputs.len() as u64 > MAX_PRIVATE_INPUT_SIZE { return Err(MemoryError::PrivateInputSizeExceeded); } - self.store_word(PRIVATE_INPUT_START_INDEX, inputs.len() as u32)?; + let len_u32 = + u32::try_from(inputs.len()).map_err(|_| MemoryError::PrivateInputSizeExceeded)?; + self.store_word(PRIVATE_INPUT_START_INDEX, len_u32)?; self.set_bytes_aligned(PRIVATE_INPUT_START_INDEX + 4, &inputs)?; Ok(()) } diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 6c9a07488..254c37834 100644 --- a/prover/src/lib.rs +++ 
b/prover/src/lib.rs @@ -651,6 +651,11 @@ pub fn prove_with_options_and_inputs( .filter(|c| c.is_private_input) .count(); + debug_assert_eq!( + traces.public_output_bytes, result.return_values.memory_values, + "public output diverged between executor view and trace reconstruction" + ); + Ok(VmProof { proof, runtime_page_ranges,