diff --git a/Cargo.lock b/Cargo.lock
index 7b6ed3c62..001b4b841 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1955,6 +1955,7 @@ dependencies = [
  "rayon",
  "serde",
  "stark",
+ "tiny-keccak",
 ]
 
 [[package]]
diff --git a/executor/programs/rust/ef_io_demo/.cargo/config.toml b/executor/programs/rust/ef_io_demo/.cargo/config.toml
new file mode 100644
index 000000000..ca99a3f45
--- /dev/null
+++ b/executor/programs/rust/ef_io_demo/.cargo/config.toml
@@ -0,0 +1,5 @@
+[target.riscv64im-lambda-vm-elf]
+rustflags = [
+  "--cfg", "getrandom_backend=\"custom\"",
+  "-C", "passes=lower-atomic"
+]
diff --git a/executor/programs/rust/ef_io_demo/Cargo.toml b/executor/programs/rust/ef_io_demo/Cargo.toml
new file mode 100644
index 000000000..f1c6f812a
--- /dev/null
+++ b/executor/programs/rust/ef_io_demo/Cargo.toml
@@ -0,0 +1,9 @@
+[workspace]
+
+[package]
+name = "ef_io_demo"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+lambda-vm-syscalls = { path = "../../../../syscalls" }
diff --git a/executor/programs/rust/ef_io_demo/src/main.rs b/executor/programs/rust/ef_io_demo/src/main.rs
new file mode 100644
index 000000000..ef0690398
--- /dev/null
+++ b/executor/programs/rust/ef_io_demo/src/main.rs
@@ -0,0 +1,22 @@
+// Demo guest exercising the EF zkVM IO interface (`read_input` / `write_output`).
+//
+// Reads the private input via the EF zero-copy `read_input` shim, then echoes it
+// as public output in TWO `write_output` calls (first `buf_size / 2` bytes, then
+// the rest) to exercise the EF spec's multi-call concatenation. Empty input writes nothing.
+use lambda_vm_syscalls as syscalls;
+
+pub fn main() {
+    let mut buf_ptr: *const u8 = core::ptr::null();
+    let mut buf_size: usize = 0;
+    unsafe {
+        syscalls::ef_io::read_input(&mut buf_ptr, &mut buf_size);
+    }
+
+    if buf_size > 0 {
+        let half = buf_size / 2;
+        unsafe {
+            syscalls::ef_io::write_output(buf_ptr, half);
+            syscalls::ef_io::write_output(buf_ptr.add(half), buf_size - half);
+        }
+    }
+}
diff --git a/executor/src/vm/instruction/execution.rs b/executor/src/vm/instruction/execution.rs
index 04502645b..219414745 100644
--- a/executor/src/vm/instruction/execution.rs
+++ b/executor/src/vm/instruction/execution.rs
@@ -304,7 +304,7 @@ impl Instruction {
                         // It is not the correct implementation of ecall/ebreak
                         let pointer = registers.read(10)?;
                         let len = registers.read(11)?;
-                        let bytes = memory.load_bytes(pointer, len);
+                        let bytes = memory.load_bytes(pointer, len)?;
                         let value =
                             str::from_utf8(&bytes).map_err(|_| ExecutionError::IncorrectMessage)?;
                         println!("PRINT VM: {}", value);
@@ -313,7 +313,7 @@ impl Instruction {
                         // panic
                         let pointer = registers.read(10)?;
                         let len = registers.read(11)?;
-                        let bytes = memory.load_bytes(pointer, len);
+                        let bytes = memory.load_bytes(pointer, len)?;
                         let value =
                             str::from_utf8(&bytes).map_err(|_| ExecutionError::IncorrectMessage)?;
                         return Err(ExecutionError::Panic(value.to_owned()));
diff --git a/executor/src/vm/memory.rs b/executor/src/vm/memory.rs
index b1f047ee1..b78c98d44 100644
--- a/executor/src/vm/memory.rs
+++ b/executor/src/vm/memory.rs
@@ -38,9 +38,10 @@ impl BuildHasher for U64BuildHasher {
 
 pub type U64HashMap<V> = HashMap<u64, V, U64BuildHasher>;
 
-// TODO: Correctly define this
-const MAX_PUBLIC_OUTPUT_COMMIT_SIZE: u64 = 1024;
-const PUBLIC_OUTPUT_START_INDEX: u64 = 0;
+/// Total cap on public output bytes across all `commit_public_output` calls.
+/// The COMMIT AIR concatenates calls via the running `x254` index, so this
+/// is enforced as a running-total budget rather than a per-call limit.
+pub const MAX_PUBLIC_OUTPUT_TOTAL_SIZE: u64 = 1024 * 1024;
 /// Maximum size of the private input memory region (in bytes).
 pub const MAX_PRIVATE_INPUT_SIZE: u64 = 6700000;
 /// Fixed high address where private input is mapped. Guest programs can read
@@ -50,19 +51,30 @@ pub const MAX_PRIVATE_INPUT_SIZE: u64 = 6700000;
 pub const PRIVATE_INPUT_START_INDEX: u64 = 0xFF000000;
 
 #[derive(Default, Debug)]
-pub struct Memory(U64HashMap<[u8; 4]>);
+pub struct Memory {
+    cells: U64HashMap<[u8; 4]>,
+    /// Bytes committed to public output via `commit_public_output`. The
+    /// COMMIT AIR doesn't write to a fixed memory region (it streams bytes
+    /// onto the Commit bus by `index`), so this buffer is purely the
+    /// executor's view used by `read_return_value` and CLI display.
+    public_output: Vec<u8>,
+}
 
 impl Memory {
     pub fn load_byte(&self, address: u64) -> u8 {
         let aligned_address = address - address % 4;
-        let value = self.0.get(&aligned_address).cloned().unwrap_or_default();
+        let value = self
+            .cells
+            .get(&aligned_address)
+            .cloned()
+            .unwrap_or_default();
         value[(address % 4) as usize]
     }
 
     pub fn store_byte(&mut self, address: u64, value: u8) {
         let aligned_address = address - address % 4;
         let entry = self
-            .0
+            .cells
             .entry(aligned_address)
             .or_insert_with(|| [0, 0, 0, 0]);
         entry[(address % 4) as usize] = value;
@@ -72,7 +84,7 @@ impl Memory {
         if !address.is_multiple_of(4) {
             return Err(MemoryError::UnalignedAccess);
         }
-        let bytes = self.0.get(&address).cloned().unwrap_or_default();
+        let bytes = self.cells.get(&address).cloned().unwrap_or_default();
         Ok(u32::from_le_bytes(bytes))
     }
 
@@ -81,7 +93,7 @@ impl Memory {
             return Err(MemoryError::UnalignedAccess);
         }
         let bytes = value.to_le_bytes();
-        self.0.insert(address, bytes);
+        self.cells.insert(address, bytes);
         Ok(())
     }
 
@@ -90,8 +102,8 @@ impl Memory {
         if !address.is_multiple_of(8) {
             return Err(MemoryError::UnalignedAccess);
         }
-        let low_bytes = self.0.get(&address).cloned().unwrap_or_default();
-        let high_bytes = self.0.get(&(address + 4)).cloned().unwrap_or_default();
+        let low_bytes = self.cells.get(&address).cloned().unwrap_or_default();
+        let high_bytes = self.cells.get(&(address + 4)).cloned().unwrap_or_default();
         let low = u32::from_le_bytes(low_bytes) as u64;
         let high = u32::from_le_bytes(high_bytes) as u64;
         Ok(low | (high << 32))
@@ -104,8 +116,8 @@ impl Memory {
         }
         let low = (value & 0xFFFFFFFF) as u32;
         let high = (value >> 32) as u32;
-        self.0.insert(address, low.to_le_bytes());
-        self.0.insert(address + 4, high.to_le_bytes());
+        self.cells.insert(address, low.to_le_bytes());
+        self.cells.insert(address + 4, high.to_le_bytes());
         Ok(())
     }
 
@@ -117,7 +129,11 @@ impl Memory {
             );
         }
         let aligned_address = address - address % 4;
-        let bytes = self.0.get(&aligned_address).cloned().unwrap_or_default();
+        let bytes = self
+            .cells
+            .get(&aligned_address)
+            .cloned()
+            .unwrap_or_default();
         let value = &bytes[(address % 4) as usize..(address % 4) as usize + 2];
         Ok(u16::from_le_bytes(
             value.try_into().map_err(|_| MemoryError::LoadHalf)?,
@@ -130,7 +146,7 @@ impl Memory {
         }
         let aligned_address = address - address % 4;
         let entry = self
-            .0
+            .cells
             .entry(aligned_address)
             .or_insert_with(|| [0, 0, 0, 0]);
         let bytes = value.to_le_bytes();
@@ -139,19 +155,25 @@ impl Memory {
         Ok(())
     }
 
+    /// Append `length` bytes from guest memory starting at `address` to the public output.
+    /// The COMMIT AIR concatenates calls via the running `x254` index, and the trace builder
+    /// accumulates `commit_ops` into `VmProof.public_output`; this method maintains the
+    /// executor's view of the same byte stream so `read_return_value` matches.
+    /// Fails with `CommitSizeExceeded` if the total would exceed `MAX_PUBLIC_OUTPUT_TOTAL_SIZE`.
     pub fn commit_public_output(&mut self, address: u64, length: u64) -> Result<(), MemoryError> {
-        if length > MAX_PUBLIC_OUTPUT_COMMIT_SIZE {
+        let new_total = (self.public_output.len() as u64)
+            .checked_add(length)
+            .ok_or(MemoryError::CommitSizeExceeded)?;
+        if new_total > MAX_PUBLIC_OUTPUT_TOTAL_SIZE {
             return Err(MemoryError::CommitSizeExceeded);
         }
-        self.store_word(PUBLIC_OUTPUT_START_INDEX, length as u32)?;
-        let inputs = self.load_bytes(address, length);
-        self.set_bytes_aligned(PUBLIC_OUTPUT_START_INDEX + 4, &inputs)?;
+        let bytes = self.load_bytes(address, length)?;
+        self.public_output.extend_from_slice(&bytes);
         Ok(())
     }
 
     pub fn read_return_value(&self) -> Result<Vec<u8>, MemoryError> {
-        let size = self.load_word(PUBLIC_OUTPUT_START_INDEX)?;
-        Ok(self.load_bytes(PUBLIC_OUTPUT_START_INDEX + 4, size as u64))
+        Ok(self.public_output.clone())
     }
 
     /// Pre-loads private input bytes at `PRIVATE_INPUT_START_INDEX` as a
@@ -164,23 +186,29 @@ impl Memory {
         if inputs.len() as u64 > MAX_PRIVATE_INPUT_SIZE {
             return Err(MemoryError::PrivateInputSizeExceeded);
         }
-        self.store_word(PRIVATE_INPUT_START_INDEX, inputs.len() as u32)?;
+        let len_u32 =
+            u32::try_from(inputs.len()).map_err(|_| MemoryError::PrivateInputSizeExceeded)?;
+        self.store_word(PRIVATE_INPUT_START_INDEX, len_u32)?;
         self.set_bytes_aligned(PRIVATE_INPUT_START_INDEX + 4, &inputs)?;
         Ok(())
     }
 
-    pub fn load_bytes(&self, mut addr: u64, len: u64) -> Vec<u8> {
-        let mut result = Vec::with_capacity(len as usize);
-        let end = addr + len;
+    pub fn load_bytes(&self, mut addr: u64, len: u64) -> Result<Vec<u8>, MemoryError> {
+        let end = addr.checked_add(len).ok_or(MemoryError::AddressOverflow)?;
+        let len_usize = usize::try_from(len).map_err(|_| MemoryError::AllocationFailed)?;
+        let mut result = Vec::new();
+        result
+            .try_reserve_exact(len_usize)
+            .map_err(|_| MemoryError::AllocationFailed)?;
         while addr < end {
             let aligned = addr - (addr % 4);
-            let bytes = self.0.get(&aligned).cloned().unwrap_or_default();
+            let bytes = self.cells.get(&aligned).cloned().unwrap_or_default();
             let offset = (addr % 4) as usize;
             let take = std::cmp::min(4 - offset, (end - addr) as usize);
             result.extend_from_slice(&bytes[offset..offset + take]);
             addr += take as u64;
         }
-        result
+        Ok(result)
     }
 
     /// Helper method to store a given input at an aligned address. It may also overwrite existing bytes with zero if inputs is not divisible by 4
@@ -192,7 +220,7 @@ impl Memory {
         for chunk in inputs.chunks(4) {
             let mut bytes = [0u8; 4];
             bytes[..chunk.len()].copy_from_slice(chunk);
-            self.0.insert(addr, bytes);
+            self.cells.insert(addr, bytes);
             addr += 4;
         }
         Ok(())
@@ -209,6 +237,10 @@ pub enum MemoryError {
     CommitSizeExceeded,
     #[error("Private input size exceeded")]
     PrivateInputSizeExceeded,
+    #[error("Address range exceeds u64::MAX")]
+    AddressOverflow,
+    #[error("Failed to allocate memory for load_bytes")]
+    AllocationFailed,
 }
 
 #[cfg(test)]
@@ -234,7 +266,7 @@ mod tests {
     }
 
     #[test]
-    fn test_commit_public_output_overwrites() {
+    fn test_commit_public_output_appends() {
         let mut memory = Memory::default();
         memory.store_byte(0x100, b'a');
         memory.store_byte(0x101, b'b');
@@ -248,19 +280,78 @@ mod tests {
             .commit_public_output(0x104, 2)
             .expect("second commit should succeed");
 
-        // Overwrite semantics: second commit replaces first
+        // Append semantics: calls concatenate (EF zkVM IO interface).
         assert_eq!(
             memory
                 .read_return_value()
                 .expect("public output should be readable"),
-            b"cd".to_vec()
+            b"abcd".to_vec()
         );
     }
 
     #[test]
-    fn test_commit_public_output_size_exceeded() {
+    fn test_commit_public_output_empty_is_ok() {
+        let mut memory = Memory::default();
+        memory
+            .commit_public_output(0, 0)
+            .expect("zero-length commit should succeed");
+        assert!(
+            memory
+                .read_return_value()
+                .expect("public output should be readable")
+                .is_empty()
+        );
+    }
+
+    #[test]
+    fn test_commit_public_output_address_overflow() {
+        let mut memory = Memory::default();
+        let err = memory
+            .commit_public_output(u64::MAX, 2)
+            .expect_err("address overflow must error, not panic");
+        assert!(matches!(err, super::MemoryError::AddressOverflow));
+    }
+
+    #[test]
+    fn test_load_bytes_huge_len_returns_alloc_error() {
+        let memory = Memory::default();
+        // A 1 PiB (`1 << 50` bytes) allocation request from a guest must fail
+        // cleanly, not abort the host process via OOM. `addr = 0` keeps
+        // `checked_add` from overflowing, so the path reaches the allocation.
+        let huge = 1u64 << 50;
+        let err = memory
+            .load_bytes(0, huge)
+            .expect_err("huge alloc must error, not abort");
+        assert!(matches!(err, super::MemoryError::AllocationFailed));
+    }
+
+    #[test]
+    fn test_load_bytes_overflow_errors() {
+        let memory = Memory::default();
+        let err = memory
+            .load_bytes(u64::MAX, 2)
+            .expect_err("address overflow must error, not panic");
+        assert!(matches!(err, super::MemoryError::AddressOverflow));
+    }
+
+    #[test]
+    fn test_commit_public_output_total_cap() {
         let mut memory = Memory::default();
-        let err = memory.commit_public_output(0x100, 1025);
-        assert!(err.is_err());
+        // Seed one 512 KiB source region; both commits below re-read it.
+        let chunk = vec![0xAB; 512 * 1024];
+        memory
+            .set_bytes_aligned(0x1_0000, &chunk)
+            .expect("seed should succeed");
+
+        memory
+            .commit_public_output(0x1_0000, 512 * 1024)
+            .expect("first 512 KB commit should succeed");
+        memory
+            .commit_public_output(0x1_0000, 512 * 1024)
+            .expect("second 512 KB commit should succeed (total = 1 MB)");
+
+        // One more byte exceeds the 1 MiB total cap.
+        let err = memory.commit_public_output(0x1_0000, 1).unwrap_err();
+        assert!(matches!(err, super::MemoryError::CommitSizeExceeded));
     }
 }
diff --git a/executor/tests/rust.rs b/executor/tests/rust.rs
index fab183571..b15530a63 100644
--- a/executor/tests/rust.rs
+++ b/executor/tests/rust.rs
@@ -160,6 +160,20 @@ fn test_commit() {
     );
 }
 
+#[test]
+fn test_ef_io_demo_concatenates_writes() {
+    // Demo guest reads its private input via EF `read_input`, then emits it
+    // back as the public output via TWO `write_output` calls (split in halves).
+    // The COMMIT AIR concatenates the two calls; the executor's
+    // `commit_public_output` appends in the same order.
+    let input: Vec<u8> = b"hello world!".to_vec();
+    run_program_and_check_public_output(
+        "./program_artifacts/rust/ef_io_demo.elf",
+        input.clone(),
+        input,
+    );
+}
+
 #[test]
 fn test_commit_sum() {
     run_program_and_check_public_output(
diff --git a/prover/src/lib.rs b/prover/src/lib.rs
index 6c9a07488..254c37834 100644
--- a/prover/src/lib.rs
+++ b/prover/src/lib.rs
@@ -651,6 +651,11 @@ pub fn prove_with_options_and_inputs(
         .filter(|c| c.is_private_input)
         .count();
 
+    debug_assert_eq!(
+        traces.public_output_bytes, result.return_values.memory_values,
+        "public output diverged between executor view and trace reconstruction"
+    );
+
     Ok(VmProof {
         proof,
         runtime_page_ranges,
diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs
index e8e79f80f..736fcd78e 100644
--- a/prover/src/tests/prove_elfs_tests.rs
+++ b/prover/src/tests/prove_elfs_tests.rs
@@ -2172,6 +2172,31 @@ fn test_prove_private_input_different_values() {
     assert_eq!(proof.public_output, input[4..12].to_vec());
 }
 
+/// End-to-end: EF zkVM IO interface — demo guest reads its private input via
+/// `read_input` and emits it back through TWO `write_output` calls. The
+/// COMMIT AIR's running `x254` index concatenates them; the resulting proof's
+/// `public_output` must equal the original input.
+#[test]
+fn test_prove_ef_io_demo_concatenates() {
+    let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .parent()
+        .expect("workspace root")
+        .to_path_buf();
+    let elf_bytes =
+        std::fs::read(workspace_root.join("executor/program_artifacts/rust/ef_io_demo.elf"))
+            .expect("ef_io_demo.elf not found — run `make compile-programs-rust`");
+    let input: &[u8] = b"hello world!";
+    let proof = crate::prove_with_inputs(&elf_bytes, input).expect("prove should succeed");
+    assert!(
+        crate::verify(&proof, &elf_bytes).expect("verify should not error"),
+        "ef_io_demo should verify"
+    );
+    assert_eq!(
+        proof.public_output, input,
+        "two write_output calls must concatenate"
+    );
+}
+
 /// End-to-end: Rust std program with private input.
 #[test]
 fn test_prove_commit_sum() {
diff --git a/syscalls/src/ef_io.rs b/syscalls/src/ef_io.rs
new file mode 100644
index 000000000..dabf7818d
--- /dev/null
+++ b/syscalls/src/ef_io.rs
@@ -0,0 +1,84 @@
+//! EF zkVM IO interface: <https://github.com/eth-act/zkvm-standards/blob/main/standards/io-interface/README.md>
+//!
+//! Two C-callable functions that match the EF standard so portable applications
+//! compile unchanged across zkVMs:
+//!
+//! - `read_input`: returns a zero-copy pointer + size to the private input.
+//! - `write_output`: appends bytes to the public output. Multiple calls
+//!   concatenate.
+//!
+//! On Lambda VM these map to:
+//! - `read_input` → memory-mapped private input region at `0xFF000000`
+//!   (4-byte LE length prefix at base, data at `+4`).
+//! - `write_output` → ECALL #64 (Commit). The trace builder maintains a
+//!   running commitment index in synthetic register `x254`, so multiple
+//!   ECALLs naturally concatenate at the proof level.
+
+#[cfg(target_arch = "riscv64")]
+use core::arch::asm;
+
+#[cfg(target_arch = "riscv64")]
+use crate::syscalls::{PRIVATE_INPUT_START, SyscallNumbers};
+
+/// EF IO: return a zero-copy pointer and size for the private input.
+///
+/// Reads the `u32` length stored at `PRIVATE_INPUT_START` and reports the data as
+/// starting 4 bytes after it. Per the spec this function is idempotent, callable
+/// multiple times, and cannot fail. If `buf_size` is 0, the value of `buf_ptr` is
+/// unspecified. Privacy of the input is the guest's responsibility; the VM does not enforce it.
+///
+/// # Safety
+///
+/// `buf_ptr` and `buf_size` must be non-null, aligned, and valid for writes.
+#[cfg(target_arch = "riscv64")]
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn read_input(buf_ptr: *mut *const u8, buf_size: *mut usize) {
+    unsafe {
+        let len_ptr = PRIVATE_INPUT_START as *const u32;
+        let len = core::ptr::read_volatile(len_ptr) as usize;
+        *buf_ptr = (PRIVATE_INPUT_START + 4) as *const u8;
+        *buf_size = len;
+    }
+}
+
+/// EF IO: append `size` bytes from `output` to the public output.
+///
+/// Issues one `ecall` with `a7 = SyscallNumbers::Commit`, `a0 = 1`, `a1 = output`, `a2 = size`.
+/// Multiple calls concatenate. Per the spec this function cannot fail; in practice the
+/// executor enforces a total-output cap (see `MAX_PUBLIC_OUTPUT_TOTAL_SIZE` in
+/// `executor::vm::memory`). Exceeding it causes the executor to return an error and abort
+/// proving — not a graceful failure mode at the C boundary, but consistent with "cannot
+/// fail" for well-formed programs that stay under the limit.
+///
+/// # Safety
+///
+/// `output` must point to `size` readable bytes within guest memory.
+#[cfg(target_arch = "riscv64")]
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn write_output(output: *const u8, size: usize) {
+    unsafe {
+        asm!(
+            "ecall",
+            in("a0") 1usize, // fd = 1 (stdout) — required by the COMMIT chip
+            in("a1") output,
+            in("a2") size,
+            in("a7") SyscallNumbers::Commit as usize,
+        );
+    }
+}
+
+/// Host-side stub: always panics via `unimplemented!`, since Lambda VM's IO interface
+/// is only implemented for the `riscv64` guest target. Not exported with C linkage on
+/// host so the generic name doesn't collide with C dependencies in test builds.
+#[cfg(not(target_arch = "riscv64"))]
+pub fn read_input(_buf_ptr: *mut *const u8, _buf_size: *mut usize) {
+    unimplemented!("read_input is only implemented for riscv64 targets");
+}
+
+/// Host-side stub: always panics via `unimplemented!`, since Lambda VM's IO interface
+/// is only implemented for the `riscv64` guest target. Not exported with C linkage on
+/// host so the generic name doesn't collide with C dependencies in test builds.
+#[cfg(not(target_arch = "riscv64"))]
+pub fn write_output(_output: *const u8, _size: usize) {
+    unimplemented!("write_output is only implemented for riscv64 targets");
+}
diff --git a/syscalls/src/lib.rs b/syscalls/src/lib.rs
index 378257d18..79a420181 100644
--- a/syscalls/src/lib.rs
+++ b/syscalls/src/lib.rs
@@ -1,4 +1,5 @@
 pub mod allocator;
+pub mod ef_io;
 pub mod entrypoint;
 pub mod random;
 pub mod syscalls;
diff --git a/syscalls/src/syscalls.rs b/syscalls/src/syscalls.rs
index 14d5b2e6f..6451828c6 100644
--- a/syscalls/src/syscalls.rs
+++ b/syscalls/src/syscalls.rs
@@ -6,10 +6,10 @@ use core::arch::asm;
 /// The host pre-loads the input; the guest reads directly (no ecall).
 /// Must match `executor::vm::memory::PRIVATE_INPUT_START_INDEX`.
 #[cfg(target_arch = "riscv64")]
-const PRIVATE_INPUT_START: usize = 0xFF000000;
+pub const PRIVATE_INPUT_START: usize = 0xFF000000;
 
 #[cfg(target_arch = "riscv64")]
-enum SyscallNumbers {
+pub enum SyscallNumbers {
     Print = 1,
     Panic = 2,
     Commit = 64,