From d17603c7bbc3e7b774cae9c578c1ee399c707d90 Mon Sep 17 00:00:00 2001 From: Joaquin Carletti Date: Wed, 18 Mar 2026 12:19:11 -0300 Subject: [PATCH 001/231] mem spill clean version --- Cargo.lock | 5 + bin/cli/Cargo.toml | 2 + bin/cli/src/main.rs | 36 +- crypto/crypto/Cargo.toml | 3 + crypto/crypto/src/merkle_tree/merkle.rs | 137 +++++++- crypto/math/src/field/element.rs | 1 + crypto/stark/Cargo.toml | 5 + crypto/stark/src/lookup.rs | 76 ++-- crypto/stark/src/prover.rs | 387 ++++++++++++++++----- crypto/stark/src/table.rs | 240 ++++++++++++- crypto/stark/src/trace.rs | 314 ++++++++++++++++- executor/programs/asm/fib_iterative_128M.s | 24 ++ executor/programs/asm/fib_iterative_16M.s | 24 ++ executor/programs/asm/fib_iterative_2M.s | 5 +- executor/programs/asm/fib_iterative_32M.s | 24 ++ executor/programs/asm/fib_iterative_64M.s | 24 ++ executor/programs/asm/fib_iterative_8M.s | 24 ++ prover/Cargo.toml | 2 + prover/src/lib.rs | 13 + prover/src/tables/trace_builder.rs | 172 +++++++-- prover/src/tests/disk_spill_tests.rs | 98 ++++++ prover/src/tests/mod.rs | 2 + 22 files changed, 1437 insertions(+), 181 deletions(-) create mode 100644 executor/programs/asm/fib_iterative_128M.s create mode 100644 executor/programs/asm/fib_iterative_16M.s create mode 100644 executor/programs/asm/fib_iterative_32M.s create mode 100644 executor/programs/asm/fib_iterative_64M.s create mode 100644 executor/programs/asm/fib_iterative_8M.s create mode 100644 prover/src/tests/disk_spill_tests.rs diff --git a/Cargo.lock b/Cargo.lock index 978d3f942..1a07bd3ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -864,12 +864,14 @@ dependencies = [ "digest", "iai-callgrind", "math", + "memmap2", "rand 0.8.5", "rand_chacha 0.3.1", "rayon", "serde", "sha2", "sha3", + "tempfile", ] [[package]] @@ -2132,6 +2134,7 @@ dependencies = [ name = "lambda-vm-prover" version = "0.1.0" dependencies = [ + "bincode 1.3.3", "criterion 0.5.1", "crypto", "env_logger", @@ -3559,6 +3562,7 @@ dependencies = [ "itertools 
0.11.0", "log", "math", + "memmap2", "num-integer", "proptest", "rand 0.8.5", @@ -3569,6 +3573,7 @@ dependencies = [ "serde_cbor", "serde_json", "sha3", + "tempfile", "test-log", "thiserror 1.0.69", "wasm-bindgen", diff --git a/bin/cli/Cargo.toml b/bin/cli/Cargo.toml index 8eb62c86f..080a3951b 100644 --- a/bin/cli/Cargo.toml +++ b/bin/cli/Cargo.toml @@ -14,4 +14,6 @@ tikv-jemallocator = "0.6" tikv-jemalloc-ctl = { version = "0.6", features = ["stats"], optional = true } [features] +default = ["disk-spill"] jemalloc-stats = ["dep:tikv-jemalloc-ctl"] +disk-spill = ["prover/disk-spill"] diff --git a/bin/cli/src/main.rs b/bin/cli/src/main.rs index 725f0de5f..9449b2a51 100644 --- a/bin/cli/src/main.rs +++ b/bin/cli/src/main.rs @@ -123,6 +123,10 @@ enum Commands { /// Print timing breakdown #[arg(long)] time: bool, + + /// Maximum rows per table chunk (power of 2). Smaller = less memory, more chunks. + #[arg(long)] + max_rows: Option, }, /// Verify a proof bundle @@ -155,7 +159,8 @@ fn main() -> ExitCode { output, blowup, time, - } => cmd_prove(elf, output, blowup, time), + max_rows, + } => cmd_prove(elf, output, blowup, time, max_rows), Commands::Verify { proof, elf, @@ -249,7 +254,7 @@ fn cmd_execute(elf_path: PathBuf, flamegraph_path: Option) -> ExitCode ExitCode::SUCCESS } -fn cmd_prove(elf_path: PathBuf, output_path: PathBuf, blowup: Option, time: bool) -> ExitCode { +fn cmd_prove(elf_path: PathBuf, output_path: PathBuf, blowup: Option, time: bool, max_rows: Option) -> ExitCode { eprintln!("Reading ELF file..."); let elf_data = match std::fs::read(&elf_path) { Ok(data) => data, @@ -262,6 +267,27 @@ fn cmd_prove(elf_path: PathBuf, output_path: PathBuf, blowup: Option, time: #[cfg(feature = "jemalloc-stats")] let tracker = heap_tracker::HeapTracker::start(); + if cfg!(feature = "disk-spill") { + eprintln!("Disk-spill: enabled"); + } + + let max_rows_config = match max_rows { + Some(mr) => { + eprintln!("Max rows per chunk: {mr}"); + prover::MaxRowsConfig { + cpu: mr, 
+ memw: mr, + dvrm: mr, + mul: mr, + lt: mr, + shift: mr, + load: mr, + branch: mr, + } + } + None => prover::MaxRowsConfig::default(), + }; + let start = Instant::now(); let proof = match blowup { Some(b) => { @@ -276,11 +302,13 @@ fn cmd_prove(elf_path: PathBuf, output_path: PathBuf, blowup: Option, time: "Generating proof (blowup={b}, queries={})...", opts.fri_number_of_queries ); - prover::prove_with_options(&elf_data, &opts, &Default::default()) + prover::prove_with_options(&elf_data, &opts, &max_rows_config) } None => { + let opts = GoldilocksCubicProofOptions::with_blowup(2) + .expect("blowup=2 is always valid"); eprintln!("Generating proof..."); - prover::prove(&elf_data) + prover::prove_with_options(&elf_data, &opts, &max_rows_config) } }; let prove_elapsed = start.elapsed(); diff --git a/crypto/crypto/Cargo.toml b/crypto/crypto/Cargo.toml index 7f9a4a58f..5ae5d5f24 100644 --- a/crypto/crypto/Cargo.toml +++ b/crypto/crypto/Cargo.toml @@ -19,6 +19,8 @@ serde = { version = "1.0", default-features = false, features = [ rayon = { version = "1.8.0", optional = true } rand = { version = "0.8.5", default-features = false } rand_chacha = { version = "0.3.1", default-features = false } +memmap2 = { version = "0.9", optional = true } +tempfile = { version = "3", optional = true } [dev-dependencies] criterion = "0.4" @@ -32,4 +34,5 @@ asm = ["sha3/asm"] std = ["math/std", "sha2/std", "sha3/std", "serde?/std"] serde = ["dep:serde"] parallel = ["dep:rayon"] +disk-spill = ["std", "dep:memmap2", "dep:tempfile"] alloc = [] \ No newline at end of file diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 55fa49a83..9f5f68876 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -22,6 +22,19 @@ impl Display for Error { #[cfg(feature = "std")] impl std::error::Error for Error {} +/// File-backed mmap storage for Merkle tree nodes. 
+/// +/// After `spill_nodes_to_disk()`, the heap `Vec` is freed and all +/// node access goes through this mmap. The OS manages page eviction under +/// memory pressure — file-backed pages are evictable without swap. +#[cfg(feature = "disk-spill")] +pub(crate) struct MmapNodeBacking { + mmap: memmap2::Mmap, + _file: std::fs::File, + node_count: usize, + node_size: usize, +} + /// The struct for the Merkle tree, consisting of the root and the nodes. /// A typical tree would look like this /// root @@ -31,11 +44,29 @@ impl std::error::Error for Error {} /// leaf 1 leaf 2 leaf 3 leaf 4 /// The bottom leafs correspond to the hashes of the elements, while each upper /// layer contains the hash of the concatenation of the daughter nodes. -#[derive(Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct MerkleTree { pub root: B::Node, nodes: Vec, + #[cfg(feature = "disk-spill")] + #[cfg_attr(feature = "serde", serde(skip))] + mmap_backing: Option, +} + +impl Clone for MerkleTree { + fn clone(&self) -> Self { + #[cfg(feature = "disk-spill")] + assert!( + self.mmap_backing.is_none(), + "cannot clone a spilled MerkleTree — nodes have been freed; use Arc instead" + ); + Self { + root: self.root.clone(), + nodes: self.nodes.clone(), + #[cfg(feature = "disk-spill")] + mmap_backing: None, + } + } } const ROOT: usize = 0; @@ -78,14 +109,46 @@ where Some(MerkleTree { root: nodes[ROOT].clone(), nodes, + #[cfg(feature = "disk-spill")] + mmap_backing: None, }) } + /// Total number of nodes in the tree (inner + leaves). + #[inline] + fn node_count(&self) -> usize { + #[cfg(feature = "disk-spill")] + if let Some(ref backing) = self.mmap_backing { + return backing.node_count; + } + self.nodes.len() + } + + /// Access a node by index, returning a reference. + /// + /// Returns `None` if `idx` is out of bounds. 
+ #[inline] + fn node_get(&self, idx: usize) -> Option<&B::Node> { + #[cfg(feature = "disk-spill")] + if let Some(ref backing) = self.mmap_backing { + if idx < backing.node_count { + // SAFETY: B::Node is Copy (required by spill_nodes_to_disk's where clause). + // The mmap contains node_count × node_size contiguous bytes written from + // identical Node values on the same machine. The mmap base is page-aligned + // and node_size divides into page size for all concrete Node types ([u8; 32/64]). + let ptr = unsafe { backing.mmap.as_ptr().add(idx * backing.node_size) }; + return Some(unsafe { &*(ptr as *const B::Node) }); + } + return None; + } + self.nodes.get(idx) + } + /// Returns a Merkle proof for the element/s at position pos /// For example, give me an inclusion proof for the 3rd element in the /// Merkle tree pub fn get_proof_by_pos(&self, pos: usize) -> Option> { - let pos = pos + self.nodes.len() / 2; + let pos = pos + self.node_count() / 2; let Ok(merkle_path) = self.build_merkle_path(pos) else { return None; }; @@ -101,12 +164,12 @@ where /// Returns the Merkle path for the element/s for the leaf at position pos fn build_merkle_path(&self, pos: usize) -> Result, Error> { // Pre-allocate based on tree depth (log2 of tree size) - let tree_depth = (self.nodes.len() + 1).ilog2() as usize; + let tree_depth = (self.node_count() + 1).ilog2() as usize; let mut merkle_path = Vec::with_capacity(tree_depth); let mut pos = pos; while pos != ROOT { - let Some(node) = self.nodes.get(sibling_index(pos)) else { + let Some(node) = self.node_get(sibling_index(pos)) else { // out of bounds, exit returning the current merkle_path return Err(Error::OutOfBounds); }; @@ -141,7 +204,7 @@ where return Err(Error::EmptyPositionList); } - let num_leaves = (self.nodes.len() + 1).div_ceil(2); + let num_leaves = (self.node_count() + 1).div_ceil(2); // Validate all positions are within bounds for &pos in pos_list { @@ -154,7 +217,7 @@ where // of the leaves. 
let leaf_positions = pos_list .iter() - .map(|pos| pos + self.nodes.len() / 2) + .map(|pos| pos + self.node_count() / 2) .collect::>(); // We get the positions of the nodes for the batch proof. let batch_auth_path_positions = self.get_batch_auth_path_positions(&leaf_positions); @@ -162,7 +225,11 @@ where // We get the nodes for the batch proof. let batch_auth_path_nodes = batch_auth_path_positions .iter() - .map(|pos| self.nodes[*pos].clone()) + .map(|pos| { + self.node_get(*pos) + .expect("batch auth path position in bounds") + .clone() + }) .collect(); Ok(BatchProof { @@ -188,7 +255,7 @@ where let mut obtainable: BTreeSet = leaf_positions.iter().cloned().collect(); // Number of levels in tree - let num_levels = (self.nodes.len() + 1).ilog2(); + let num_levels = (self.node_count() + 1).ilog2(); // Iter lefevel-by-level from leaves to root. for _ in 0..num_levels - 1 { @@ -217,4 +284,58 @@ where // This makes the proof ordered from bottom (nodes closer to leaves) to top (nodes loser to root). auth_path_set.into_iter().rev().collect() } + + /// Write tree nodes to a temp file, mmap it read-only, and free the heap Vec. + /// + /// After this call, all node access methods read from the mmap transparently. + /// The OS can evict mmap pages under memory pressure since they're file-backed. + /// + /// Requires `B::Node: Copy` to ensure nodes have a trivial byte representation + /// suitable for raw serialization and mmap casting. + /// + /// Note: the concrete `Node` type is `[u8; 32]` (Keccak hash), which has no + /// padding bytes. The raw byte round-trip is therefore well-defined. 
+ #[cfg(feature = "disk-spill")] + pub fn spill_nodes_to_disk(&mut self) -> std::io::Result<()> + where + B::Node: Copy, + { + use std::io::Write; + + if self.nodes.is_empty() { + return Ok(()); + } + + let node_size = core::mem::size_of::(); + let node_count = self.nodes.len(); + let total_bytes = node_count * node_size; + + let file = tempfile::tempfile()?; + file.set_len(total_bytes as u64)?; + { + let mut writer = std::io::BufWriter::new(&file); + // SAFETY: B::Node is Copy, so its in-memory representation is a + // valid byte sequence. The Vec is contiguous. + let bytes = unsafe { + core::slice::from_raw_parts(self.nodes.as_ptr() as *const u8, total_bytes) + }; + writer.write_all(bytes)?; + writer.flush()?; + } + + // SAFETY: We own the file exclusively; it won't be modified externally. + let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; + + // Free the heap allocation + self.nodes = Vec::new(); + + self.mmap_backing = Some(MmapNodeBacking { + mmap, + _file: file, + node_count, + node_size, + }); + + Ok(()) + } } diff --git a/crypto/math/src/field/element.rs b/crypto/math/src/field/element.rs index 9c2ac3258..fb2019df4 100644 --- a/crypto/math/src/field/element.rs +++ b/crypto/math/src/field/element.rs @@ -40,6 +40,7 @@ use super::traits::{IsPrimeField, IsSubFieldOf, LegendreSymbol}; /// A field element with operations algorithms defined in `F` #[allow(clippy::derived_hash_with_manual_eq)] +#[repr(transparent)] #[derive(Debug, Clone, Hash, Copy)] pub struct FieldElement { value: F::BaseType, diff --git a/crypto/stark/Cargo.toml b/crypto/stark/Cargo.toml index 6ca80c09f..f7e67fb23 100644 --- a/crypto/stark/Cargo.toml +++ b/crypto/stark/Cargo.toml @@ -26,6 +26,10 @@ itertools = "0.11.0" # Parallelization crates rayon = { version = "1.8.0", optional = true } +# Disk-spill: mmap LDE data to reduce heap memory during proving +memmap2 = { version = "0.9", optional = true } +tempfile = { version = "3", optional = true } + # wasm wasm-bindgen = { version 
= "0.2", optional = true } serde-wasm-bindgen = { version = "0.5", optional = true } @@ -48,6 +52,7 @@ instruments = [] # This enab debug-checks = [] # Enables validate_trace + bus balance report in prover parallel = ["dep:rayon", "crypto/parallel"] wasm = ["dep:wasm-bindgen", "dep:serde-wasm-bindgen", "dep:web-sys"] +disk-spill = ["dep:memmap2", "dep:tempfile", "crypto/disk-spill"] [target.'cfg(not(all(target_arch = "wasm32", target_os = "unknown")))'.dev-dependencies] proptest = "1.2.0" diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs index b3cb62fa2..798e33390 100644 --- a/crypto/stark/src/lookup.rs +++ b/crypto/stark/src/lookup.rs @@ -1319,12 +1319,7 @@ where let num_bus_elements = table_interaction.num_bus_elements(); let alpha_powers = compute_alpha_powers(alpha, num_bus_elements); - // Sign: +1 for senders, -1 for receivers - let sign = if table_interaction.is_sender { - FieldElement::::one() - } else { - -FieldElement::::one() - }; + let negate = !table_interaction.is_sender; // Batch inversion: collect all fingerprints, invert once, then multiply back. // Compute fingerprint = z - (bus_id*α^0 + v0*α^1 + v1*α^2 + ...) 
using @@ -1376,11 +1371,15 @@ where FieldElement::inplace_batch_inverse(&mut fingerprints) .expect("fingerprint is zero - probability of sampling zero is negligible"); - // Compute terms: term[i] = sign * multiplicity[i] * fingerprint_inv[i] + // Compute terms: term[i] = ±(multiplicity[i] * fingerprint_inv[i]) + // Use conditional negation instead of E×E sign multiplication multiplicities .iter() .zip(fingerprints.iter()) - .map(|(multiplicity, fingerprint_inv)| multiplicity * &sign * fingerprint_inv) + .map(|(multiplicity, fingerprint_inv)| { + let term = multiplicity * fingerprint_inv; + if negate { -term } else { term } + }) .collect() } @@ -1410,16 +1409,8 @@ where .max(interaction_b.num_bus_elements()); let alpha_powers = compute_alpha_powers(alpha, max_bus_elements); - let sign_a = if interaction_a.is_sender { - FieldElement::::one() - } else { - -FieldElement::::one() - }; - let sign_b = if interaction_b.is_sender { - FieldElement::::one() - } else { - -FieldElement::::one() - }; + let negate_a = !interaction_a.is_sender; + let negate_b = !interaction_b.is_sender; // Helper to compute multiplicities for an interaction let compute_multiplicities = |interaction: &BusInteraction| -> Vec> { @@ -1511,13 +1502,17 @@ where FieldElement::inplace_batch_inverse(&mut all_fingerprints) .expect("fingerprint is zero - probability of sampling zero is negligible"); - // Compute batched terms: term[i] = sign_a * m_a[i] * fp_a_inv[i] + sign_b * m_b[i] * fp_b_inv[i] + // Compute batched terms: term[i] = m_a[i] / fp_a[i] ± m_b[i] / fp_b[i] + // Use conditional negation instead of E×E sign multiplication (0..trace_len) .map(|row| { let fp_a_inv = &all_fingerprints[row]; let fp_b_inv = &all_fingerprints[trace_len + row]; - &multiplicities_a[row] * &sign_a * fp_a_inv - + &multiplicities_b[row] * &sign_b * fp_b_inv + let term_a = &multiplicities_a[row] * fp_a_inv; + let term_b = &multiplicities_b[row] * fp_b_inv; + let term_a = if negate_a { -term_a } else { term_a }; + let term_b 
= if negate_b { -term_b } else { term_b }; + term_a + term_b }) .collect() } @@ -1762,19 +1757,21 @@ where let fp_a = compute_fingerprint_from_step(step, interaction_a, z, alpha_powers); let fp_b = compute_fingerprint_from_step(step, interaction_b, z, alpha_powers); - let sign_a: FieldElement = if interaction_a.is_sender { - FieldElement::one() + // c * fp_a * fp_b - sign_a * m_a * fp_b - sign_b * m_b * fp_a = 0 + // Use conditional negation instead of E×E sign multiplication + let term_a = m_a * &fp_b; + let term_a = if interaction_a.is_sender { + term_a } else { - -FieldElement::one() + -term_a }; - let sign_b: FieldElement = if interaction_b.is_sender { - FieldElement::one() + let term_b = m_b * &fp_a; + let term_b = if interaction_b.is_sender { + term_b } else { - -FieldElement::one() + -term_b }; - - // c * fp_a * fp_b - sign_a * m_a * fp_b - sign_b * m_b * fp_a = 0 - c * &fp_a * &fp_b - m_a * sign_a * &fp_b - m_b * sign_b * &fp_a + c * &fp_a * &fp_b - term_a - term_b } let res = match evaluation_context { @@ -1896,9 +1893,11 @@ where // Clear denominators of absorbed interactions debug_assert!(matches!(absorbed.len(), 1 | 2)); + // Use conditional negation instead of E×E sign multiplication where possible match absorbed.len() { 1 => { // (delta) · f - sign · m = 0 + // sign multiply also promotes m from base field A to extension B let m = compute_multiplicity_from_step(second_step, &absorbed[0].multiplicity); let f = compute_fingerprint_from_step(second_step, &absorbed[0], z, alpha_powers); @@ -1911,23 +1910,18 @@ where } 2 => { // (delta) · f₁ · f₂ - sign₁·m₁·f₂ - sign₂·m₂·f₁ = 0 + // m_i * f_j naturally promotes A→B, then conditionally negate let m1 = compute_multiplicity_from_step(second_step, &absorbed[0].multiplicity); let m2 = compute_multiplicity_from_step(second_step, &absorbed[1].multiplicity); let f1 = compute_fingerprint_from_step(second_step, &absorbed[0], z, alpha_powers); let f2 = compute_fingerprint_from_step(second_step, &absorbed[1], z, 
alpha_powers); - let sign1: FieldElement = if absorbed[0].is_sender { - FieldElement::one() - } else { - -FieldElement::one() - }; - let sign2: FieldElement = if absorbed[1].is_sender { - FieldElement::one() - } else { - -FieldElement::one() - }; - delta * &f1 * &f2 - m1 * sign1 * &f2 - m2 * sign2 * &f1 + let term1 = m1 * &f2; + let term1 = if absorbed[0].is_sender { term1 } else { -term1 }; + let term2 = m2 * &f1; + let term2 = if absorbed[1].is_sender { term2 } else { -term2 }; + delta * &f1 * &f2 - term1 - term2 } _ => unreachable!("absorbed must contain 1 or 2 interactions"), } diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 64e0d27e9..4b74ec3d8 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1465,6 +1465,19 @@ pub trait IsStarkProver< }) .collect(); + // With disk-spill, move all main trace data from heap to mmap BEFORE + // allocating pools. For large programs (64M+ instructions) the total main + // trace data can exceed available RAM; spilling early converts it to + // demand-paged mmap so the OS can evict pages under memory pressure. + // extract_columns_main_into uses get() which reads transparently from mmap. + #[cfg(feature = "disk-spill")] + for (_, trace, _) in air_trace_pairs.iter_mut() { + trace + .main_table + .spill_to_disk() + .map_err(|e| ProvingError::WrongParameter(format!("disk-spill early main: {e}")))?; + } + // ===================================================================== // Round 1, Phase A: Commit all main traces (parallel in chunks of K) // ===================================================================== @@ -1473,6 +1486,12 @@ pub trait IsStarkProver< let mut main_commits: Vec> = Vec::with_capacity(num_airs); + // When disk-spill is enabled, spill each table's LDE from the pool to disk + // before the pool is overwritten by the next chunk. 
+ #[cfg(feature = "disk-spill")] + let mut spilled_ldes: Vec>> = + (0..num_airs).map(|_| None).collect(); + for chunk_start in (0..num_airs).step_by(k) { let chunk_end = (chunk_start + k).min(num_airs); let chunk_size = chunk_end - chunk_start; @@ -1504,9 +1523,45 @@ pub trait IsStarkProver< }) .collect(); - // Sequential: append roots to shared transcript (Fiat-Shamir ordering) - for result in chunk_results { - let (tree, root, pre_tree, pre_root, n_pre) = result?; + // Sequential: spill LDE from pool (before it's overwritten) + append roots + #[allow(unused_variables)] + for (j, result) in chunk_results.into_iter().enumerate() { + #[allow(unused_mut)] + let (mut tree, root, mut pre_tree, pre_root, n_pre) = result?; + + #[cfg(feature = "disk-spill")] + { + let idx = chunk_start + j; + let (air, trace, _) = &air_trace_pairs[idx]; + let num_main_cols = trace.num_main_columns; + let pool = &pool_sets[j]; + + // Spill main LDE columns from pool to disk + spilled_ldes[idx] = Some( + LDETraceTable::spill_main_from_pool( + &pool.main, + num_main_cols, + air.step_size(), + domains[idx].blowup_factor, + ) + .map_err(|e| { + ProvingError::WrongParameter(format!("disk-spill main LDE: {e}")) + })?, + ); + + // Spill Merkle tree nodes to disk + tree.spill_nodes_to_disk().map_err(|e| { + ProvingError::WrongParameter(format!("disk-spill main tree: {e}")) + })?; + if let Some(ref mut pt) = pre_tree { + pt.spill_nodes_to_disk().map_err(|e| { + ProvingError::WrongParameter(format!( + "disk-spill precomputed tree: {e}" + )) + })?; + } + } + if let Some(ref pre_r) = pre_root { transcript.append_bytes(pre_r); } @@ -1533,36 +1588,29 @@ pub trait IsStarkProver< Vec::new() }; + // When disk-spill is enabled, spill all main trace data to disk now. + // Phase A is done (main LDE committed), so the original trace data is only + // needed for aux trace generation (Phase C). Spilling it here frees the + // heap Vec while keeping data accessible through mmap for build_auxiliary_trace. 
+ #[cfg(feature = "disk-spill")] + for (_, trace, _) in air_trace_pairs.iter_mut() { + trace + .main_table + .spill_to_disk() + .map_err(|e| ProvingError::WrongParameter(format!("disk-spill main trace: {e}")))?; + } + // ===================================================================== - // Phase C + Rounds 2-4: Forked per table + // Phase C: Build + commit aux traces in chunks of K // ===================================================================== // Each table gets an independent transcript fork (cloned from the shared // state after Phase B, domain-separated by table index). This matches // the verifier's forking and makes per-table proving independent. - // - // Split into two passes for parallelism: - // Pass 1 (parallel): Build all auxiliary traces (fingerprint + batch inversion) - // Pass 2 (sequential): Fork transcript → extract → LDE → commit (shared pool) - - // Pass 1: Build aux traces in parallel. - // Each build_auxiliary_trace has internal parallelism (batch_inverse, par_chunks), - // but outer parallelism over 12 tables also helps on high-core-count machines. - #[cfg(feature = "parallel")] - let aux_iter = air_trace_pairs.par_iter_mut(); - #[cfg(not(feature = "parallel"))] - let aux_iter = air_trace_pairs.iter_mut(); - let bus_inputs_vec: Vec>> = aux_iter - .map(|(air, trace, _)| { - if air.has_aux_trace() { - air.build_auxiliary_trace(*trace, &lookup_challenges) - } else { - None - } - }) - .collect(); - // Pass 2: Parallel fork transcript → extract → LDE → commit in chunks of K. - // Each table gets its own transcript fork and pool set. + // Aux build and aux commit are merged into a single chunked loop so that + // at most K aux traces are alive at any time. Without this, all N aux + // traces would be allocated simultaneously — for large programs (N=500+ + // table chunks) this easily exceeds available RAM. 
// Pre-fork all transcripts (cheap, sequential — must match verifier ordering) let mut table_transcripts: Vec<_> = (0..num_airs) @@ -1575,17 +1623,41 @@ pub trait IsStarkProver< }) .collect(); - // Parallel aux commit in chunks of K #[allow(clippy::type_complexity)] let mut aux_results: Vec<( Option>>, Option, )> = Vec::with_capacity(num_airs); + let mut bus_inputs_vec: Vec>> = + Vec::with_capacity(num_airs); + for chunk_start in (0..num_airs).step_by(k) { let chunk_end = (chunk_start + k).min(num_airs); let chunk_size = chunk_end - chunk_start; + // Step 1: Build aux traces for this chunk (parallel within chunk). + // The mutable borrow of air_trace_pairs ends after collect(). + { + #[cfg(feature = "parallel")] + let iter = air_trace_pairs[chunk_start..chunk_end].par_iter_mut(); + #[cfg(not(feature = "parallel"))] + let iter = air_trace_pairs[chunk_start..chunk_end].iter_mut(); + + let chunk_bus_inputs: Vec>> = iter + .map(|(air, trace, _)| { + if air.has_aux_trace() { + air.build_auxiliary_trace(*trace, &lookup_challenges) + } else { + None + } + }) + .collect(); + bus_inputs_vec.extend(chunk_bus_inputs); + } + + // Step 2: Extract aux columns → LDE → commit (parallel, using pools) + #[cfg(feature = "parallel")] let iter = pool_sets[..chunk_size].par_iter_mut().enumerate(); #[cfg(not(feature = "parallel"))] @@ -1610,20 +1682,57 @@ pub trait IsStarkProver< let (tree, root) = Self::commit_columns_bit_reversed(&pool.aux[..num_aux_cols]) .ok_or(ProvingError::EmptyCommitment)?; - Ok((Some(Arc::new(tree)), Some(root))) + Ok((Some(tree), Some(root), num_aux_cols)) } else { - Ok((None, None)) + Ok((None, None, 0)) } }) .collect(); - // Sequential: append aux roots to forked transcripts + // Step 3: Sequential — spill aux LDE + tree, append roots to transcripts + #[allow(unused_variables)] for (j, result) in chunk_aux.into_iter().enumerate() { - let (aux_tree, aux_root) = result?; + #[allow(unused_mut)] + let (mut aux_tree, aux_root, _num_aux_cols) = result?; + + 
#[cfg(feature = "disk-spill")] + { + let idx = chunk_start + j; + if _num_aux_cols > 0 { + let pool = &pool_sets[j]; + + // Add aux LDE columns to the already-spilled table + if let Some(ref mut spilled) = spilled_ldes[idx] { + spilled + .add_aux_from_pool(&pool.aux, _num_aux_cols) + .map_err(|e| { + ProvingError::WrongParameter(format!("disk-spill aux LDE: {e}")) + })?; + } + + // Spill aux Merkle tree nodes to disk + if let Some(ref mut tree) = aux_tree { + tree.spill_nodes_to_disk().map_err(|e| { + ProvingError::WrongParameter(format!("disk-spill aux tree: {e}")) + })?; + } + } + } + if let Some(ref root) = aux_root { table_transcripts[chunk_start + j].append_bytes(root); } - aux_results.push((aux_tree, aux_root)); + aux_results.push((aux_tree.map(Arc::new), aux_root)); + } + + // Step 4 (disk-spill): Free aux trace data for this chunk. + // The aux data has been LDE'd, committed, and spilled to disk. + // Freeing it now caps peak heap at K aux traces instead of all N. + #[cfg(feature = "disk-spill")] + for idx in chunk_start..chunk_end { + let (_, trace, _) = &mut air_trace_pairs[idx]; + trace.aux_table.data = Vec::new(); + trace.aux_table.height = 0; } } @@ -1664,72 +1773,172 @@ pub trait IsStarkProver< // ===================================================================== // Rounds 2-4: Parallel per-table proving in chunks of K // ===================================================================== - // Each chunk of K tables is processed in parallel. Each worker gets its - // own pool set and transcript fork. Pool sets are reused across chunks. let mut proofs = Vec::with_capacity(num_airs); - for chunk_start in (0..num_airs).step_by(k) { - let chunk_end = (chunk_start + k).min(num_airs); - let chunk_size = chunk_end - chunk_start; - let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; + #[cfg(feature = "disk-spill")] + { + // All LDE data is now on disk. Free pool buffers and trace data + // to minimize memory during Rounds 2-4. 
+ drop(pool_sets); + drop(twiddle_caches); + for (_, trace, _) in air_trace_pairs.iter_mut() { + trace.main_table.data = Vec::new(); + trace.main_table.height = 0; + trace.aux_table.data = Vec::new(); + trace.aux_table.height = 0; + } - #[cfg(feature = "parallel")] - let iter = pool_sets[..chunk_size] - .par_iter_mut() - .zip(chunk_transcripts.par_iter_mut()) - .enumerate(); - #[cfg(not(feature = "parallel"))] - let iter = pool_sets[..chunk_size] - .iter_mut() - .zip(chunk_transcripts.iter_mut()) - .enumerate(); + for chunk_start in (0..num_airs).step_by(k) { + let chunk_end = (chunk_start + k).min(num_airs); + + let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; + let chunk_ldes = &mut spilled_ldes[chunk_start..chunk_end]; + + #[cfg(feature = "parallel")] + let iter = chunk_ldes + .par_iter_mut() + .zip(chunk_transcripts.par_iter_mut()) + .enumerate(); + #[cfg(not(feature = "parallel"))] + let iter = chunk_ldes + .iter_mut() + .zip(chunk_transcripts.iter_mut()) + .enumerate(); + + let chunk_results: Vec> = iter + .map(|(j, (spilled_lde_opt, table_transcript))| { + let idx = chunk_start + j; + let (air, _, pub_inputs) = &air_trace_pairs[idx]; + let metadata = &metadatas[idx]; + let domain = &domains[idx]; + + // Build Round1 directly from spilled data — no LDE recomputation! 
+ let lde_trace = spilled_lde_opt + .take() + .expect("spilled LDE must exist for each table"); + + let main = Round1CommitmentData { + lde_trace_merkle_tree: Arc::clone(&metadata.main_merkle_tree), + lde_trace_merkle_root: metadata.main_merkle_root, + precomputed_merkle_tree: metadata + .precomputed_merkle_tree + .as_ref() + .map(Arc::clone), + precomputed_merkle_root: metadata.precomputed_merkle_root, + num_precomputed_cols: metadata.num_precomputed_cols, + }; + + let aux = + metadata + .aux_merkle_tree + .as_ref() + .map(|tree| Round1CommitmentData { + lde_trace_merkle_tree: Arc::clone(tree), + lde_trace_merkle_root: metadata + .aux_merkle_root + .expect("aux root must exist when aux tree exists"), + precomputed_merkle_tree: None, + precomputed_merkle_root: None, + num_precomputed_cols: 0, + }); + + let round_1_result = Round1 { + lde_trace, + main, + aux, + rap_challenges: metadata.rap_challenges.clone(), + bus_public_inputs: metadata.bus_public_inputs.clone(), + }; + + if let Some(ref bpi) = round_1_result.bus_public_inputs { + table_transcript.append_field_element(&bpi.table_contribution); + } + + let proof = Self::prove_rounds_2_to_4( + *air, + *pub_inputs, + &round_1_result, + table_transcript, + domain, + )?; - let chunk_results: Vec> = iter - .map(|(j, (pool, table_transcript))| { - let idx = chunk_start + j; - let (air, trace, pub_inputs) = &air_trace_pairs[idx]; - let metadata = &metadatas[idx]; - let domain = &domains[idx]; - let twiddles = &twiddle_caches[idx]; + Ok(proof) + }) + .collect(); - let round_1_result = Self::reconstruct_round1( - *air, - *trace, - domain, - metadata, - twiddles, - &mut pool.main, - &mut pool.aux, - )?; - - if let Some(ref bpi) = round_1_result.bus_public_inputs { - table_transcript.append_field_element(&bpi.table_contribution); - } + for result in chunk_results { + proofs.push(result?); + } + } + } - let proof = Self::prove_rounds_2_to_4( - *air, - *pub_inputs, - &round_1_result, - table_transcript, - domain, - )?; + 
#[cfg(not(feature = "disk-spill"))] + { + // Original flow: reconstruct LDE from traces + pool, then prove. + for chunk_start in (0..num_airs).step_by(k) { + let chunk_end = (chunk_start + k).min(num_airs); + let chunk_size = chunk_end - chunk_start; + + let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; + + #[cfg(feature = "parallel")] + let iter = pool_sets[..chunk_size] + .par_iter_mut() + .zip(chunk_transcripts.par_iter_mut()) + .enumerate(); + #[cfg(not(feature = "parallel"))] + let iter = pool_sets[..chunk_size] + .iter_mut() + .zip(chunk_transcripts.iter_mut()) + .enumerate(); + + let chunk_results: Vec> = iter + .map(|(j, (pool, table_transcript))| { + let idx = chunk_start + j; + let (air, trace, pub_inputs) = &air_trace_pairs[idx]; + let metadata = &metadatas[idx]; + let domain = &domains[idx]; + let twiddles = &twiddle_caches[idx]; + + let round_1_result = Self::reconstruct_round1( + *air, + *trace, + domain, + metadata, + twiddles, + &mut pool.main, + &mut pool.aux, + )?; - // Return column Vecs to pool (zero-copy move back) - let (main_cols, aux_cols) = round_1_result.lde_trace.into_columns(); - for (slot, col) in pool.main.iter_mut().zip(main_cols) { - *slot = col; - } - for (slot, col) in pool.aux.iter_mut().zip(aux_cols) { - *slot = col; - } + if let Some(ref bpi) = round_1_result.bus_public_inputs { + table_transcript.append_field_element(&bpi.table_contribution); + } - Ok(proof) - }) - .collect(); + let proof = Self::prove_rounds_2_to_4( + *air, + *pub_inputs, + &round_1_result, + table_transcript, + domain, + )?; + + // Return column Vecs to pool (zero-copy move back) + let (main_cols, aux_cols) = round_1_result.lde_trace.into_columns(); + for (slot, col) in pool.main.iter_mut().zip(main_cols) { + *slot = col; + } + for (slot, col) in pool.aux.iter_mut().zip(aux_cols) { + *slot = col; + } + + Ok(proof) + }) + .collect(); - for result in chunk_results { - proofs.push(result?); + for result in chunk_results { + 
proofs.push(result?); + } } } diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index c41629e6b..047d39c00 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -6,6 +6,60 @@ use math::field::{ #[cfg(feature = "parallel")] use rayon::prelude::*; +/// Mmap-backed storage for a spilled Table. +/// +/// The table data is written row-major to a temp file and mmapped back. +/// Access goes through pointer arithmetic on the mmap, matching the +/// original `data[row * width + col]` layout. +#[cfg(feature = "disk-spill")] +pub(crate) struct TableMmapBacking { + mmap: memmap2::Mmap, + _file: std::fs::File, + width: usize, + height: usize, + elem_size: usize, +} + +// Manual trait impls so Table can keep its derive macros. +// Spilled tables should not be cloned during proving. +#[cfg(feature = "disk-spill")] +impl Clone for TableMmapBacking { + fn clone(&self) -> Self { + panic!("TableMmapBacking cannot be cloned — spilled tables should not be cloned") + } +} + +#[cfg(feature = "disk-spill")] +impl Default for TableMmapBacking { + fn default() -> Self { + panic!("TableMmapBacking has no default — use None") + } +} + +#[cfg(feature = "disk-spill")] +impl std::fmt::Debug for TableMmapBacking { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TableMmapBacking") + .field("width", &self.width) + .field("height", &self.height) + .field("elem_size", &self.elem_size) + .finish() + } +} + +#[cfg(feature = "disk-spill")] +impl PartialEq for TableMmapBacking { + fn eq(&self, other: &Self) -> bool { + self.width == other.width + && self.height == other.height + && self.elem_size == other.elem_size + && self.mmap[..] == other.mmap[..] + } +} + +#[cfg(feature = "disk-spill")] +impl Eq for TableMmapBacking {} + /// A two-dimensional Table holding field elements, arranged in a row-major order. 
/// This is the basic underlying data structure used for any two-dimensional component in the /// the STARK protocol implementation, such as the `TraceTable` and the `EvaluationFrame`. @@ -17,6 +71,9 @@ pub struct Table { pub data: Vec>, pub width: usize, pub height: usize, + #[cfg(feature = "disk-spill")] + #[serde(skip)] + pub(crate) mmap_backing: Option, } impl Table { @@ -29,6 +86,8 @@ impl Table { data: Vec::new(), width, height: 0, + #[cfg(feature = "disk-spill")] + mmap_backing: None, }; } @@ -40,6 +99,8 @@ impl Table { data, width, height, + #[cfg(feature = "disk-spill")] + mmap_backing: None, } } @@ -92,11 +153,30 @@ impl Table { /// Returns a vector of vectors of field elements representing the table rows pub fn rows(&self) -> Vec>> { - self.data.chunks(self.width).map(|r| r.to_vec()).collect() + (0..self.height) + .map(|row_idx| self.get_row(row_idx).to_vec()) + .collect() } /// Given a row index, returns a reference to that row as a slice of field elements. pub fn get_row(&self, row_idx: usize) -> &[FieldElement] { + #[cfg(feature = "disk-spill")] + if let Some(ref backing) = self.mmap_backing { + debug_assert!( + row_idx < backing.height, + "Table::get_row out of bounds: row={row_idx}, height={}", + backing.height + ); + let offset = row_idx * backing.width * backing.elem_size; + // SAFETY: Row-major layout means width elements are contiguous. + // Same repr(transparent) + page-aligned guarantees as get(). 
+ return unsafe { + std::slice::from_raw_parts( + backing.mmap.as_ptr().add(offset) as *const FieldElement, + backing.width, + ) + }; + } let row_offset = row_idx * self.width; &self.data[row_offset..row_offset + self.width] } @@ -120,7 +200,7 @@ impl Table { (0..self.width) .map(|col_idx| { (0..self.height) - .map(|row_idx| self.data[row_idx * self.width + col_idx].clone()) + .map(|row_idx| self.get(row_idx, col_idx).clone()) .collect() }) .collect() @@ -128,7 +208,7 @@ impl Table { pub fn get_column(&self, col_idx: usize) -> Vec> { (0..self.height) - .map(|row_idx| self.data[row_idx * self.width + col_idx].clone()) + .map(|row_idx| self.get(row_idx, col_idx).clone()) .collect() } @@ -148,17 +228,34 @@ impl Table { let iter = output[..self.width].par_iter_mut().enumerate(); #[cfg(not(feature = "parallel"))] let iter = output[..self.width].iter_mut().enumerate(); + // Use get() which transparently reads from mmap or data Vec iter.for_each(|(col_idx, buf)| { buf.clear(); buf.reserve(self.height.saturating_sub(buf.capacity())); for row_idx in 0..self.height { - buf.push(self.data[row_idx * self.width + col_idx].clone()); + buf.push(self.get(row_idx, col_idx).clone()); } }); } /// Given row and column indexes, returns the stored field element in that position of the table. + #[inline] pub fn get(&self, row: usize, col: usize) -> &FieldElement { + #[cfg(feature = "disk-spill")] + if let Some(ref backing) = self.mmap_backing { + debug_assert!( + row < backing.height && col < backing.width, + "Table::get out of bounds: row={row}, col={col}, height={}, width={}", + backing.height, + backing.width + ); + // Row-major layout: offset = (row * width + col) * elem_size + let offset = (row * backing.width + col) * backing.elem_size; + // SAFETY: FieldElement is #[repr(transparent)] over F::BaseType. + // The mmap is page-aligned and elements are contiguously packed. + // The data was written from identical types on the same machine. 
+ return unsafe { &*(backing.mmap.as_ptr().add(offset) as *const FieldElement) }; + } let idx = row * self.width + col; &self.data[idx] } @@ -168,6 +265,63 @@ impl Table { self.data[idx] = value; } + /// Returns true if this table's data has been spilled to disk via mmap. + pub fn is_spilled(&self) -> bool { + #[cfg(feature = "disk-spill")] + { + self.mmap_backing.is_some() + } + #[cfg(not(feature = "disk-spill"))] + { + false + } + } + + /// Spill the table's row-major data to a temp file and mmap it back. + /// Frees the heap `data` Vec while preserving access through `get()`, + /// `get_row()`, `columns()`, and `extract_columns_into()`. + /// + /// No-op if the table is empty or already spilled. + #[cfg(feature = "disk-spill")] + pub fn spill_to_disk(&mut self) -> std::io::Result<()> { + use std::io::Write; + + if self.data.is_empty() || self.mmap_backing.is_some() { + return Ok(()); + } + + let elem_size = std::mem::size_of::>(); + let total_bytes = self.data.len() * elem_size; + + let file = tempfile::tempfile()?; + file.set_len(total_bytes as u64)?; + { + let mut writer = std::io::BufWriter::new(&file); + // SAFETY: FieldElement is #[repr(transparent)] over F::BaseType. + // The Vec has the same byte layout as a contiguous array. + let bytes: &[u8] = + unsafe { std::slice::from_raw_parts(self.data.as_ptr() as *const u8, total_bytes) }; + writer.write_all(bytes)?; + writer.flush()?; + } + + // SAFETY: We own the file exclusively. + let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; + + self.mmap_backing = Some(TableMmapBacking { + mmap, + _file: file, + width: self.width, + height: self.height, + elem_size, + }); + + // Free heap allocation + self.data = Vec::new(); + + Ok(()) + } + /// Given a step size, converts the given table into a `Frame`. /// Clones row data into owned Vecs (only used by verifier on small OOD tables). 
pub fn into_frame(&self, main_trace_columns: usize, step_size: usize) -> Frame { @@ -225,3 +379,81 @@ where &self.aux_data[row][col] } } + +#[cfg(all(test, feature = "disk-spill"))] +mod disk_spill_tests { + use super::*; + use math::field::goldilocks::GoldilocksField; + + type F = GoldilocksField; + + /// Create a Table, spill it to disk, and verify that `get()` and `get_row()` + /// return the same values as before the spill. + #[test] + fn test_table_spill_roundtrip() { + let width = 4; + let height = 8; + let data: Vec> = (0..width * height) + .map(|i| FieldElement::::from(i as u64)) + .collect(); + + let mut table = Table::new(data.clone(), width); + assert!(!table.is_spilled()); + + // Snapshot values before spill + let pre_spill: Vec>> = (0..height) + .map(|r| (0..width).map(|c| *table.get(r, c)).collect()) + .collect(); + + table.spill_to_disk().expect("spill_to_disk failed"); + assert!(table.is_spilled()); + assert!( + table.data.is_empty(), + "heap data should be freed after spill" + ); + + // Verify get() returns the same values + for (r, pre_row) in pre_spill.iter().enumerate() { + for (c, pre_val) in pre_row.iter().enumerate() { + assert_eq!(table.get(r, c), pre_val, "mismatch at ({r}, {c})"); + } + } + + // Verify get_row() returns the same values + for (r, pre_row) in pre_spill.iter().enumerate() { + let row = table.get_row(r); + assert_eq!(row.len(), width); + for (c, pre_val) in pre_row.iter().enumerate() { + assert_eq!(&row[c], pre_val, "get_row mismatch at ({r}, {c})"); + } + } + } + + /// Spilling an empty table is a no-op. + #[test] + fn test_table_spill_empty_is_noop() { + let mut table = Table::::new(Vec::new(), 0); + table + .spill_to_disk() + .expect("spill_to_disk on empty table failed"); + assert!(!table.is_spilled()); + } + + /// Spilling twice is idempotent (second call is a no-op). 
+ #[test] + fn test_table_spill_idempotent() { + let data: Vec> = + (0..16).map(|i| FieldElement::::from(i as u64)).collect(); + let mut table = Table::new(data, 4); + + table.spill_to_disk().expect("first spill failed"); + assert!(table.is_spilled()); + + table.spill_to_disk().expect("second spill should be no-op"); + assert!(table.is_spilled()); + + // Still readable + assert_eq!(table.get(0, 0), &FieldElement::::from(0u64)); + assert_eq!(table.get(3, 3), &FieldElement::::from(15u64)); + } +} diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index da74b5335..0e7c12640 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -1,3 +1,6 @@ +#[cfg(all(feature = "disk-spill", feature = "wasm"))] +compile_error!("disk-spill and wasm features are mutually exclusive"); + use crate::domain::Domain; use crate::table::Table; use itertools::Itertools; @@ -150,6 +153,15 @@ where self.num_aux_columns = num_aux_columns; } + /// Spill the main trace data to disk via mmap. + /// After this call, `main_table.data` is freed but all accessors + /// (`get_main`, `columns_main`, `extract_columns_main_into`) continue + /// to work transparently through mmap. + #[cfg(feature = "disk-spill")] + pub fn spill_main_to_disk(&mut self) -> std::io::Result<()> { + self.main_table.spill_to_disk() + } + pub fn compute_trace_polys_main(&self) -> Vec>> where S: IsFFTField + IsSubFieldOf, @@ -201,6 +213,35 @@ where pub(crate) aux_columns: Vec>>, pub(crate) lde_step_size: usize, pub(crate) blowup_factor: usize, + /// When `disk-spill` is enabled and data has been spilled to disk, + /// this holds the mmap backing. Access methods read from here instead + /// of `main_columns`/`aux_columns` (which are empty after spill). + #[cfg(feature = "disk-spill")] + pub(crate) mmap_backing: Option, +} + +/// File-backed mmap storage for LDE column data. 
+/// +/// Columns are stored in separate files for main and aux (since they may be +/// spilled at different times during Phase A and Phase B of proving). +/// Each file has column-major layout: +/// ```text +/// [col_0][col_1]...[col_N] +/// ``` +/// Each column occupies `num_rows * elem_size` contiguous bytes. +/// Elements are stored as their native in-memory representation, +/// which is valid because `FieldElement` is `#[repr(transparent)]`. +#[cfg(feature = "disk-spill")] +pub(crate) struct MmapBacking { + main_mmap: memmap2::Mmap, + _main_file: std::fs::File, + aux_mmap: Option, + _aux_file: Option, + num_rows: usize, + num_main_cols: usize, + num_aux_cols: usize, + main_elem_size: usize, + aux_elem_size: usize, } impl LDETraceTable @@ -223,24 +264,39 @@ where aux_columns, lde_step_size, blowup_factor, + #[cfg(feature = "disk-spill")] + mmap_backing: None, } } /// Consume self and return the owned column vectors. + /// When mmap-backed (disk-spill), returns empty Vecs since columns were freed. #[allow(clippy::type_complexity)] pub fn into_columns(self) -> (Vec>>, Vec>>) { (self.main_columns, self.aux_columns) } pub fn num_main_cols(&self) -> usize { + #[cfg(feature = "disk-spill")] + if let Some(ref backing) = self.mmap_backing { + return backing.num_main_cols; + } self.main_columns.len() } pub fn num_aux_cols(&self) -> usize { + #[cfg(feature = "disk-spill")] + if let Some(ref backing) = self.mmap_backing { + return backing.num_aux_cols; + } self.aux_columns.len() } pub fn num_rows(&self) -> usize { + #[cfg(feature = "disk-spill")] + if let Some(ref backing) = self.mmap_backing { + return backing.num_rows; + } if self.main_columns.is_empty() { 0 } else { @@ -251,21 +307,51 @@ where /// Get a single main-trace element by (row, col). 
#[inline] pub fn get_main(&self, row: usize, col: usize) -> &FieldElement { + #[cfg(feature = "disk-spill")] + if let Some(ref backing) = self.mmap_backing { + debug_assert!( + row < backing.num_rows && col < backing.num_main_cols, + "get_main out of bounds: row={row}, col={col}, num_rows={}, num_main_cols={}", + backing.num_rows, + backing.num_main_cols + ); + let offset = (col * backing.num_rows + row) * backing.main_elem_size; + // SAFETY: FieldElement is #[repr(transparent)] over F::BaseType. + // The mmap is page-aligned and elements are contiguously packed at + // multiples of main_elem_size, so alignment is satisfied. + // The data was written from identical types on the same machine. + return unsafe { &*(backing.main_mmap.as_ptr().add(offset) as *const FieldElement) }; + } &self.main_columns[col][row] } /// Get a single aux-trace element by (row, col). #[inline] pub fn get_aux(&self, row: usize, col: usize) -> &FieldElement { + #[cfg(feature = "disk-spill")] + if let Some(ref backing) = self.mmap_backing { + debug_assert!( + row < backing.num_rows && col < backing.num_aux_cols, + "get_aux out of bounds: row={row}, col={col}, num_rows={}, num_aux_cols={}", + backing.num_rows, + backing.num_aux_cols + ); + let aux_mmap = backing + .aux_mmap + .as_ref() + .expect("aux mmap must exist when accessing aux columns"); + let offset = (col * backing.num_rows + row) * backing.aux_elem_size; + // SAFETY: Same as get_main — repr(transparent) + page-aligned mmap. + return unsafe { &*(aux_mmap.as_ptr().add(offset) as *const FieldElement) }; + } &self.aux_columns[col][row] } /// Gather a full main-trace row into an owned Vec. /// Used by `open_trace_polys` (called ~30 times per table, allocation is negligible). 
pub fn gather_main_row(&self, row_idx: usize) -> Vec> { - self.main_columns - .iter() - .map(|col| col[row_idx].clone()) + (0..self.num_main_cols()) + .map(|col| self.get_main(row_idx, col).clone()) .collect() } @@ -277,17 +363,15 @@ where col_start: usize, col_end: usize, ) -> Vec> { - self.main_columns[col_start..col_end] - .iter() - .map(|col| col[row_idx].clone()) + (col_start..col_end) + .map(|col| self.get_main(row_idx, col).clone()) .collect() } /// Gather a full aux-trace row into an owned Vec. pub fn gather_aux_row(&self, row_idx: usize) -> Vec> { - self.aux_columns - .iter() - .map(|col| col[row_idx].clone()) + (0..self.num_aux_cols()) + .map(|col| self.get_aux(row_idx, col).clone()) .collect() } @@ -300,6 +384,121 @@ where pub fn step_to_row(&self, step: usize) -> usize { self.lde_step_size * step } + + /// Write pool column data to a temp file, mmap it, and return an mmap-backed + /// LDETraceTable. The pool buffers are NOT consumed — they keep their capacity + /// for reuse by the next chunk. + /// + /// This is used during Phase A to snapshot the main LDE columns from the pool + /// before the pool is overwritten by the next chunk. 
+ #[cfg(feature = "disk-spill")] + pub fn spill_main_from_pool( + main_pool: &[Vec>], + num_main_cols: usize, + trace_step_size: usize, + blowup_factor: usize, + ) -> std::io::Result { + let num_rows = if num_main_cols > 0 { + main_pool[0].len() + } else { + 0 + }; + + let main_elem_size = std::mem::size_of::>(); + let (main_mmap, main_file) = + Self::write_pool_columns_to_mmap(&main_pool[..num_main_cols], main_elem_size)?; + + let lde_step_size = trace_step_size * blowup_factor; + let aux_elem_size = std::mem::size_of::>(); + + Ok(Self { + main_columns: Vec::new(), + aux_columns: Vec::new(), + lde_step_size, + blowup_factor, + mmap_backing: Some(MmapBacking { + main_mmap, + _main_file: main_file, + aux_mmap: None, + _aux_file: None, + num_rows, + num_main_cols, + num_aux_cols: 0, + main_elem_size, + aux_elem_size, + }), + }) + } + + /// Add aux LDE columns from the pool to an already-spilled LDETraceTable. + /// + /// Used during Phase B to attach aux data to a table whose main LDE was + /// already spilled in Phase A. + #[cfg(feature = "disk-spill")] + pub fn add_aux_from_pool( + &mut self, + aux_pool: &[Vec>], + num_aux_cols: usize, + ) -> std::io::Result<()> { + if num_aux_cols == 0 { + return Ok(()); + } + + let aux_elem_size = std::mem::size_of::>(); + let (aux_mmap, aux_file) = + Self::write_pool_columns_to_mmap(&aux_pool[..num_aux_cols], aux_elem_size)?; + + let backing = self + .mmap_backing + .as_mut() + .expect("add_aux_from_pool requires main already spilled"); + backing.aux_mmap = Some(aux_mmap); + backing._aux_file = Some(aux_file); + backing.num_aux_cols = num_aux_cols; + + Ok(()) + } + + /// Write borrowed pool columns to a temp file and mmap them. + /// Does NOT consume the pool — columns keep their capacity. + /// + /// Note: the concrete element types are `FieldElement` (8 bytes, + /// `#[repr(transparent)]` over `u64`) and `FieldElement` + /// (24 bytes, `#[repr(transparent)]` over `[u64; 3]`). 
Neither has padding, + /// so the raw byte round-trip is well-defined. + #[cfg(feature = "disk-spill")] + fn write_pool_columns_to_mmap( + columns: &[Vec], + elem_size: usize, + ) -> std::io::Result<(memmap2::Mmap, std::fs::File)> { + use std::io::Write; + + let num_cols = columns.len(); + let num_rows = if num_cols > 0 { columns[0].len() } else { 0 }; + debug_assert!( + columns.iter().all(|c| c.len() == num_rows), + "all columns must have the same length" + ); + let total_bytes = (num_cols * num_rows * elem_size) as u64; + + let file = tempfile::tempfile()?; + file.set_len(total_bytes)?; + { + let mut writer = std::io::BufWriter::new(&file); + for col in columns { + // SAFETY: FieldElement is #[repr(transparent)] over BaseType, + // so the Vec has the same byte layout as a contiguous array. + let bytes: &[u8] = unsafe { + std::slice::from_raw_parts(col.as_ptr() as *const u8, col.len() * elem_size) + }; + writer.write_all(bytes)?; + } + writer.flush()?; + } + // SAFETY: We own the file exclusively. + let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; + Ok((mmap, file)) + } } /// Given a slice of trace polynomials, an evaluation point `x`, the frame offsets @@ -501,6 +700,103 @@ where Table::new(table_data, table_width) } +#[cfg(all(test, feature = "disk-spill"))] +mod disk_spill_tests { + use super::*; + use math::field::extensions_goldilocks::Degree3GoldilocksExtensionField; + use math::field::goldilocks::GoldilocksField; + + type F = GoldilocksField; + type E = Degree3GoldilocksExtensionField; + + /// Spill main LDE columns from a simulated pool, then verify `get_main()` + /// returns the correct values from the mmap backing. 
+ #[test] + fn test_lde_spill_main_roundtrip() { + let num_cols = 3; + let num_rows = 16; + + // Simulate pool: column-major Vec> + let pool: Vec>> = (0..num_cols) + .map(|c| { + (0..num_rows) + .map(|r| FieldElement::::from((c * num_rows + r) as u64)) + .collect() + }) + .collect(); + + let lde = LDETraceTable::::spill_main_from_pool( + &pool, num_cols, /*trace_step_size=*/ 1, /*blowup_factor=*/ 1, + ) + .expect("spill_main_from_pool failed"); + + assert_eq!(lde.num_main_cols(), num_cols); + assert_eq!(lde.num_rows(), num_rows); + assert!( + lde.main_columns.is_empty(), + "main_columns should be empty after spill" + ); + + // Verify every element + for (c, pool_col) in pool.iter().enumerate() { + for (r, pool_val) in pool_col.iter().enumerate() { + assert_eq!( + lde.get_main(r, c), + pool_val, + "mismatch at (row={r}, col={c})" + ); + } + } + } + + /// Spill main + aux LDE columns and verify both are accessible. + #[test] + fn test_lde_spill_main_and_aux_roundtrip() { + let num_main = 2; + let num_aux = 2; + let num_rows = 8; + + let main_pool: Vec>> = (0..num_main) + .map(|c| { + (0..num_rows) + .map(|r| FieldElement::::from((c * num_rows + r) as u64)) + .collect() + }) + .collect(); + + let aux_pool: Vec>> = (0..num_aux) + .map(|c| { + (0..num_rows) + .map(|r| FieldElement::::from((100 + c * num_rows + r) as u64)) + .collect() + }) + .collect(); + + let mut lde = LDETraceTable::::spill_main_from_pool(&main_pool, num_main, 1, 1) + .expect("spill_main_from_pool failed"); + + lde.add_aux_from_pool(&aux_pool, num_aux) + .expect("add_aux_from_pool failed"); + + assert_eq!(lde.num_main_cols(), num_main); + assert_eq!(lde.num_aux_cols(), num_aux); + + // Verify main + for (c, main_col) in main_pool.iter().enumerate() { + for (r, main_val) in main_col.iter().enumerate() { + assert_eq!(lde.get_main(r, c), main_val); + } + } + + // Verify aux + for (c, aux_col) in aux_pool.iter().enumerate() { + for (r, aux_val) in aux_col.iter().enumerate() { + assert_eq!(lde.get_aux(r, 
c), aux_val); + } + } + } +} + pub fn columns2rows(columns: Vec>) -> Vec> where F: Clone, diff --git a/executor/programs/asm/fib_iterative_128M.s b/executor/programs/asm/fib_iterative_128M.s new file mode 100644 index 000000000..b7eb30470 --- /dev/null +++ b/executor/programs/asm/fib_iterative_128M.s @@ -0,0 +1,24 @@ + .attribute 5, "rv64i2p1_m2p0" + .globl main +main: + # Iterative Fibonacci - pure register arithmetic + # ~128M steps + # + # Loop body: 5 instructions per iteration + # 25600000 iterations × 5 = 128000000 + setup/teardown + + li t0, 0 # a = fib(0) = 0 + li t1, 1 # b = fib(1) = 1 + li a0, 25600000 # iteration count + +.loop: + add t2, t0, t1 # t2 = a + b + mv t0, t1 # a = b + mv t1, t2 # b = t2 + addi a0, a0, -1 # n-- + bnez a0, .loop # loop if n != 0 + + mv a0, t1 # result = b + li a0, 0 + li a7, 93 + ecall # halt with result in a0 diff --git a/executor/programs/asm/fib_iterative_16M.s b/executor/programs/asm/fib_iterative_16M.s new file mode 100644 index 000000000..1ede85aaf --- /dev/null +++ b/executor/programs/asm/fib_iterative_16M.s @@ -0,0 +1,24 @@ + .attribute 5, "rv64i2p1_m2p0" + .globl main +main: + # Iterative Fibonacci - pure register arithmetic + # ~16M steps + # + # Loop body: 5 instructions per iteration + # 3200000 iterations × 5 = 16000000 + setup/teardown + + li t0, 0 # a = fib(0) = 0 + li t1, 1 # b = fib(1) = 1 + li a0, 3200000 # iteration count + +.loop: + add t2, t0, t1 # t2 = a + b + mv t0, t1 # a = b + mv t1, t2 # b = t2 + addi a0, a0, -1 # n-- + bnez a0, .loop # loop if n != 0 + + mv a0, t1 # result = b + li a0, 0 + li a7, 93 + ecall # halt with result in a0 diff --git a/executor/programs/asm/fib_iterative_2M.s b/executor/programs/asm/fib_iterative_2M.s index e224db769..96cdf68e2 100644 --- a/executor/programs/asm/fib_iterative_2M.s +++ b/executor/programs/asm/fib_iterative_2M.s @@ -5,11 +5,11 @@ main: # ~2M steps # # Loop body: 5 instructions per iteration - # 399999 iterations × 5 = 1999995 + 4 setup/teardown = 1999999 + # 
400000 iterations × 5 = 2000000 + 4 setup/teardown ≈ 2000004 li t0, 0 # a = fib(0) = 0 li t1, 1 # b = fib(1) = 1 - li a0, 399999 # iteration count + li a0, 400000 # iteration count .loop: add t2, t0, t1 # t2 = a + b @@ -19,5 +19,6 @@ main: bnez a0, .loop # loop if n != 0 mv a0, t1 # result = b + li a0, 0 li a7, 93 ecall # halt with result in a0 diff --git a/executor/programs/asm/fib_iterative_32M.s b/executor/programs/asm/fib_iterative_32M.s new file mode 100644 index 000000000..df6644193 --- /dev/null +++ b/executor/programs/asm/fib_iterative_32M.s @@ -0,0 +1,24 @@ + .attribute 5, "rv64i2p1_m2p0" + .globl main +main: + # Iterative Fibonacci - pure register arithmetic + # ~32M steps + # + # Loop body: 5 instructions per iteration + # 6400000 iterations × 5 = 32000000 + setup/teardown + + li t0, 0 # a = fib(0) = 0 + li t1, 1 # b = fib(1) = 1 + li a0, 6400000 # iteration count + +.loop: + add t2, t0, t1 # t2 = a + b + mv t0, t1 # a = b + mv t1, t2 # b = t2 + addi a0, a0, -1 # n-- + bnez a0, .loop # loop if n != 0 + + mv a0, t1 # result = b + li a0, 0 + li a7, 93 + ecall # halt with result in a0 diff --git a/executor/programs/asm/fib_iterative_64M.s b/executor/programs/asm/fib_iterative_64M.s new file mode 100644 index 000000000..af232577b --- /dev/null +++ b/executor/programs/asm/fib_iterative_64M.s @@ -0,0 +1,24 @@ + .attribute 5, "rv64i2p1_m2p0" + .globl main +main: + # Iterative Fibonacci - pure register arithmetic + # ~64M steps + # + # Loop body: 5 instructions per iteration + # 12800000 iterations × 5 = 64000000 + setup/teardown + + li t0, 0 # a = fib(0) = 0 + li t1, 1 # b = fib(1) = 1 + li a0, 12800000 # iteration count + +.loop: + add t2, t0, t1 # t2 = a + b + mv t0, t1 # a = b + mv t1, t2 # b = t2 + addi a0, a0, -1 # n-- + bnez a0, .loop # loop if n != 0 + + mv a0, t1 # result = b + li a0, 0 + li a7, 93 + ecall # halt with result in a0 diff --git a/executor/programs/asm/fib_iterative_8M.s b/executor/programs/asm/fib_iterative_8M.s new file mode 100644 index 
000000000..11dab529c --- /dev/null +++ b/executor/programs/asm/fib_iterative_8M.s @@ -0,0 +1,24 @@ + .attribute 5, "rv64i2p1_m2p0" + .globl main +main: + # Iterative Fibonacci - pure register arithmetic + # ~8M steps + # + # Loop body: 5 instructions per iteration + # 1600000 iterations × 5 = 8000000 + setup/teardown + + li t0, 0 # a = fib(0) = 0 + li t1, 1 # b = fib(1) = 1 + li a0, 1600000 # iteration count + +.loop: + add t2, t0, t1 # t2 = a + b + mv t0, t1 # a = b + mv t1, t2 # b = t2 + addi a0, a0, -1 # n-- + bnez a0, .loop # loop if n != 0 + + mv a0, t1 # result = b + li a0, 0 + li a7, 93 + ecall # halt with result in a0 diff --git a/prover/Cargo.toml b/prover/Cargo.toml index 56189724d..72453ed13 100644 --- a/prover/Cargo.toml +++ b/prover/Cargo.toml @@ -7,6 +7,7 @@ edition = "2024" default = ["parallel"] parallel = ["stark/parallel", "math/parallel", "crypto/parallel", "dep:rayon"] debug-checks = ["stark/debug-checks"] +disk-spill = ["stark/disk-spill"] [dependencies] stark = { path = "../crypto/stark" } @@ -19,6 +20,7 @@ rayon = { version = "1.8.0", optional = true } [dev-dependencies] env_logger = "*" criterion = { version = "0.5", default-features = false } +bincode = "1" [[bench]] name = "vm_prover_benchmark" diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 22f789667..a4c6312d9 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -364,6 +364,19 @@ pub fn prove_with_options( // Generate all traces from ELF and execution logs. // Page tables are derived from the prover's MemoryState (all accessed pages). let mut traces = Traces::from_elf_and_logs(&program, &result.logs, max_rows)?; + + // Drop executor result (logs) — no longer needed after trace building. + // For large programs this frees significant heap memory. + drop(result); + + // With disk-spill, move all main trace data from heap to mmap immediately + // after building. 
For 64M+ instructions the total trace data can exceed + // available RAM; spilling here frees heap before pool allocation in multi_prove. + #[cfg(feature = "disk-spill")] + traces + .spill_all_main_to_disk() + .map_err(|e| Error::Prover(format!("disk-spill traces: {e}")))?; + let table_counts = traces.table_counts(); let airs = VmAirs::new( &program, diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 2e0d084b2..1a12e5bfc 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -1134,6 +1134,7 @@ fn collect_bitwise_from_page(elf: &Elf, memory_state: &MemoryState) -> Vec( } } +/// Like [`chunk_and_generate`], but spills each chunk to disk immediately after +/// generation. Peak memory is bounded by one chunk instead of all chunks combined. +/// This is critical for large tables (e.g. MEMW with 192M rows for 64M instructions). +#[cfg(feature = "disk-spill")] +fn chunk_generate_and_spill( + ops: &[T], + max_rows: usize, + generate: impl Fn(&[T]) -> TraceTable, +) -> Result>, Error> { + let op_chunks: Vec<&[T]> = if ops.is_empty() { + vec![&[][..]] + } else { + ops.chunks(max_rows).collect() + }; + let mut tables = Vec::with_capacity(op_chunks.len()); + for chunk in op_chunks { + let mut t = generate(chunk); + t.main_table + .spill_to_disk() + .map_err(|e| Error::Prover(format!("disk-spill trace: {e}")))?; + tables.push(t); + } + Ok(tables) +} + impl Traces { /// Returns the number of chunks for each split table. pub fn table_counts(&self) -> crate::TableCounts { @@ -1285,6 +1311,47 @@ impl Traces { } } + /// Spill all main trace tables from heap to mmap files. + /// + /// After this call, all trace data is accessible via mmap (demand-paged by the OS) + /// and the heap allocations are freed. This is critical for large programs where + /// total trace data exceeds available RAM. 
+ #[cfg(feature = "disk-spill")] + pub fn spill_all_main_to_disk(&mut self) -> std::io::Result<()> { + for t in &mut self.cpus { + t.main_table.spill_to_disk()?; + } + self.bitwise.main_table.spill_to_disk()?; + for t in &mut self.lts { + t.main_table.spill_to_disk()?; + } + for t in &mut self.shifts { + t.main_table.spill_to_disk()?; + } + for t in &mut self.memws { + t.main_table.spill_to_disk()?; + } + for t in &mut self.loads { + t.main_table.spill_to_disk()?; + } + self.decode.main_table.spill_to_disk()?; + for t in &mut self.muls { + t.main_table.spill_to_disk()?; + } + for t in &mut self.dvrms { + t.main_table.spill_to_disk()?; + } + for t in &mut self.pages { + t.main_table.spill_to_disk()?; + } + self.register.main_table.spill_to_disk()?; + for t in &mut self.branches { + t.main_table.spill_to_disk()?; + } + self.halt.main_table.spill_to_disk()?; + Ok(()) + } + /// Extract page configurations from ELF only (deterministic from binary). /// /// Returns PageConfigs for pages covered by ELF segments, with their @@ -1430,7 +1497,7 @@ impl Traces { // Initialize memory state from ELF so first accesses get correct old_value. let mut memory_state = MemoryState::from_elf(elf); let mut register_state = RegisterState::new(elf.entry_point); - let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops) = + let (mut memw_ops, load_ops, mut lt_ops, shift_ops, bitwise_ops) = collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state); // HALT finalization: 33 register MEMW operations at timestamp u64::MAX. @@ -1505,20 +1572,75 @@ impl Traces { // ===================================================================== lt_ops.extend(collect_lt_from_memw(&memw_ops)); + // Generate traces, largest tables first. + // With disk-spill: use chunk_generate_and_spill — each chunk is spilled to disk + // immediately so peak memory is bounded by one chunk + raw ops. + // Without disk-spill: use chunk_and_generate (standard in-memory path). 
+ // Drop raw ops after each generation to free heap before the next table. + #[cfg(feature = "disk-spill")] + macro_rules! gen_traces { + ($ops:expr, $max:expr, $gen:expr) => { + chunk_generate_and_spill($ops, $max, $gen)? + }; + } + #[cfg(not(feature = "disk-spill"))] + macro_rules! gen_traces { + ($ops:expr, $max:expr, $gen:expr) => { + chunk_and_generate($ops, $max, $gen) + }; + } + // ===================================================================== - // PHASE 4: All → Bitwise lookups + // PHASE 4+5 interleaved: collect bitwise, update multiplicities + // incrementally, generate traces, and drop raw ops ASAP. + // + // Key insight: bitwise_ops can be enormous (43 GB for MEMW alone at 64M). + // Instead of accumulating all bitwise_ops into one Vec, we generate the + // bitwise trace early and update multiplicities in small batches, dropping + // each batch immediately. This keeps bitwise memory bounded. // ===================================================================== - bitwise_ops.extend(collect_bitwise_from_lt(<_ops)); - bitwise_ops.extend(collect_bitwise_from_memw(&memw_ops)); - bitwise_ops.extend(collect_bitwise_from_mul(&mul_ops)); - bitwise_ops.extend(collect_bitwise_from_dvrm(&dvrm_ops)); - bitwise_ops.extend(collect_bitwise_from_branch(&branch_ops)); - bitwise_ops.extend(shift::collect_bitwise_from_shift(&shift_ops)); - // PAGE tables do IS_BYTE lookups for init and fini values (C1, C2) - bitwise_ops.extend(collect_bitwise_from_page(elf, &memory_state)); + let mut bitwise = bitwise::generate_bitwise_trace(); + + // Flush initial CPU bitwise ops (from Phase 2's collect_ops_from_cpu) + bitwise::update_multiplicities(&mut bitwise, &bitwise_ops); + drop(bitwise_ops); + + // Collect-and-flush macro: collects bitwise ops from a chunk of source + // data, updates multiplicities immediately, then drops the ops vec. + // This keeps peak bitwise memory bounded to ~224 MB per batch. + macro_rules! 
flush_bitwise { + ($source:expr, $chunk_size:expr, $collect_fn:expr) => { + for chunk in $source.chunks($chunk_size) { + let ops = $collect_fn(chunk); + bitwise::update_multiplicities(&mut bitwise, &ops); + } + }; + } + + // MEMW: 28 bitwise ops per row → process in 1M-row chunks (~224 MB each) + flush_bitwise!(&memw_ops, 1_000_000, collect_bitwise_from_memw); + let memws = gen_traces!(&memw_ops, max_rows.memw, memw::generate_memw_trace); + drop(memw_ops); + + // LT: 8 bitwise ops per row → process in 1M-row chunks (~64 MB each) + flush_bitwise!(<_ops, 1_000_000, collect_bitwise_from_lt); + let lts = gen_traces!(<_ops, max_rows.lt, lt::generate_lt_trace); + drop(lt_ops); + + // Remaining bitwise sources (small for typical programs, but still chunked) + flush_bitwise!(&mul_ops, 1_000_000, collect_bitwise_from_mul); + flush_bitwise!(&dvrm_ops, 1_000_000, collect_bitwise_from_dvrm); + flush_bitwise!(&branch_ops, 1_000_000, collect_bitwise_from_branch); + flush_bitwise!(&shift_ops, 1_000_000, shift::collect_bitwise_from_shift); + + // PAGE bitwise (small, no chunking needed) + { + let ops = collect_bitwise_from_page(elf, &memory_state); + bitwise::update_multiplicities(&mut bitwise, &ops); + } // ===================================================================== - // PHASE 5: Generate final traces (parallelized) + // Remaining trace generation // ===================================================================== // Extract halt timestamp from the last ECALL instruction @@ -1529,18 +1651,18 @@ impl Traces { .ok_or(Error::MissingHaltEcall)?; let halt_timestamp = halt_op.timestamp; - let cpus = chunk_and_generate(&cpu_ops, max_rows.cpu, cpu::generate_cpu_trace); - let memws = chunk_and_generate(&memw_ops, max_rows.memw, memw::generate_memw_trace); - let loads = chunk_and_generate(&load_ops, max_rows.load, load::generate_load_trace); - let lts = chunk_and_generate(<_ops, max_rows.lt, lt::generate_lt_trace); - let shifts = chunk_and_generate(&shift_ops, 
max_rows.shift, shift::generate_shift_trace); - let muls = chunk_and_generate(&mul_ops, max_rows.mul, mul::generate_mul_trace); - let dvrms = chunk_and_generate(&dvrm_ops, max_rows.dvrm, dvrm::generate_dvrm_trace); - let branches = - chunk_and_generate(&branch_ops, max_rows.branch, branch::generate_branch_trace); - - let mut bitwise = bitwise::generate_bitwise_trace(); - bitwise::update_multiplicities(&mut bitwise, &bitwise_ops); + let cpus = gen_traces!(&cpu_ops, max_rows.cpu, cpu::generate_cpu_trace); + // cpu_ops kept alive — needed for decode multiplicities below + let loads = gen_traces!(&load_ops, max_rows.load, load::generate_load_trace); + drop(load_ops); + let shifts = gen_traces!(&shift_ops, max_rows.shift, shift::generate_shift_trace); + drop(shift_ops); + let muls = gen_traces!(&mul_ops, max_rows.mul, mul::generate_mul_trace); + drop(mul_ops); + let dvrms = gen_traces!(&dvrm_ops, max_rows.dvrm, dvrm::generate_dvrm_trace); + drop(dvrm_ops); + let branches = gen_traces!(&branch_ops, max_rows.branch, branch::generate_branch_trace); + drop(branch_ops); // Update DECODE multiplicities // Each CPU operation looks up the DECODE table once @@ -1555,12 +1677,14 @@ impl Traces { let mut decode_lookups: Vec = cpu_ops.iter().map(|op| op.decode.pc).collect(); decode_lookups.extend(std::iter::repeat_n(cpu::CPU_PADDING_PC, num_padding_rows)); decode::update_multiplicities(&mut decode, &pc_to_row, &decode_lookups); + drop(cpu_ops); + drop(decode_lookups); // Prepare register final state before scope (needs register_state ownership) let register_final_state = register_state.to_final_state_map(); // Generate remaining traces in parallel (page, register, halt). - // chunk_and_generate already handled cpu, lt, memw, load, mul, dvrm, branch above. + // gen_traces! already handled cpu, lt, memw, load, mul, dvrm, branch above. 
let (pages, page_configs, register_trace, halt_trace); #[cfg(feature = "parallel")] { diff --git a/prover/src/tests/disk_spill_tests.rs b/prover/src/tests/disk_spill_tests.rs new file mode 100644 index 000000000..ffb8fbca9 --- /dev/null +++ b/prover/src/tests/disk_spill_tests.rs @@ -0,0 +1,98 @@ +//! Tests for the `disk-spill` feature. +//! +//! Verifies that proving and verification produce correct results when main +//! traces, LDE columns, and Merkle tree nodes are spilled to disk via mmap. + +use crate::tables::MaxRowsConfig; +use crate::test_utils::asm_elf_bytes; +use crate::VmProof; + +/// Prove + verify a small program end-to-end with disk-spill enabled. +/// This exercises the full pipeline: trace generation, main-trace spill, +/// LDE spill, Merkle-tree spill, and verification. +#[test] +fn test_disk_spill_prove_and_verify_small() { + let elf_bytes = asm_elf_bytes("sub"); + let result = crate::prove_and_verify(&elf_bytes); + assert!( + result.is_ok(), + "prove_and_verify failed: {:?}", + result.err() + ); + assert!(result.unwrap(), "verification returned false"); +} + +/// Prove + verify with `MaxRowsConfig::small()` (2^5 = 32 rows per chunk) +/// to force many chunks. This ensures disk-spill works across chunk boundaries +/// where pool buffers are reused and main traces are spilled per-chunk. 
+#[test] +fn test_disk_spill_prove_and_verify_with_chunks() { + let elf_bytes = asm_elf_bytes("sub"); + let proof_options = stark::proof::options::GoldilocksCubicProofOptions::with_blowup(2) + .expect("blowup=2 is always valid"); + let vm_proof = crate::prove_with_options(&elf_bytes, &proof_options, &MaxRowsConfig::small()); + assert!( + vm_proof.is_ok(), + "prove_with_options failed: {:?}", + vm_proof.err() + ); + let vm_proof = vm_proof.unwrap(); + + let ok = crate::verify_with_options(&vm_proof, &elf_bytes, &proof_options); + assert!(ok.is_ok(), "verify_with_options failed: {:?}", ok.err()); + assert!(ok.unwrap(), "verification returned false"); +} + +/// Prove, serialize with bincode, deserialize, then verify. +/// This reproduces the exact CLI path: prove → write → read → verify. +#[test] +fn test_disk_spill_serialization_roundtrip() { + let elf_bytes = asm_elf_bytes("sub"); + let proof = crate::prove(&elf_bytes).expect("prove failed"); + + let bytes = bincode::serialize(&proof).expect("serialize failed"); + eprintln!("Proof serialized: {} bytes", bytes.len()); + + let proof2: VmProof = bincode::deserialize(&bytes).expect("deserialize failed"); + let valid = crate::verify(&proof2, &elf_bytes).expect("verify failed"); + assert!(valid, "verification failed after serialization roundtrip"); +} + +/// Print struct sizes to verify memory analysis +#[test] +fn test_print_struct_sizes() { + use std::mem::size_of; + eprintln!("CpuOperation: {} bytes", size_of::()); + eprintln!("MemwOperation: {} bytes", size_of::()); + eprintln!("LtOperation: {} bytes", size_of::()); + eprintln!("BranchOperation: {} bytes", size_of::()); + eprintln!("BitwiseOperation: {} bytes", size_of::()); + eprintln!("ShiftOperation: {} bytes", size_of::()); +} + +/// Test prove+verify with a larger program (2M instructions). +/// This catches bugs that only manifest at scale (multiple chunks, larger tables). 
+#[test] +fn test_disk_spill_prove_and_verify_2m() { + let _ = env_logger::builder().is_test(true).try_init(); + let elf_bytes = asm_elf_bytes("fib_iterative_2M"); + let result = crate::prove_and_verify(&elf_bytes).expect("prove_and_verify failed"); + assert!(result, "verification returned false for fib_iterative_2M"); +} + +/// Same as above but with small chunks (MaxRowsConfig::small()). +#[test] +fn test_disk_spill_serialization_roundtrip_chunked() { + let elf_bytes = asm_elf_bytes("sub"); + let opts = stark::proof::options::GoldilocksCubicProofOptions::with_blowup(2) + .expect("blowup=2 is always valid"); + let proof = + crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::small()).expect("prove failed"); + + let bytes = bincode::serialize(&proof).expect("serialize failed"); + eprintln!("Chunked proof serialized: {} bytes", bytes.len()); + + let proof2: VmProof = bincode::deserialize(&bytes).expect("deserialize failed"); + let valid = crate::verify_with_options(&proof2, &elf_bytes, &opts).expect("verify failed"); + assert!(valid, "verification failed after serialization roundtrip (chunked)"); +} diff --git a/prover/src/tests/mod.rs b/prover/src/tests/mod.rs index 4f349d25a..788533d68 100644 --- a/prover/src/tests/mod.rs +++ b/prover/src/tests/mod.rs @@ -1,3 +1,5 @@ +#[cfg(all(test, feature = "disk-spill"))] +pub mod disk_spill_tests; #[cfg(test)] pub mod bitwise_bus_tests; #[cfg(test)] From 238352939b05801904629ecb4779c6966f7980ee Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 25 Mar 2026 20:11:54 -0300 Subject: [PATCH 002/231] Parallelize disk-spill Rounds 2-4 proving loop --- crypto/stark/src/prover.rs | 183 ++++++++++++++++++++++--------------- 1 file changed, 110 insertions(+), 73 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 4f3bbee66..e2e7c1c84 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1888,83 +1888,120 @@ pub trait IsStarkProver< // ----- disk-spill path: read 
from spilled LDEs ----- #[cfg(feature = "disk-spill")] { - for idx in 0..num_airs { - let (air, _trace, pub_inputs) = &air_trace_pairs[idx]; - let metadata = &metadatas[idx]; - let domain = &domains[idx]; - let table_transcript = &mut table_transcripts[idx]; - - #[cfg(feature = "instruments")] - let table_start = Instant::now(); - - // Take the spilled LDE (mmap-backed) — no LDE recomputation needed - let lde_trace = spilled_ldes[idx] - .take() - .expect("spilled LDE must exist for every AIR"); - - // Build Round1 from the spilled LDE + stored Merkle trees - let main = Round1CommitmentData:: { - lde_trace_merkle_tree: Arc::clone(&metadata.main_merkle_tree), - lde_trace_merkle_root: metadata.main_merkle_root, - precomputed_merkle_tree: metadata - .precomputed_merkle_tree - .as_ref() - .map(Arc::clone), - precomputed_merkle_root: metadata.precomputed_merkle_root, - num_precomputed_cols: metadata.num_precomputed_cols, - }; - - let aux = if air.has_aux_trace() { - Some(Round1CommitmentData:: { - lde_trace_merkle_tree: Arc::clone( - metadata - .aux_merkle_tree - .as_ref() - .expect("aux tree must exist when has_aux_trace"), - ), - lde_trace_merkle_root: metadata - .aux_merkle_root - .expect("aux root must exist when has_aux_trace"), - precomputed_merkle_tree: None, - precomputed_merkle_root: None, - num_precomputed_cols: 0, + for chunk_start in (0..num_airs).step_by(k) { + let chunk_end = (chunk_start + k).min(num_airs); + + // Pre-take spilled LDEs for the chunk (needs &mut, can't do inside par_iter) + let chunk_ldes: Vec<_> = (chunk_start..chunk_end) + .map(|i| { + spilled_ldes[i] + .take() + .expect("spilled LDE must exist for every AIR") }) - } else { - None - }; - - let round_1_result = Round1 { - lde_trace, - main, - aux, - rap_challenges: metadata.rap_challenges.clone(), - bus_public_inputs: metadata.bus_public_inputs.clone(), - }; - - if let Some(ref bpi) = round_1_result.bus_public_inputs { - table_transcript.append_field_element(&bpi.table_contribution); - } + 
.collect(); - let proof = Self::prove_rounds_2_to_4( - *air, - *pub_inputs, - &round_1_result, - table_transcript, - domain, - )?; + let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; - #[cfg(feature = "instruments")] - { - let sub_ops = crate::instruments::take_round_sub_ops().unwrap_or_default(); - table_timings.push(( - air.name().to_string(), - air_trace_pairs[idx].1.num_rows(), - table_start.elapsed(), - sub_ops, - )); - } + #[cfg(feature = "parallel")] + let iter = chunk_ldes + .into_par_iter() + .zip(chunk_transcripts.par_iter_mut()) + .enumerate(); + #[cfg(not(feature = "parallel"))] + let iter = chunk_ldes + .into_iter() + .zip(chunk_transcripts.iter_mut()) + .enumerate(); + + let chunk_results: Vec> = iter + .map(|(j, (lde_trace, table_transcript))| { + let idx = chunk_start + j; + let (air, _trace, pub_inputs) = &air_trace_pairs[idx]; + let metadata = &metadatas[idx]; + let domain = &domains[idx]; - proofs.push(proof); + #[cfg(feature = "instruments")] + let table_start = Instant::now(); + + let main = Round1CommitmentData:: { + lde_trace_merkle_tree: Arc::clone(&metadata.main_merkle_tree), + lde_trace_merkle_root: metadata.main_merkle_root, + precomputed_merkle_tree: metadata + .precomputed_merkle_tree + .as_ref() + .map(Arc::clone), + precomputed_merkle_root: metadata.precomputed_merkle_root, + num_precomputed_cols: metadata.num_precomputed_cols, + }; + + let aux = if air.has_aux_trace() { + Some(Round1CommitmentData:: { + lde_trace_merkle_tree: Arc::clone( + metadata + .aux_merkle_tree + .as_ref() + .expect("aux tree must exist when has_aux_trace"), + ), + lde_trace_merkle_root: metadata + .aux_merkle_root + .expect("aux root must exist when has_aux_trace"), + precomputed_merkle_tree: None, + precomputed_merkle_root: None, + num_precomputed_cols: 0, + }) + } else { + None + }; + + let round_1_result = Round1 { + lde_trace, + main, + aux, + rap_challenges: metadata.rap_challenges.clone(), + bus_public_inputs: 
metadata.bus_public_inputs.clone(), + }; + + if let Some(ref bpi) = round_1_result.bus_public_inputs { + table_transcript.append_field_element(&bpi.table_contribution); + } + + let proof = Self::prove_rounds_2_to_4( + *air, + *pub_inputs, + &round_1_result, + table_transcript, + domain, + )?; + + #[cfg(feature = "instruments")] + { + let sub_ops = + crate::instruments::take_round_sub_ops().unwrap_or_default(); + return Ok(( + proof, + ( + air.name().to_string(), + air_trace_pairs[idx].1.num_rows(), + table_start.elapsed(), + sub_ops, + ), + )); + } + #[cfg(not(feature = "instruments"))] + Ok(proof) + }) + .collect(); + + for result in chunk_results { + #[cfg(feature = "instruments")] + { + let (proof, timing) = result?; + proofs.push(proof); + table_timings.push(timing); + } + #[cfg(not(feature = "instruments"))] + proofs.push(result?); + } } } From d23cf00da9d4125a08d04b377126989a47acb1da Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 26 Mar 2026 11:30:39 -0300 Subject: [PATCH 003/231] Remove LDE spilling, reuse reconstruct_round1 for Rounds 2-4 --- crypto/stark/src/prover.rs | 354 +++++++++---------------------------- 1 file changed, 87 insertions(+), 267 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index e2e7c1c84..3a0f23f85 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1547,13 +1547,6 @@ pub trait IsStarkProver< let mut main_commits: Vec> = Vec::with_capacity(num_airs); - // Spilled LDE trace tables: one per AIR, populated during Phase A (main) and Phase C (aux). - // In Rounds 2-4 these replace the reconstruct_round1 flow — LDE data is read from mmap - // instead of being recomputed from the trace. 
- #[cfg(feature = "disk-spill")] - let mut spilled_ldes: Vec>> = - (0..num_airs).map(|_| None).collect(); - for chunk_start in (0..num_airs).step_by(k) { let chunk_end = (chunk_start + k).min(num_airs); let chunk_size = chunk_end - chunk_start; @@ -1594,28 +1587,6 @@ pub trait IsStarkProver< } transcript.append_bytes(&root); - // Spill the main LDE columns from the pool to a temp-file mmap before - // the pool is overwritten by the next chunk. Also spill Merkle tree nodes - // to disk — they remain accessible through mmap for Rounds 2-4 openings. - #[cfg(feature = "disk-spill")] - { - let idx = chunk_start + j; - let (air, trace, _) = &air_trace_pairs[idx]; - let num_main_cols = trace.num_main_columns; - let spilled = LDETraceTable::spill_main_from_pool( - &pool_sets[j].main, - num_main_cols, - air.step_size(), - domains[idx].blowup_factor, - ) - .map_err(|e| { - ProvingError::WrongParameter(format!( - "disk-spill main LDE table {idx}: {e}" - )) - })?; - spilled_ldes[idx] = Some(spilled); - } - #[allow(unused_mut)] let mut main_tree = Arc::new(tree); #[cfg(feature = "disk-spill")] @@ -1793,33 +1764,15 @@ pub trait IsStarkProver< table_transcripts[chunk_start + j].append_bytes(root); } - // Spill aux LDE columns from pool and aux Merkle tree nodes to disk. + // Spill aux Merkle tree nodes to disk. 
#[cfg(feature = "disk-spill")] - { - let idx = chunk_start + j; - let (air, trace, _) = &air_trace_pairs[idx]; - if air.has_aux_trace() { - let num_aux_cols = trace.num_aux_columns; - if let Some(ref mut spilled) = spilled_ldes[idx] { - spilled - .add_aux_from_pool(&pool_sets[j].aux, num_aux_cols) - .map_err(|e| { - ProvingError::WrongParameter(format!( - "disk-spill aux LDE table {idx}: {e}" - )) - })?; - } - } - if let Some(ref mut tree_arc) = aux_tree { - Arc::get_mut(tree_arc) - .expect("sole Arc owner") - .spill_nodes_to_disk() - .map_err(|e| { - ProvingError::WrongParameter(format!( - "disk-spill aux Merkle tree {idx}: {e}" - )) - })?; - } + if let Some(ref mut tree_arc) = aux_tree { + Arc::get_mut(tree_arc) + .expect("sole Arc owner") + .spill_nodes_to_disk() + .map_err(|e| { + ProvingError::WrongParameter(format!("disk-spill aux Merkle tree: {e}")) + })?; } aux_results.push((aux_tree, aux_root)); @@ -1868,10 +1821,6 @@ pub trait IsStarkProver< // ===================================================================== // Each chunk of K tables is processed in parallel. Each worker gets its // own pool set and transcript fork. Pool sets are reused across chunks. - // - // disk-spill path: LDE data is read from mmap-backed spilled_ldes instead - // of being recomputed via reconstruct_round1. This avoids the peak memory - // spike of holding both the trace and its LDE in RAM simultaneously. 
#[cfg(feature = "instruments")] let phase_start = Instant::now(); @@ -1884,229 +1833,100 @@ pub trait IsStarkProver< )> = Vec::with_capacity(num_airs); let mut proofs = Vec::with_capacity(num_airs); + for chunk_start in (0..num_airs).step_by(k) { + let chunk_end = (chunk_start + k).min(num_airs); + let chunk_size = chunk_end - chunk_start; - // ----- disk-spill path: read from spilled LDEs ----- - #[cfg(feature = "disk-spill")] - { - for chunk_start in (0..num_airs).step_by(k) { - let chunk_end = (chunk_start + k).min(num_airs); - - // Pre-take spilled LDEs for the chunk (needs &mut, can't do inside par_iter) - let chunk_ldes: Vec<_> = (chunk_start..chunk_end) - .map(|i| { - spilled_ldes[i] - .take() - .expect("spilled LDE must exist for every AIR") - }) - .collect(); - - let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; - - #[cfg(feature = "parallel")] - let iter = chunk_ldes - .into_par_iter() - .zip(chunk_transcripts.par_iter_mut()) - .enumerate(); - #[cfg(not(feature = "parallel"))] - let iter = chunk_ldes - .into_iter() - .zip(chunk_transcripts.iter_mut()) - .enumerate(); - - let chunk_results: Vec> = iter - .map(|(j, (lde_trace, table_transcript))| { - let idx = chunk_start + j; - let (air, _trace, pub_inputs) = &air_trace_pairs[idx]; - let metadata = &metadatas[idx]; - let domain = &domains[idx]; + let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; - #[cfg(feature = "instruments")] - let table_start = Instant::now(); - - let main = Round1CommitmentData:: { - lde_trace_merkle_tree: Arc::clone(&metadata.main_merkle_tree), - lde_trace_merkle_root: metadata.main_merkle_root, - precomputed_merkle_tree: metadata - .precomputed_merkle_tree - .as_ref() - .map(Arc::clone), - precomputed_merkle_root: metadata.precomputed_merkle_root, - num_precomputed_cols: metadata.num_precomputed_cols, - }; - - let aux = if air.has_aux_trace() { - Some(Round1CommitmentData:: { - lde_trace_merkle_tree: Arc::clone( - metadata - 
.aux_merkle_tree - .as_ref() - .expect("aux tree must exist when has_aux_trace"), - ), - lde_trace_merkle_root: metadata - .aux_merkle_root - .expect("aux root must exist when has_aux_trace"), - precomputed_merkle_tree: None, - precomputed_merkle_root: None, - num_precomputed_cols: 0, - }) - } else { - None - }; - - let round_1_result = Round1 { - lde_trace, - main, - aux, - rap_challenges: metadata.rap_challenges.clone(), - bus_public_inputs: metadata.bus_public_inputs.clone(), - }; - - if let Some(ref bpi) = round_1_result.bus_public_inputs { - table_transcript.append_field_element(&bpi.table_contribution); - } - - let proof = Self::prove_rounds_2_to_4( - *air, - *pub_inputs, - &round_1_result, - table_transcript, - domain, - )?; + #[cfg(feature = "parallel")] + let iter = pool_sets[..chunk_size] + .par_iter_mut() + .zip(chunk_transcripts.par_iter_mut()) + .enumerate(); + #[cfg(not(feature = "parallel"))] + let iter = pool_sets[..chunk_size] + .iter_mut() + .zip(chunk_transcripts.iter_mut()) + .enumerate(); - #[cfg(feature = "instruments")] - { - let sub_ops = - crate::instruments::take_round_sub_ops().unwrap_or_default(); - return Ok(( - proof, - ( - air.name().to_string(), - air_trace_pairs[idx].1.num_rows(), - table_start.elapsed(), - sub_ops, - ), - )); - } - #[cfg(not(feature = "instruments"))] - Ok(proof) - }) - .collect(); + let chunk_results: Vec> = iter + .map(|(j, (pool, table_transcript))| { + let idx = chunk_start + j; + let (air, trace, pub_inputs) = &air_trace_pairs[idx]; + let metadata = &metadatas[idx]; + let domain = &domains[idx]; + let twiddles = &twiddle_caches[idx]; - for result in chunk_results { #[cfg(feature = "instruments")] - { - let (proof, timing) = result?; - proofs.push(proof); - table_timings.push(timing); - } - #[cfg(not(feature = "instruments"))] - proofs.push(result?); - } - } - } + let table_start = Instant::now(); - // ----- non-disk-spill path: reconstruct LDE from trace (original flow) ----- - #[cfg(not(feature = 
"disk-spill"))] - { - for chunk_start in (0..num_airs).step_by(k) { - let chunk_end = (chunk_start + k).min(num_airs); - let chunk_size = chunk_end - chunk_start; - - let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; - - #[cfg(feature = "parallel")] - let iter = pool_sets[..chunk_size] - .par_iter_mut() - .zip(chunk_transcripts.par_iter_mut()) - .enumerate(); - #[cfg(not(feature = "parallel"))] - let iter = pool_sets[..chunk_size] - .iter_mut() - .zip(chunk_transcripts.iter_mut()) - .enumerate(); - - let chunk_results: Vec> = iter - .map(|(j, (pool, table_transcript))| { - let idx = chunk_start + j; - let (air, trace, pub_inputs) = &air_trace_pairs[idx]; - let metadata = &metadatas[idx]; - let domain = &domains[idx]; - let twiddles = &twiddle_caches[idx]; - - #[cfg(feature = "instruments")] - let table_start = Instant::now(); - - #[cfg(feature = "instruments")] - let lde_start = Instant::now(); - let round_1_result = Self::reconstruct_round1( - *air, - *trace, - domain, - metadata, - twiddles, - &mut pool.main, - &mut pool.aux, - )?; - #[cfg(feature = "instruments")] - let lde_dur = lde_start.elapsed(); + #[cfg(feature = "instruments")] + let lde_start = Instant::now(); + let round_1_result = Self::reconstruct_round1( + *air, + *trace, + domain, + metadata, + twiddles, + &mut pool.main, + &mut pool.aux, + )?; + #[cfg(feature = "instruments")] + let lde_dur = lde_start.elapsed(); - if let Some(ref bpi) = round_1_result.bus_public_inputs { - table_transcript.append_field_element(&bpi.table_contribution); - } + if let Some(ref bpi) = round_1_result.bus_public_inputs { + table_transcript.append_field_element(&bpi.table_contribution); + } - let proof = Self::prove_rounds_2_to_4( - *air, - *pub_inputs, - &round_1_result, - table_transcript, - domain, - )?; + let proof = Self::prove_rounds_2_to_4( + *air, + *pub_inputs, + &round_1_result, + table_transcript, + domain, + )?; - // Collect per-table sub-op timing via TLS. 
- // Both the store (inside prove_rounds_2_to_4) and this take run on the - // same rayon worker thread, so sub-ops are valid in both sequential and - // parallel mode. - #[cfg(feature = "instruments")] - let table_timing = { - let mut sub_ops = - crate::instruments::take_round_sub_ops().unwrap_or_default(); - sub_ops.trace_lde += lde_dur; - ( - air.name().to_string(), - trace.num_rows(), - table_start.elapsed(), - sub_ops, - ) - }; - - // Return column Vecs to pool (zero-copy move back) - let (main_cols, aux_cols) = round_1_result.lde_trace.into_columns(); - for (slot, col) in pool.main.iter_mut().zip(main_cols) { - *slot = col; - } - for (slot, col) in pool.aux.iter_mut().zip(aux_cols) { - *slot = col; - } + #[cfg(feature = "instruments")] + let table_timing = { + let mut sub_ops = + crate::instruments::take_round_sub_ops().unwrap_or_default(); + sub_ops.trace_lde += lde_dur; + ( + air.name().to_string(), + trace.num_rows(), + table_start.elapsed(), + sub_ops, + ) + }; - #[cfg(feature = "instruments")] - return Ok((proof, table_timing)); - #[cfg(not(feature = "instruments"))] - Ok(proof) - }) - .collect(); + // Return column Vecs to pool (zero-copy move back) + let (main_cols, aux_cols) = round_1_result.lde_trace.into_columns(); + for (slot, col) in pool.main.iter_mut().zip(main_cols) { + *slot = col; + } + for (slot, col) in pool.aux.iter_mut().zip(aux_cols) { + *slot = col; + } - for result in chunk_results { #[cfg(feature = "instruments")] - { - let (proof, timing) = result?; - proofs.push(proof); - table_timings.push(timing); - } + return Ok((proof, table_timing)); #[cfg(not(feature = "instruments"))] - proofs.push(result?); + Ok(proof) + }) + .collect(); + + for result in chunk_results { + #[cfg(feature = "instruments")] + { + let (proof, timing) = result?; + proofs.push(proof); + table_timings.push(timing); } + #[cfg(not(feature = "instruments"))] + proofs.push(result?); } - } // end #[cfg(not(feature = "disk-spill"))] + } #[cfg(feature = "instruments")] { 
From 8f2641338478d4528e33cba1f69215794e8ac34f Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 26 Mar 2026 16:15:24 -0300 Subject: [PATCH 004/231] Remove no-op late trace re-spill --- crypto/stark/src/prover.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 3a0f23f85..960229880 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1631,18 +1631,6 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let main_commits_elapsed = phase_start.elapsed(); - // Re-spill main traces that were rehydrated by extract_columns_main_into - // during Phase A. After Phase A the pool holds the LDE data (already - // snapshotted to mmap above), so the original trace data is no longer - // needed in heap — push it back to mmap to free RAM for aux trace building. - #[cfg(feature = "disk-spill")] - for (_, trace, _) in air_trace_pairs.iter_mut() { - trace - .main_table - .spill_to_disk() - .map_err(|e| ProvingError::WrongParameter(format!("disk-spill late main: {e}")))?; - } - // ===================================================================== // Round 1, Phase B: Sample shared LogUp challenges // ===================================================================== From e7e576987bad51fad0231e9d15924e9736c5da01 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 26 Mar 2026 16:53:36 -0300 Subject: [PATCH 005/231] Remove Merkle tree node spilling --- crypto/stark/src/prover.rs | 46 +++----------------------------------- 1 file changed, 3 insertions(+), 43 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 960229880..e1d053c4b 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1587,36 +1587,8 @@ pub trait IsStarkProver< } transcript.append_bytes(&root); - #[allow(unused_mut)] - let mut main_tree = Arc::new(tree); - #[cfg(feature = "disk-spill")] - { - Arc::get_mut(&mut main_tree) - .expect("sole Arc owner") 
- .spill_nodes_to_disk() - .map_err(|e| { - ProvingError::WrongParameter(format!( - "disk-spill main Merkle tree: {e}" - )) - })?; - } - - let precomputed_tree = pre_tree.map(|t| { - let mut arc = Arc::new(t); - #[cfg(feature = "disk-spill")] - { - Arc::get_mut(&mut arc) - .expect("sole Arc owner") - .spill_nodes_to_disk() - .map_err(|e| { - ProvingError::WrongParameter(format!( - "disk-spill precomputed Merkle tree: {e}" - )) - }) - .unwrap(); - } - arc - }); + let main_tree = Arc::new(tree); + let precomputed_tree = pre_tree.map(Arc::new); main_commits.push(MainCommitData { main_tree, @@ -1746,23 +1718,11 @@ pub trait IsStarkProver< // Sequential: append aux roots to forked transcripts #[allow(unused_variables)] for (j, result) in chunk_aux.into_iter().enumerate() { - #[allow(unused_mut)] - let (mut aux_tree, aux_root) = result?; + let (aux_tree, aux_root) = result?; if let Some(ref root) = aux_root { table_transcripts[chunk_start + j].append_bytes(root); } - // Spill aux Merkle tree nodes to disk. - #[cfg(feature = "disk-spill")] - if let Some(ref mut tree_arc) = aux_tree { - Arc::get_mut(tree_arc) - .expect("sole Arc owner") - .spill_nodes_to_disk() - .map_err(|e| { - ProvingError::WrongParameter(format!("disk-spill aux Merkle tree: {e}")) - })?; - } - aux_results.push((aux_tree, aux_root)); } } From fe5776f3ae98d4aa4e30c6b38e5d7f7f0adfb682 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 27 Mar 2026 16:53:47 -0300 Subject: [PATCH 006/231] Revert "Remove Merkle tree node spilling" This reverts commit e7e576987bad51fad0231e9d15924e9736c5da01. 
--- crypto/stark/src/prover.rs | 46 +++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index e1d053c4b..960229880 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1587,8 +1587,36 @@ pub trait IsStarkProver< } transcript.append_bytes(&root); - let main_tree = Arc::new(tree); - let precomputed_tree = pre_tree.map(Arc::new); + #[allow(unused_mut)] + let mut main_tree = Arc::new(tree); + #[cfg(feature = "disk-spill")] + { + Arc::get_mut(&mut main_tree) + .expect("sole Arc owner") + .spill_nodes_to_disk() + .map_err(|e| { + ProvingError::WrongParameter(format!( + "disk-spill main Merkle tree: {e}" + )) + })?; + } + + let precomputed_tree = pre_tree.map(|t| { + let mut arc = Arc::new(t); + #[cfg(feature = "disk-spill")] + { + Arc::get_mut(&mut arc) + .expect("sole Arc owner") + .spill_nodes_to_disk() + .map_err(|e| { + ProvingError::WrongParameter(format!( + "disk-spill precomputed Merkle tree: {e}" + )) + }) + .unwrap(); + } + arc + }); main_commits.push(MainCommitData { main_tree, @@ -1718,11 +1746,23 @@ pub trait IsStarkProver< // Sequential: append aux roots to forked transcripts #[allow(unused_variables)] for (j, result) in chunk_aux.into_iter().enumerate() { - let (aux_tree, aux_root) = result?; + #[allow(unused_mut)] + let (mut aux_tree, aux_root) = result?; if let Some(ref root) = aux_root { table_transcripts[chunk_start + j].append_bytes(root); } + // Spill aux Merkle tree nodes to disk. 
+ #[cfg(feature = "disk-spill")] + if let Some(ref mut tree_arc) = aux_tree { + Arc::get_mut(tree_arc) + .expect("sole Arc owner") + .spill_nodes_to_disk() + .map_err(|e| { + ProvingError::WrongParameter(format!("disk-spill aux Merkle tree: {e}")) + })?; + } + aux_results.push((aux_tree, aux_root)); } } From aa24927ee72059c8febbb150c3163d4528c8cc5c Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 27 Mar 2026 17:03:32 -0300 Subject: [PATCH 007/231] Disk-spill composition poly and FRI layer Merkle trees --- crypto/stark/src/fri/mod.rs | 6 +++++- crypto/stark/src/prover.rs | 8 +++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/crypto/stark/src/fri/mod.rs b/crypto/stark/src/fri/mod.rs index 87ab66a5b..ad08cc795 100644 --- a/crypto/stark/src/fri/mod.rs +++ b/crypto/stark/src/fri/mod.rs @@ -54,9 +54,13 @@ where .chunks_exact(2) .map(|chunk| [chunk[0].clone(), chunk[1].clone()]) .collect(); - let merkle_tree = FriLayerMerkleTree::build(&leaves) + let mut merkle_tree = FriLayerMerkleTree::build(&leaves) .expect("FRI commit: Merkle tree construction must succeed"); let root = merkle_tree.root; + #[cfg(feature = "disk-spill")] + merkle_tree + .spill_nodes_to_disk() + .expect("disk-spill FRI layer Merkle tree"); fri_layer_list.push(FriLayer::new( &evals, merkle_tree, diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 960229880..a165a8471 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -896,11 +896,17 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let t_sub = Instant::now(); - let Some((composition_poly_merkle_tree, composition_poly_root)) = + let Some((mut composition_poly_merkle_tree, composition_poly_root)) = Self::commit_composition_polynomial(&lde_composition_poly_parts_evaluations) else { return Err(ProvingError::EmptyCommitment); }; + #[cfg(feature = "disk-spill")] + composition_poly_merkle_tree + .spill_nodes_to_disk() + .map_err(|e| { + 
ProvingError::WrongParameter(format!("disk-spill composition Merkle tree: {e}")) + })?; #[cfg(feature = "instruments")] let merkle_dur = t_sub.elapsed(); From f2654f7af9dc071b5bf90248b52ce568018d05e5 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 27 Mar 2026 17:32:09 -0300 Subject: [PATCH 008/231] Disk-spill FRI layer evaluations and composition poly evaluations --- crypto/stark/src/fri/fri_commitment.rs | 59 ++++++++++++ crypto/stark/src/fri/mod.rs | 11 ++- crypto/stark/src/prover.rs | 119 +++++++++++++++++++++---- 3 files changed, 169 insertions(+), 20 deletions(-) diff --git a/crypto/stark/src/fri/fri_commitment.rs b/crypto/stark/src/fri/fri_commitment.rs index 7eb530452..b8d1a366a 100644 --- a/crypto/stark/src/fri/fri_commitment.rs +++ b/crypto/stark/src/fri/fri_commitment.rs @@ -15,6 +15,17 @@ where pub merkle_tree: MerkleTree, pub coset_offset: FieldElement, pub domain_size: usize, + #[cfg(feature = "disk-spill")] + eval_mmap: Option, +} + +#[cfg(feature = "disk-spill")] +#[derive(Clone)] +struct EvalMmapBacking { + mmap: std::sync::Arc, + _file: std::sync::Arc, + _len: usize, + elem_size: usize, } impl FriLayer @@ -34,6 +45,54 @@ where merkle_tree, coset_offset, domain_size, + #[cfg(feature = "disk-spill")] + eval_mmap: None, + } + } + + #[inline] + pub fn get_evaluation(&self, index: usize) -> &FieldElement { + #[cfg(feature = "disk-spill")] + if let Some(ref backing) = self.eval_mmap { + let offset = index * backing.elem_size; + return unsafe { &*(backing.mmap.as_ptr().add(offset) as *const FieldElement) }; + } + &self.evaluation[index] + } + + #[cfg(feature = "disk-spill")] + pub fn spill_evaluation_to_disk(&mut self) -> std::io::Result<()> { + use std::io::Write; + + if self.evaluation.is_empty() || self.eval_mmap.is_some() { + return Ok(()); + } + + let elem_size = std::mem::size_of::>(); + let total_bytes = self.evaluation.len() * elem_size; + + let file = tempfile::tempfile()?; + file.set_len(total_bytes as u64)?; + { + let mut writer = 
std::io::BufWriter::new(&file); + let bytes = unsafe { + std::slice::from_raw_parts( + self.evaluation.as_ptr() as *const u8, + total_bytes, + ) + }; + writer.write_all(bytes)?; + writer.flush()?; } + let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; + let len = self.evaluation.len(); + self.evaluation = Vec::new(); + self.eval_mmap = Some(EvalMmapBacking { + mmap: std::sync::Arc::new(mmap), + _file: std::sync::Arc::new(file), + _len: len, + elem_size, + }); + Ok(()) } } diff --git a/crypto/stark/src/fri/mod.rs b/crypto/stark/src/fri/mod.rs index ad08cc795..c2252f324 100644 --- a/crypto/stark/src/fri/mod.rs +++ b/crypto/stark/src/fri/mod.rs @@ -61,12 +61,17 @@ where merkle_tree .spill_nodes_to_disk() .expect("disk-spill FRI layer Merkle tree"); - fri_layer_list.push(FriLayer::new( + let mut layer = FriLayer::new( &evals, merkle_tree, current_coset_offset.clone().to_extension(), current_domain_size, - )); + ); + #[cfg(feature = "disk-spill")] + layer + .spill_evaluation_to_disk() + .expect("disk-spill FRI layer evaluation"); + fri_layer_list.push(layer); // >>>> Send commitment: [pₖ] transcript.append_bytes(&root); @@ -107,7 +112,7 @@ where let mut index = *iota_s; for layer in fri_layers { // symmetric element - let evaluation_sym = layer.evaluation[index ^ 1].clone(); + let evaluation_sym = layer.get_evaluation(index ^ 1).clone(); let auth_path_sym = layer.merkle_tree.get_proof_by_pos(index >> 1).unwrap(); layers_evaluations_sym.push(evaluation_sym); layers_auth_paths_sym.push(auth_path_sym); diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index a165a8471..35c63b999 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -242,6 +242,82 @@ where pub(crate) composition_poly_merkle_tree: BatchedMerkleTree, /// The commitment to the composition polynomial parts. 
pub(crate) composition_poly_root: Commitment, + #[cfg(feature = "disk-spill")] + eval_mmaps: Option>, +} + +#[cfg(feature = "disk-spill")] +struct Round2EvalMmap { + mmap: memmap2::Mmap, + _file: std::fs::File, + len: usize, + elem_size: usize, +} + +impl Round2 +where + F: IsField, + FieldElement: AsBytes, +{ + pub fn num_composition_parts(&self) -> usize { + #[cfg(feature = "disk-spill")] + if let Some(ref mmaps) = self.eval_mmaps { + return mmaps.len(); + } + self.lde_composition_poly_evaluations.len() + } + + #[inline] + pub fn get_composition_eval(&self, part: usize, index: usize) -> &FieldElement { + #[cfg(feature = "disk-spill")] + if let Some(ref mmaps) = self.eval_mmaps { + let m = &mmaps[part]; + let offset = index * m.elem_size; + return unsafe { &*(m.mmap.as_ptr().add(offset) as *const FieldElement) }; + } + &self.lde_composition_poly_evaluations[part][index] + } + + pub fn composition_eval_len(&self, part: usize) -> usize { + #[cfg(feature = "disk-spill")] + if let Some(ref mmaps) = self.eval_mmaps { + return mmaps[part].len; + } + self.lde_composition_poly_evaluations[part].len() + } + + #[cfg(feature = "disk-spill")] + pub fn spill_evaluations_to_disk(&mut self) -> std::io::Result<()> { + use std::io::Write; + + if self.lde_composition_poly_evaluations.is_empty() || self.eval_mmaps.is_some() { + return Ok(()); + } + + let elem_size = std::mem::size_of::>(); + let mut mmaps = Vec::with_capacity(self.lde_composition_poly_evaluations.len()); + + for part in self.lde_composition_poly_evaluations.drain(..) { + let total_bytes = part.len() * elem_size; + let file = tempfile::tempfile()?; + file.set_len(total_bytes as u64)?; + { + let mut writer = std::io::BufWriter::new(&file); + let bytes = unsafe { + std::slice::from_raw_parts(part.as_ptr() as *const u8, total_bytes) + }; + writer.write_all(bytes)?; + writer.flush()?; + } + let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? 
}; + let len = part.len(); + drop(part); + mmaps.push(Round2EvalMmap { mmap, _file: file, len, elem_size }); + } + + self.eval_mmaps = Some(mmaps); + Ok(()) + } } /// A container for the results of the third round of the STARK Prove protocol. @@ -917,6 +993,8 @@ pub trait IsStarkProver< lde_composition_poly_evaluations: lde_composition_poly_parts_evaluations, composition_poly_merkle_tree, composition_poly_root, + #[cfg(feature = "disk-spill")] + eval_mmaps: None, }) } @@ -932,7 +1010,7 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, { - let num_parts = round_2_result.lde_composition_poly_evaluations.len(); + let num_parts = round_2_result.num_composition_parts(); let z_power = z.pow(num_parts); let domain_size = domain.interpolation_domain_size; let blowup_factor = domain.blowup_factor; @@ -999,7 +1077,7 @@ pub trait IsStarkProver< air: &dyn AIR, domain: &Domain, round_1_result: &Round1, - round_2_result: &Round2, + round_2_result: &mut Round2, round_3_result: &Round3, z: &FieldElement, transcript: &mut impl IsStarkTranscript, @@ -1013,7 +1091,7 @@ pub trait IsStarkProver< let gamma = transcript.sample_field_element(); - let n_terms_composition_poly = round_2_result.lde_composition_poly_evaluations.len(); + let n_terms_composition_poly = round_2_result.num_composition_parts(); let num_terms_trace = air.context().transition_offsets.len() * air.step_size() * air.context().trace_columns; @@ -1049,6 +1127,13 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let other_dur_1 = t_sub.elapsed(); + // Spill composition poly evaluations to disk after the dense read above. + // They are only needed sparsely in open_composition_poly (~30 queries). + #[cfg(feature = "disk-spill")] + round_2_result + .spill_evaluations_to_disk() + .expect("disk-spill composition poly evaluations"); + // Extend N trace-coset evaluations to 2N LDE-coset evaluations via standard LDE. // deep_evals[i] = h(offset·ω_N^i) = f(ω_N^i) where f(x) = h(offset·x). 
// Standard iFFT+FFT recovers f and evaluates on the 2N-th roots: f(Ω^j) = h(offset·Ω^j). @@ -1153,7 +1238,7 @@ pub trait IsStarkProver< { let domain_size = domain.interpolation_domain_size; let blowup_factor = domain.blowup_factor; - let num_parts = round_2_result.lde_composition_poly_evaluations.len(); + let num_parts = round_2_result.num_composition_parts(); let z_power = z.pow(num_parts); // pole for H terms // Number of evaluation points per trace column (= transition_offsets.len() * step_size) @@ -1211,7 +1296,7 @@ pub trait IsStarkProver< // H terms: Σ_j γ_j * (H_j(x_i) - H_j(z^K)) * inv_h[i] let mut result = FieldElement::::zero(); for j in 0..num_parts { - let h_j_val = &round_2_result.lde_composition_poly_evaluations[j][row_idx]; + let h_j_val = round_2_result.get_composition_eval(j, row_idx); let h_j_ood = &h_ood[j]; let numerator = h_j_val - h_j_ood; result += &composition_poly_gammas[j] * numerator * &inv_h[i]; @@ -1245,24 +1330,25 @@ pub trait IsStarkProver< /// at the domain value corresponding to the FRI query challenge `index` and its symmetric /// element. 
fn open_composition_poly( - composition_poly_merkle_tree: &BatchedMerkleTree, - lde_composition_poly_evaluations: &[Vec>], + round_2_result: &Round2, index: usize, ) -> PolynomialOpenings where FieldElement: AsBytes + Sync + Send, FieldElement: AsBytes + Sync + Send, { - let proof = composition_poly_merkle_tree + let proof = round_2_result + .composition_poly_merkle_tree .get_proof_by_pos(index) .unwrap(); - let lde_composition_poly_parts_evaluation: Vec<_> = lde_composition_poly_evaluations - .iter() - .flat_map(|part| { + let num_parts = round_2_result.num_composition_parts(); + let lde_composition_poly_parts_evaluation: Vec<_> = (0..num_parts) + .flat_map(|j| { + let part_len = round_2_result.composition_eval_len(j) as u64; vec![ - part[reverse_index(index * 2, part.len() as u64)].clone(), - part[reverse_index(index * 2 + 1, part.len() as u64)].clone(), + round_2_result.get_composition_eval(j, reverse_index(index * 2, part_len)).clone(), + round_2_result.get_composition_eval(j, reverse_index(index * 2 + 1, part_len)).clone(), ] }) .collect(); @@ -1421,8 +1507,7 @@ pub trait IsStarkProver< }); let composition_openings = Self::open_composition_poly( - &round_2_result.composition_poly_merkle_tree, - &round_2_result.lde_composition_poly_evaluations, + round_2_result, *index, ); @@ -2003,7 +2088,7 @@ pub trait IsStarkProver< coefficients.drain(..num_transition_constraints).collect(); let boundary_coefficients = coefficients; - let round_2_result = Self::round_2_compute_composition_polynomial( + let mut round_2_result = Self::round_2_compute_composition_polynomial( air, pub_inputs, domain, @@ -2061,7 +2146,7 @@ pub trait IsStarkProver< air, domain, round_1_result, - &round_2_result, + &mut round_2_result, &round_3_result, &z, transcript, From c8eeb7bcbd7f2b3734a1856c5dc9200243504879 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 27 Mar 2026 17:49:06 -0300 Subject: [PATCH 009/231] Lazy-allocate pool buffers instead of pre-allocating at max size --- 
crypto/stark/src/prover.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 35c63b999..518b8c8d6 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1612,15 +1612,14 @@ pub trait IsStarkProver< } // Allocate K independent LDE column buffer pool sets for parallel table processing. + // Buffers start empty and grow on demand — this avoids reserving + // max_main_cols × max_lde_size × K upfront, which can exceed available RAM + // when the largest table is much bigger than the rest. let k = table_parallelism().min(num_airs).max(1); let mut pool_sets: Vec> = (0..k) .map(|_| PoolSet { - main: (0..max_main_cols) - .map(|_| Vec::with_capacity(max_lde_size)) - .collect(), - aux: (0..max_aux_cols) - .map(|_| Vec::with_capacity(max_lde_size)) - .collect(), + main: (0..max_main_cols).map(|_| Vec::new()).collect(), + aux: (0..max_aux_cols).map(|_| Vec::new()).collect(), }) .collect(); From 1a7ebbc2b55f8cbe01cfe241199e00f9afbb1114 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 27 Mar 2026 18:30:10 -0300 Subject: [PATCH 010/231] Bring back LDE spilling for Rounds 2-4 to reduce peak pool memory --- crypto/stark/src/prover.rs | 354 ++++++++++++++++++++++++++++--------- 1 file changed, 267 insertions(+), 87 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 518b8c8d6..675f94be2 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1637,6 +1637,13 @@ pub trait IsStarkProver< let mut main_commits: Vec> = Vec::with_capacity(num_airs); + // Spilled LDE trace tables: one per AIR, populated during Phase A (main) and Phase C (aux). + // In Rounds 2-4 these replace the reconstruct_round1 flow — LDE data is read from mmap + // instead of being recomputed from the trace. 
+ #[cfg(feature = "disk-spill")] + let mut spilled_ldes: Vec>> = + (0..num_airs).map(|_| None).collect(); + for chunk_start in (0..num_airs).step_by(k) { let chunk_end = (chunk_start + k).min(num_airs); let chunk_size = chunk_end - chunk_start; @@ -1677,6 +1684,28 @@ pub trait IsStarkProver< } transcript.append_bytes(&root); + // Spill the main LDE columns from the pool to a temp-file mmap before + // the pool is overwritten by the next chunk. Also spill Merkle tree nodes + // to disk — they remain accessible through mmap for Rounds 2-4 openings. + #[cfg(feature = "disk-spill")] + { + let idx = chunk_start + j; + let (air, trace, _) = &air_trace_pairs[idx]; + let num_main_cols = trace.num_main_columns; + let spilled = LDETraceTable::spill_main_from_pool( + &pool_sets[j].main, + num_main_cols, + air.step_size(), + domains[idx].blowup_factor, + ) + .map_err(|e| { + ProvingError::WrongParameter(format!( + "disk-spill main LDE table {idx}: {e}" + )) + })?; + spilled_ldes[idx] = Some(spilled); + } + #[allow(unused_mut)] let mut main_tree = Arc::new(tree); #[cfg(feature = "disk-spill")] @@ -1842,15 +1871,33 @@ pub trait IsStarkProver< table_transcripts[chunk_start + j].append_bytes(root); } - // Spill aux Merkle tree nodes to disk. + // Spill aux LDE columns from pool and aux Merkle tree nodes to disk. 
#[cfg(feature = "disk-spill")] - if let Some(ref mut tree_arc) = aux_tree { - Arc::get_mut(tree_arc) - .expect("sole Arc owner") - .spill_nodes_to_disk() - .map_err(|e| { - ProvingError::WrongParameter(format!("disk-spill aux Merkle tree: {e}")) - })?; + { + let idx = chunk_start + j; + let (air, trace, _) = &air_trace_pairs[idx]; + if air.has_aux_trace() { + let num_aux_cols = trace.num_aux_columns; + if let Some(ref mut spilled) = spilled_ldes[idx] { + spilled + .add_aux_from_pool(&pool_sets[j].aux, num_aux_cols) + .map_err(|e| { + ProvingError::WrongParameter(format!( + "disk-spill aux LDE table {idx}: {e}" + )) + })?; + } + } + if let Some(ref mut tree_arc) = aux_tree { + Arc::get_mut(tree_arc) + .expect("sole Arc owner") + .spill_nodes_to_disk() + .map_err(|e| { + ProvingError::WrongParameter(format!( + "disk-spill aux Merkle tree {idx}: {e}" + )) + })?; + } } aux_results.push((aux_tree, aux_root)); @@ -1899,6 +1946,10 @@ pub trait IsStarkProver< // ===================================================================== // Each chunk of K tables is processed in parallel. Each worker gets its // own pool set and transcript fork. Pool sets are reused across chunks. + // + // disk-spill path: LDE data is read from mmap-backed spilled_ldes instead + // of being recomputed via reconstruct_round1. This avoids the peak memory + // spike of holding both the trace and its LDE in RAM simultaneously. 
#[cfg(feature = "instruments")] let phase_start = Instant::now(); @@ -1911,100 +1962,229 @@ pub trait IsStarkProver< )> = Vec::with_capacity(num_airs); let mut proofs = Vec::with_capacity(num_airs); - for chunk_start in (0..num_airs).step_by(k) { - let chunk_end = (chunk_start + k).min(num_airs); - let chunk_size = chunk_end - chunk_start; - let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; + // ----- disk-spill path: read from spilled LDEs ----- + #[cfg(feature = "disk-spill")] + { + for chunk_start in (0..num_airs).step_by(k) { + let chunk_end = (chunk_start + k).min(num_airs); - #[cfg(feature = "parallel")] - let iter = pool_sets[..chunk_size] - .par_iter_mut() - .zip(chunk_transcripts.par_iter_mut()) - .enumerate(); - #[cfg(not(feature = "parallel"))] - let iter = pool_sets[..chunk_size] - .iter_mut() - .zip(chunk_transcripts.iter_mut()) - .enumerate(); + // Pre-take spilled LDEs for the chunk (needs &mut, can't do inside par_iter) + let chunk_ldes: Vec<_> = (chunk_start..chunk_end) + .map(|i| { + spilled_ldes[i] + .take() + .expect("spilled LDE must exist for every AIR") + }) + .collect(); - let chunk_results: Vec> = iter - .map(|(j, (pool, table_transcript))| { - let idx = chunk_start + j; - let (air, trace, pub_inputs) = &air_trace_pairs[idx]; - let metadata = &metadatas[idx]; - let domain = &domains[idx]; - let twiddles = &twiddle_caches[idx]; + let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; - #[cfg(feature = "instruments")] - let table_start = Instant::now(); + #[cfg(feature = "parallel")] + let iter = chunk_ldes + .into_par_iter() + .zip(chunk_transcripts.par_iter_mut()) + .enumerate(); + #[cfg(not(feature = "parallel"))] + let iter = chunk_ldes + .into_iter() + .zip(chunk_transcripts.iter_mut()) + .enumerate(); + + let chunk_results: Vec> = iter + .map(|(j, (lde_trace, table_transcript))| { + let idx = chunk_start + j; + let (air, _trace, pub_inputs) = &air_trace_pairs[idx]; + let metadata = &metadatas[idx]; 
+ let domain = &domains[idx]; - #[cfg(feature = "instruments")] - let lde_start = Instant::now(); - let round_1_result = Self::reconstruct_round1( - *air, - *trace, - domain, - metadata, - twiddles, - &mut pool.main, - &mut pool.aux, - )?; - #[cfg(feature = "instruments")] - let lde_dur = lde_start.elapsed(); + #[cfg(feature = "instruments")] + let table_start = Instant::now(); + + let main = Round1CommitmentData:: { + lde_trace_merkle_tree: Arc::clone(&metadata.main_merkle_tree), + lde_trace_merkle_root: metadata.main_merkle_root, + precomputed_merkle_tree: metadata + .precomputed_merkle_tree + .as_ref() + .map(Arc::clone), + precomputed_merkle_root: metadata.precomputed_merkle_root, + num_precomputed_cols: metadata.num_precomputed_cols, + }; + + let aux = if air.has_aux_trace() { + Some(Round1CommitmentData:: { + lde_trace_merkle_tree: Arc::clone( + metadata + .aux_merkle_tree + .as_ref() + .expect("aux tree must exist when has_aux_trace"), + ), + lde_trace_merkle_root: metadata + .aux_merkle_root + .expect("aux root must exist when has_aux_trace"), + precomputed_merkle_tree: None, + precomputed_merkle_root: None, + num_precomputed_cols: 0, + }) + } else { + None + }; + + let round_1_result = Round1 { + lde_trace, + main, + aux, + rap_challenges: metadata.rap_challenges.clone(), + bus_public_inputs: metadata.bus_public_inputs.clone(), + }; + + if let Some(ref bpi) = round_1_result.bus_public_inputs { + table_transcript.append_field_element(&bpi.table_contribution); + } + + let proof = Self::prove_rounds_2_to_4( + *air, + *pub_inputs, + &round_1_result, + table_transcript, + domain, + )?; - if let Some(ref bpi) = round_1_result.bus_public_inputs { - table_transcript.append_field_element(&bpi.table_contribution); + #[cfg(feature = "instruments")] + { + let sub_ops = + crate::instruments::take_round_sub_ops().unwrap_or_default(); + return Ok(( + proof, + ( + air.name().to_string(), + air_trace_pairs[idx].1.num_rows(), + table_start.elapsed(), + sub_ops, + ), + )); + 
} + #[cfg(not(feature = "instruments"))] + Ok(proof) + }) + .collect(); + + for result in chunk_results { + #[cfg(feature = "instruments")] + { + let (proof, timing) = result?; + proofs.push(proof); + table_timings.push(timing); } + #[cfg(not(feature = "instruments"))] + proofs.push(result?); + } + } + } - let proof = Self::prove_rounds_2_to_4( - *air, - *pub_inputs, - &round_1_result, - table_transcript, - domain, - )?; + // ----- non-disk-spill path: reconstruct LDE from trace (original flow) ----- + #[cfg(not(feature = "disk-spill"))] + { + for chunk_start in (0..num_airs).step_by(k) { + let chunk_end = (chunk_start + k).min(num_airs); + let chunk_size = chunk_end - chunk_start; + + let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; + + #[cfg(feature = "parallel")] + let iter = pool_sets[..chunk_size] + .par_iter_mut() + .zip(chunk_transcripts.par_iter_mut()) + .enumerate(); + #[cfg(not(feature = "parallel"))] + let iter = pool_sets[..chunk_size] + .iter_mut() + .zip(chunk_transcripts.iter_mut()) + .enumerate(); + + let chunk_results: Vec> = iter + .map(|(j, (pool, table_transcript))| { + let idx = chunk_start + j; + let (air, trace, pub_inputs) = &air_trace_pairs[idx]; + let metadata = &metadatas[idx]; + let domain = &domains[idx]; + let twiddles = &twiddle_caches[idx]; - #[cfg(feature = "instruments")] - let table_timing = { - let mut sub_ops = - crate::instruments::take_round_sub_ops().unwrap_or_default(); - sub_ops.trace_lde += lde_dur; - ( - air.name().to_string(), - trace.num_rows(), - table_start.elapsed(), - sub_ops, - ) - }; + #[cfg(feature = "instruments")] + let table_start = Instant::now(); - // Return column Vecs to pool (zero-copy move back) - let (main_cols, aux_cols) = round_1_result.lde_trace.into_columns(); - for (slot, col) in pool.main.iter_mut().zip(main_cols) { - *slot = col; - } - for (slot, col) in pool.aux.iter_mut().zip(aux_cols) { - *slot = col; - } + #[cfg(feature = "instruments")] + let lde_start = 
Instant::now(); + let round_1_result = Self::reconstruct_round1( + *air, + *trace, + domain, + metadata, + twiddles, + &mut pool.main, + &mut pool.aux, + )?; + #[cfg(feature = "instruments")] + let lde_dur = lde_start.elapsed(); + + if let Some(ref bpi) = round_1_result.bus_public_inputs { + table_transcript.append_field_element(&bpi.table_contribution); + } + + let proof = Self::prove_rounds_2_to_4( + *air, + *pub_inputs, + &round_1_result, + table_transcript, + domain, + )?; + + // Collect per-table sub-op timing via TLS. + // Both the store (inside prove_rounds_2_to_4) and this take run on the + // same rayon worker thread, so sub-ops are valid in both sequential and + // parallel mode. + #[cfg(feature = "instruments")] + let table_timing = { + let mut sub_ops = + crate::instruments::take_round_sub_ops().unwrap_or_default(); + sub_ops.trace_lde += lde_dur; + ( + air.name().to_string(), + trace.num_rows(), + table_start.elapsed(), + sub_ops, + ) + }; + + // Return column Vecs to pool (zero-copy move back) + let (main_cols, aux_cols) = round_1_result.lde_trace.into_columns(); + for (slot, col) in pool.main.iter_mut().zip(main_cols) { + *slot = col; + } + for (slot, col) in pool.aux.iter_mut().zip(aux_cols) { + *slot = col; + } + #[cfg(feature = "instruments")] + return Ok((proof, table_timing)); + #[cfg(not(feature = "instruments"))] + Ok(proof) + }) + .collect(); + + for result in chunk_results { #[cfg(feature = "instruments")] - return Ok((proof, table_timing)); + { + let (proof, timing) = result?; + proofs.push(proof); + table_timings.push(timing); + } #[cfg(not(feature = "instruments"))] - Ok(proof) - }) - .collect(); - - for result in chunk_results { - #[cfg(feature = "instruments")] - { - let (proof, timing) = result?; - proofs.push(proof); - table_timings.push(timing); + proofs.push(result?); } - #[cfg(not(feature = "instruments"))] - proofs.push(result?); } - } + } // end #[cfg(not(feature = "disk-spill"))] #[cfg(feature = "instruments")] { From 
94dcef18c0a843b904aa4b286fb3291c1690da0a Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 27 Mar 2026 18:36:01 -0300 Subject: [PATCH 011/231] Free pool buffers between phases to reduce peak memory --- crypto/stark/src/prover.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 675f94be2..dbdb0ee0f 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1750,6 +1750,16 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let main_commits_elapsed = phase_start.elapsed(); + // Free main pool buffers after Phase A — they are not needed for Phase C + // (aux uses its own buffers) and their retained capacity from the largest + // table can be tens of GB. + #[cfg(feature = "disk-spill")] + for pool in pool_sets.iter_mut() { + for buf in pool.main.iter_mut() { + *buf = Vec::new(); + } + } + // ===================================================================== // Round 1, Phase B: Sample shared LogUp challenges // ===================================================================== @@ -1941,6 +1951,11 @@ pub trait IsStarkProver< ); } + // Free pool buffers — the disk-spill Rounds 2-4 path reads from + // spilled LDEs (mmap), so pool buffers are no longer needed. 
+ #[cfg(feature = "disk-spill")] + drop(pool_sets); + // ===================================================================== // Rounds 2-4: Parallel per-table proving in chunks of K // ===================================================================== From 23794bb37dcc75aed718eb6c9a05e357f64a0dba Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 27 Mar 2026 18:52:45 -0300 Subject: [PATCH 012/231] Spill LDE and free pool inside parallel closure to reduce peak memory --- crypto/stark/src/prover.rs | 67 ++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index dbdb0ee0f..705824d50 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1660,7 +1660,7 @@ pub trait IsStarkProver< let domain = &domains[idx]; let twiddles = &twiddle_caches[idx]; - if air.is_preprocessed() { + let (tree, root, pre_tree, pre_root, n_pre) = if air.is_preprocessed() { Self::commit_preprocessed_trace( *trace, domain, @@ -1668,41 +1668,56 @@ pub trait IsStarkProver< air.num_precomputed_columns(), twiddles, &mut pool.main, - ) + )? } else { - Self::commit_main_trace(*trace, domain, twiddles, &mut pool.main) - } + Self::commit_main_trace(*trace, domain, twiddles, &mut pool.main)? + }; + + // Spill LDE from pool to mmap while pool is still filled, + // then free pool buffers to reduce peak memory. 
+ #[cfg(feature = "disk-spill")] + let spilled = { + let num_main_cols = trace.num_main_columns; + let s = LDETraceTable::spill_main_from_pool( + &pool.main, + num_main_cols, + air.step_size(), + domain.blowup_factor, + ) + .map_err(|e| { + ProvingError::WrongParameter(format!( + "disk-spill main LDE table {idx}: {e}" + )) + })?; + for buf in pool.main.iter_mut() { + *buf = Vec::new(); + } + s + }; + + #[cfg(feature = "disk-spill")] + return Ok((tree, root, pre_tree, pre_root, n_pre, spilled)); + #[cfg(not(feature = "disk-spill"))] + Ok((tree, root, pre_tree, pre_root, n_pre)) }) .collect(); // Sequential: append roots to shared transcript (Fiat-Shamir ordering) #[allow(unused_variables, unused_mut)] for (j, result) in chunk_results.into_iter().enumerate() { + #[cfg(feature = "disk-spill")] + let (tree, root, pre_tree, pre_root, n_pre, spilled) = result?; + #[cfg(not(feature = "disk-spill"))] let (tree, root, pre_tree, pre_root, n_pre) = result?; + if let Some(ref pre_r) = pre_root { transcript.append_bytes(pre_r); } transcript.append_bytes(&root); - // Spill the main LDE columns from the pool to a temp-file mmap before - // the pool is overwritten by the next chunk. Also spill Merkle tree nodes - // to disk — they remain accessible through mmap for Rounds 2-4 openings. 
#[cfg(feature = "disk-spill")] { let idx = chunk_start + j; - let (air, trace, _) = &air_trace_pairs[idx]; - let num_main_cols = trace.num_main_columns; - let spilled = LDETraceTable::spill_main_from_pool( - &pool_sets[j].main, - num_main_cols, - air.step_size(), - domains[idx].blowup_factor, - ) - .map_err(|e| { - ProvingError::WrongParameter(format!( - "disk-spill main LDE table {idx}: {e}" - )) - })?; spilled_ldes[idx] = Some(spilled); } @@ -1745,19 +1760,7 @@ pub trait IsStarkProver< num_precomputed_cols: n_pre, }); } - } - - #[cfg(feature = "instruments")] - let main_commits_elapsed = phase_start.elapsed(); - // Free main pool buffers after Phase A — they are not needed for Phase C - // (aux uses its own buffers) and their retained capacity from the largest - // table can be tens of GB. - #[cfg(feature = "disk-spill")] - for pool in pool_sets.iter_mut() { - for buf in pool.main.iter_mut() { - *buf = Vec::new(); - } } // ===================================================================== From a49e2753e4dcd17cba36f7d1680224b70502d1cc Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 27 Mar 2026 19:06:22 -0300 Subject: [PATCH 013/231] Use k_commit=1 for Phase A/C to avoid multiple large pools alive simultaneously --- crypto/stark/src/prover.rs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 705824d50..2f107c5c1 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1611,12 +1611,16 @@ pub trait IsStarkProver< .map_err(|e| ProvingError::WrongParameter(format!("disk-spill early main: {e}")))?; } - // Allocate K independent LDE column buffer pool sets for parallel table processing. - // Buffers start empty and grow on demand — this avoids reserving - // max_main_cols × max_lde_size × K upfront, which can exceed available RAM - // when the largest table is much bigger than the rest. + // Number of tables to process concurrently. 
+ // disk-spill: Phase A/C use k_commit=1 (one pool at a time, since each + // table's LDE already saturates all cores via column-parallel FFT). + // Rounds 2-4 use the full k (no pools needed, reads from mmap). let k = table_parallelism().min(num_airs).max(1); - let mut pool_sets: Vec> = (0..k) + #[cfg(feature = "disk-spill")] + let k_commit = 1_usize; + #[cfg(not(feature = "disk-spill"))] + let k_commit = k; + let mut pool_sets: Vec> = (0..k_commit) .map(|_| PoolSet { main: (0..max_main_cols).map(|_| Vec::new()).collect(), aux: (0..max_aux_cols).map(|_| Vec::new()).collect(), @@ -1644,8 +1648,8 @@ pub trait IsStarkProver< let mut spilled_ldes: Vec>> = (0..num_airs).map(|_| None).collect(); - for chunk_start in (0..num_airs).step_by(k) { - let chunk_end = (chunk_start + k).min(num_airs); + for chunk_start in (0..num_airs).step_by(k_commit) { + let chunk_end = (chunk_start + k_commit).min(num_airs); let chunk_size = chunk_end - chunk_start; #[cfg(feature = "parallel")] @@ -1832,8 +1836,8 @@ pub trait IsStarkProver< Option, )> = Vec::with_capacity(num_airs); - for chunk_start in (0..num_airs).step_by(k) { - let chunk_end = (chunk_start + k).min(num_airs); + for chunk_start in (0..num_airs).step_by(k_commit) { + let chunk_end = (chunk_start + k_commit).min(num_airs); let chunk_size = chunk_end - chunk_start; #[cfg(feature = "parallel")] From 098833c2e91a2be9d78f3636d118bc9330707465 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 27 Mar 2026 19:40:27 -0300 Subject: [PATCH 014/231] Drop trace mmap pages from page cache after extracting into pool buffers --- Cargo.lock | 1 + crypto/stark/Cargo.toml | 3 ++- crypto/stark/src/prover.rs | 4 ++++ crypto/stark/src/table.rs | 16 ++++++++++++++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 1a07bd3ff..5e14b385b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3560,6 +3560,7 @@ dependencies = [ "env_logger", "hex", "itertools 0.11.0", + "libc", "log", "math", "memmap2", diff 
--git a/crypto/stark/Cargo.toml b/crypto/stark/Cargo.toml index f7e67fb23..ec52aef9f 100644 --- a/crypto/stark/Cargo.toml +++ b/crypto/stark/Cargo.toml @@ -29,6 +29,7 @@ rayon = { version = "1.8.0", optional = true } # Disk-spill: mmap LDE data to reduce heap memory during proving memmap2 = { version = "0.9", optional = true } tempfile = { version = "3", optional = true } +libc = { version = "0.2", optional = true } # wasm wasm-bindgen = { version = "0.2", optional = true } @@ -52,7 +53,7 @@ instruments = [] # This enab debug-checks = [] # Enables validate_trace + bus balance report in prover parallel = ["dep:rayon", "crypto/parallel"] wasm = ["dep:wasm-bindgen", "dep:serde-wasm-bindgen", "dep:web-sys"] -disk-spill = ["dep:memmap2", "dep:tempfile", "crypto/disk-spill"] +disk-spill = ["dep:memmap2", "dep:tempfile", "dep:libc", "crypto/disk-spill"] [target.'cfg(not(all(target_arch = "wasm32", target_os = "unknown")))'.dev-dependencies] proptest = "1.2.0" diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 2f107c5c1..5e31e1ce4 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -549,6 +549,8 @@ pub trait IsStarkProver< { let num_cols = trace.num_main_columns; trace.extract_columns_main_into(main_pool); + #[cfg(feature = "disk-spill")] + trace.main_table.advise_drop_cache(); #[cfg(feature = "instruments")] let t_sub = Instant::now(); Self::expand_pool_to_lde::(main_pool, num_cols, domain, twiddles); @@ -592,6 +594,8 @@ pub trait IsStarkProver< { let num_cols = trace.num_main_columns; trace.extract_columns_main_into(main_pool); + #[cfg(feature = "disk-spill")] + trace.main_table.advise_drop_cache(); #[cfg(feature = "instruments")] let t_sub = Instant::now(); Self::expand_pool_to_lde::(main_pool, num_cols, domain, twiddles); diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 047d39c00..d7b1a12b9 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -322,6 +322,22 @@ impl Table { 
Ok(()) } + /// Advise the kernel to drop mmap pages from the page cache. + /// Call after reading spilled data into pool buffers to free ~37GB of + /// cached pages that would otherwise persist under memory pressure. + #[cfg(feature = "disk-spill")] + pub fn advise_drop_cache(&self) { + if let Some(ref backing) = self.mmap_backing { + unsafe { + libc::madvise( + backing.mmap.as_ptr() as *mut libc::c_void, + backing.mmap.len(), + libc::MADV_DONTNEED, + ); + } + } + } + /// Given a step size, converts the given table into a `Frame`. /// Clones row data into owned Vecs (only used by verifier on small OOD tables). pub fn into_frame(&self, main_trace_columns: usize, step_size: usize) -> Frame { From 431672615fe88f4c6a55ac436f047712a006e042 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 27 Mar 2026 19:42:34 -0300 Subject: [PATCH 015/231] Restore main_commits_elapsed timing for instruments feature --- crypto/stark/src/prover.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 5e31e1ce4..3c8a8071d 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1771,6 +1771,9 @@ pub trait IsStarkProver< } + #[cfg(feature = "instruments")] + let main_commits_elapsed = phase_start.elapsed(); + // ===================================================================== // Round 1, Phase B: Sample shared LogUp challenges // ===================================================================== From adf2dabe28da57a124f324d9bb40ad02e2c59714 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 27 Mar 2026 19:58:00 -0300 Subject: [PATCH 016/231] Add per-phase heap profiling to instruments --- bin/cli/src/main.rs | 6 + crypto/stark/src/fri/fri_commitment.rs | 5 +- crypto/stark/src/instruments.rs | 40 ++++ crypto/stark/src/prover.rs | 52 ++++-- prover/src/instruments.rs | 40 +++- prover/src/lib.rs | 15 ++ scripts/bench_heap_profile.sh | 247 +++++++++++++++++++++++++ 7 files changed, 389 
insertions(+), 16 deletions(-) create mode 100755 scripts/bench_heap_profile.sh diff --git a/bin/cli/src/main.rs b/bin/cli/src/main.rs index 4f6b36c09..a71a306f6 100644 --- a/bin/cli/src/main.rs +++ b/bin/cli/src/main.rs @@ -277,6 +277,12 @@ fn cmd_prove( eprintln!("Disk-spill: enabled"); } + #[cfg(all(feature = "jemalloc-stats", feature = "instruments"))] + stark::instruments::set_heap_reader(|| { + tikv_jemalloc_ctl::epoch::advance().ok(); + tikv_jemalloc_ctl::stats::allocated::read().unwrap_or(0) + }); + let max_rows_config = match max_rows { Some(mr) => { eprintln!("Max rows per chunk: {mr}"); diff --git a/crypto/stark/src/fri/fri_commitment.rs b/crypto/stark/src/fri/fri_commitment.rs index b8d1a366a..b7c070b0a 100644 --- a/crypto/stark/src/fri/fri_commitment.rs +++ b/crypto/stark/src/fri/fri_commitment.rs @@ -76,10 +76,7 @@ where { let mut writer = std::io::BufWriter::new(&file); let bytes = unsafe { - std::slice::from_raw_parts( - self.evaluation.as_ptr() as *const u8, - total_bytes, - ) + std::slice::from_raw_parts(self.evaluation.as_ptr() as *const u8, total_bytes) }; writer.write_all(bytes)?; writer.flush()?; diff --git a/crypto/stark/src/instruments.rs b/crypto/stark/src/instruments.rs index 3c13ef2e1..beda186d8 100644 --- a/crypto/stark/src/instruments.rs +++ b/crypto/stark/src/instruments.rs @@ -1,7 +1,30 @@ use std::cell::RefCell; +use std::sync::OnceLock; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::Duration; +// --------------------------------------------------------------------------- +// Heap reader callback: set by the binary (CLI) to provide jemalloc reads +// without coupling this crate to jemalloc. +// --------------------------------------------------------------------------- + +static HEAP_READER: OnceLock usize> = OnceLock::new(); + +/// Register a function that returns the current heap allocated bytes. +/// Call this once from the binary before proving starts. 
+pub fn set_heap_reader(f: fn() -> usize) { + let _ = HEAP_READER.set(f); +} + +/// Read current heap in bytes, or `None` if no reader was registered. +pub fn heap_bytes() -> Option { + HEAP_READER.get().map(|f| f()) +} + +fn heap_mb() -> Option { + heap_bytes().map(|b| b / (1024 * 1024)) +} + /// Sub-operation timing breakdown for a single table in Rounds 2-4. #[derive(Clone, Debug, Default)] pub struct TableSubOps { @@ -38,6 +61,9 @@ pub struct Round1SubOps { pub aux_merkle: Duration, } +/// Heap snapshot: (label, allocated_mb) at a phase boundary. +pub type HeapSnapshot = (&'static str, usize); + /// Timing data collected inside `multi_prove`. pub struct MultiProveTiming { pub prepass: Duration, @@ -49,6 +75,20 @@ pub struct MultiProveTiming { pub round1_sub: Round1SubOps, /// (name, rows, duration, sub_ops) per table for rounds 2-4. pub table_timings: Vec<(String, usize, Duration, TableSubOps)>, + /// Heap snapshots at phase boundaries (empty if no heap reader set). + pub heap_snapshots: Vec, +} + +/// Heap snapshots taken in `prove_with_options` (before multi_prove). +pub struct ProveHeapProfile { + pub after_execute: Option, + pub after_trace_build: Option, + pub after_air: Option, +} + +/// Take a heap snapshot, returning `(label, mb)` or `None`. +pub fn snap(label: &'static str) -> Option { + heap_mb().map(|mb| (label, mb)) } /// Round 1 sub-timings: atomics so parallel rayon workers can accumulate safely. 
diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 3c8a8071d..7b580fb08 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -303,16 +303,20 @@ where file.set_len(total_bytes as u64)?; { let mut writer = std::io::BufWriter::new(&file); - let bytes = unsafe { - std::slice::from_raw_parts(part.as_ptr() as *const u8, total_bytes) - }; + let bytes = + unsafe { std::slice::from_raw_parts(part.as_ptr() as *const u8, total_bytes) }; writer.write_all(bytes)?; writer.flush()?; } let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; let len = part.len(); drop(part); - mmaps.push(Round2EvalMmap { mmap, _file: file, len, elem_size }); + mmaps.push(Round2EvalMmap { + mmap, + _file: file, + len, + elem_size, + }); } self.eval_mmaps = Some(mmaps); @@ -1351,8 +1355,12 @@ pub trait IsStarkProver< .flat_map(|j| { let part_len = round_2_result.composition_eval_len(j) as u64; vec![ - round_2_result.get_composition_eval(j, reverse_index(index * 2, part_len)).clone(), - round_2_result.get_composition_eval(j, reverse_index(index * 2 + 1, part_len)).clone(), + round_2_result + .get_composition_eval(j, reverse_index(index * 2, part_len)) + .clone(), + round_2_result + .get_composition_eval(j, reverse_index(index * 2 + 1, part_len)) + .clone(), ] }) .collect(); @@ -1510,10 +1518,7 @@ pub trait IsStarkProver< ) }); - let composition_openings = Self::open_composition_poly( - round_2_result, - *index, - ); + let composition_openings = Self::open_composition_poly(round_2_result, *index); let aux_trace_polys = round_1_result.aux.as_ref().map(|aux| { Self::open_trace_polys_aux( @@ -1568,6 +1573,12 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] crate::instruments::reset_all(); + #[cfg(feature = "instruments")] + let mut heap_snaps: Vec = Vec::new(); + #[cfg(feature = "instruments")] + if let Some(s) = crate::instruments::snap("entry") { + heap_snaps.push(s); + } let num_airs = air_trace_pairs.len(); @@ -1633,6 +1644,10 @@ pub 
trait IsStarkProver< #[cfg(feature = "instruments")] let prepass_elapsed = phase_start.elapsed(); + #[cfg(feature = "instruments")] + if let Some(s) = crate::instruments::snap("after pool alloc") { + heap_snaps.push(s); + } // ===================================================================== // Round 1, Phase A: Commit all main traces (parallel in chunks of K) @@ -1768,11 +1783,14 @@ pub trait IsStarkProver< num_precomputed_cols: n_pre, }); } - } #[cfg(feature = "instruments")] let main_commits_elapsed = phase_start.elapsed(); + #[cfg(feature = "instruments")] + if let Some(s) = crate::instruments::snap("after main commits") { + heap_snaps.push(s); + } // ===================================================================== // Round 1, Phase B: Sample shared LogUp challenges @@ -1819,6 +1837,10 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let aux_build_elapsed = phase_start.elapsed(); + #[cfg(feature = "instruments")] + if let Some(s) = crate::instruments::snap("after aux build") { + heap_snaps.push(s); + } // Pass 2: Parallel fork transcript → extract → LDE → commit in chunks of K. // Each table gets its own transcript fork and pool set. @@ -1951,6 +1973,10 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let aux_commit_elapsed = phase_start.elapsed(); + #[cfg(feature = "instruments")] + if let Some(s) = crate::instruments::snap("after aux commit") { + heap_snaps.push(s); + } #[cfg(feature = "debug-checks")] { @@ -2217,6 +2243,9 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] { + if let Some(s) = crate::instruments::snap("after rounds 2-4") { + heap_snaps.push(s); + } // Store timing data for the top-level report in prove_with_options. // Uses a thread-local to avoid changing multi_prove's return type. 
crate::instruments::store(crate::instruments::MultiProveTiming { @@ -2227,6 +2256,7 @@ pub trait IsStarkProver< rounds_2_4: phase_start.elapsed(), round1_sub: crate::instruments::take_r1_sub(), table_timings, + heap_snapshots: heap_snaps, }); } diff --git a/prover/src/instruments.rs b/prover/src/instruments.rs index e3db38b95..96cb8b79c 100644 --- a/prover/src/instruments.rs +++ b/prover/src/instruments.rs @@ -57,6 +57,8 @@ pub fn print_report( trace_build: Duration, air_construction: Duration, total: Duration, + heap_before: Option, + heap_profile: &stark::instruments::ProveHeapProfile, ) { let mp = stark::instruments::take(); @@ -69,7 +71,7 @@ pub fn print_report( row_top("Trace build", trace_build, total); row_top("AIR construction", air_construction, total); - if let Some(mp) = mp { + if let Some(ref mp) = mp { let round1 = mp.main_commits + mp.aux_build + mp.aux_commit; row_top("Pre-pass (domains/twiddles)", mp.prepass, total); @@ -214,4 +216,40 @@ pub fn print_report( eprintln!(" {}", "─".repeat(58)); eprintln!(" {:<36} {:>7.2}s", "TOTAL", total.as_secs_f64()); eprintln!(); + + // Heap profile + let mb = |b: usize| b / (1024 * 1024); + let has_heap = heap_before.is_some(); + if has_heap { + eprintln!("=== HEAP PROFILE (MB) ==="); + eprintln!(" {:<36} {:>8} {:>8}", "Phase", "Heap", "Delta"); + eprintln!(" {}", "─".repeat(56)); + + let mut prev = heap_before.unwrap(); + let mut print_row = |label: &str, val: Option| { + if let Some(v) = val { + let cur = mb(v); + let delta = mb(v) as isize - mb(prev) as isize; + eprintln!(" {:<36} {:>7} {:>+8}", label, cur, delta); + prev = v; + } + }; + + print_row("After execute", heap_profile.after_execute); + print_row("After trace build", heap_profile.after_trace_build); + print_row("After AIR construction", heap_profile.after_air); + + if let Some(ref mp_data) = mp { + for (label, snap_mb) in &mp_data.heap_snapshots { + let snap_bytes = snap_mb * (1024 * 1024); + let cur = *snap_mb; + let delta = cur as isize - mb(prev) 
as isize; + eprintln!(" {:<36} {:>7} {:>+8}", label, cur, delta); + prev = snap_bytes; + } + } + + eprintln!(" {}", "─".repeat(56)); + eprintln!(); + } } diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 11a4757d9..3e60dc0b8 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -450,6 +450,8 @@ pub fn prove_with_options( ) -> Result { #[cfg(feature = "instruments")] let total_start = std::time::Instant::now(); + #[cfg(feature = "instruments")] + let heap_before = stark::instruments::heap_bytes(); // Phase 1: Execute (ELF load + run) #[cfg(feature = "instruments")] @@ -463,6 +465,8 @@ pub fn prove_with_options( #[cfg(feature = "instruments")] let execute_elapsed = phase_start.elapsed(); + #[cfg(feature = "instruments")] + let heap_after_execute = stark::instruments::heap_bytes(); // Phase 2: Trace build #[cfg(feature = "instruments")] @@ -481,6 +485,8 @@ pub fn prove_with_options( #[cfg(feature = "instruments")] let trace_build_elapsed = phase_start.elapsed(); + #[cfg(feature = "instruments")] + let heap_after_trace = stark::instruments::heap_bytes(); #[cfg(feature = "instruments")] let phase_start = std::time::Instant::now(); @@ -496,6 +502,8 @@ pub fn prove_with_options( #[cfg(feature = "instruments")] let air_elapsed = phase_start.elapsed(); + #[cfg(feature = "instruments")] + let heap_after_air = stark::instruments::heap_bytes(); let runtime_page_ranges = traces.runtime_page_ranges(); @@ -508,11 +516,18 @@ pub fn prove_with_options( #[cfg(feature = "instruments")] { + let heap_profile = stark::instruments::ProveHeapProfile { + after_execute: heap_after_execute, + after_trace_build: heap_after_trace, + after_air: heap_after_air, + }; instruments::print_report( execute_elapsed, trace_build_elapsed, air_elapsed, total_start.elapsed(), + heap_before, + &heap_profile, ); } diff --git a/scripts/bench_heap_profile.sh b/scripts/bench_heap_profile.sh new file mode 100755 index 000000000..512e93113 --- /dev/null +++ b/scripts/bench_heap_profile.sh @@ -0,0 
+1,247 @@ +#!/bin/bash +# Per-phase heap profile across program sizes. +# Shows where heap grows and how each phase scales with program size. +# +# Usage: bench_heap_profile.sh [--no-build] [--programs "500k 1M 2M 4M"] +# +# Requires: instruments + jemalloc-stats features. +# Peak heap is deterministic, so 1 run per size is enough. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +TMP_DIR="/tmp/bench_heap_profile" +ELF_DIR="$ROOT_DIR/executor/program_artifacts/asm" + +GREEN='\033[0;32m' +BOLD='\033[1m' +NC='\033[0m' + +BUILD=true +PROGRAMS="500k 1M 2M 4M" + +while [[ $# -gt 0 ]]; do + case $1 in + --no-build) BUILD=false; shift ;; + --programs) PROGRAMS="$2"; shift 2 ;; + *) echo "Unknown arg: $1"; exit 1 ;; + esac +done + +suffix_to_steps() { + case $1 in + 160k) echo 160000 ;; 250k) echo 250000 ;; 372k) echo 372000 ;; + 500k) echo 500000 ;; 1M) echo 1000000 ;; 1200k) echo 1200000 ;; + 2M) echo 2000000 ;; 4M) echo 4000000 ;; 8M) echo 8000000 ;; + *) echo "Unknown: $1" >&2; exit 1 ;; + esac +} + +rm -rf "$TMP_DIR" && mkdir -p "$TMP_DIR" + +if $BUILD; then + echo -e "${GREEN}Building CLI with instruments + jemalloc-stats...${NC}" + cargo build --release -p cli --features jemalloc-stats,instruments \ + --manifest-path "$ROOT_DIR/Cargo.toml" 2>&1 | tail -1 +fi +CLI="$ROOT_DIR/target/release/cli" + +# Phase labels we parse from stderr (order matters) +PHASES="execute trace_build air pool_alloc main_commits aux_build aux_commit rounds_2_4" + +for size in $PROGRAMS; do + ELF="$ELF_DIR/fib_iterative_${size}.elf" + [ -f "$ELF" ] || { echo "Missing: $ELF"; continue; } + steps=$(suffix_to_steps "$size") + echo -e "${GREEN}Running fib_iterative_${size}...${NC}" + + STDERR="$TMP_DIR/${size}_stderr.txt" + STDOUT="$TMP_DIR/${size}_stdout.txt" + "$CLI" prove "$ELF" -o "$TMP_DIR/proof.bin" --time >"$STDOUT" 2>"$STDERR" + rm -f "$TMP_DIR/proof.bin" + + # Parse absolute heap values (second-to-last column) 
from HEAP PROFILE section + HEAP_VALS=$(awk '/^=== HEAP PROFILE/,/^──/{ + if (/After execute/) printf "execute=%s\n", $(NF-1) + if (/After trace build/) printf "trace_build=%s\n", $(NF-1) + if (/After AIR/) printf "air=%s\n", $(NF-1) + if (/after pool alloc/) printf "pool_alloc=%s\n", $(NF-1) + if (/after main commits/) printf "main_commits=%s\n", $(NF-1) + if (/after aux build/) printf "aux_build=%s\n", $(NF-1) + if (/after aux commit/) printf "aux_commit=%s\n", $(NF-1) + if (/after rounds 2-4/) printf "rounds_2_4=%s\n", $(NF-1) + }' "$STDERR") + + PEAK=$(grep -o 'Peak heap: [0-9]*' "$STDOUT" | awk '{print $3}') + echo "steps=$steps" > "$TMP_DIR/${size}_data.txt" + echo "peak=$PEAK" >> "$TMP_DIR/${size}_data.txt" + echo "$HEAP_VALS" >> "$TMP_DIR/${size}_data.txt" +done + +echo "" +echo -e "${BOLD}=== HEAP PROFILE ACROSS SIZES ===${NC}" +echo "" + +# Print table: phases as rows, sizes as columns +# Header +printf " %-22s" "Phase (delta MB)" +for size in $PROGRAMS; do printf " %10s" "$size"; done +echo "" +printf " %-22s" "──────────────────────" +for size in $PROGRAMS; do printf " %10s" "──────────"; done +echo "" + +# For each phase, print the delta +prev_phase="" +for phase in $PHASES; do + case $phase in + execute) label="Execute" ;; + trace_build) label="Trace build" ;; + air) label="AIR construction" ;; + pool_alloc) label="Pool allocation" ;; + main_commits) label="Main commits" ;; + aux_build) label="Aux build" ;; + aux_commit) label="Aux commit" ;; + rounds_2_4) label="Rounds 2-4" ;; + esac + + printf " %-22s" "$label" + for size in $PROGRAMS; do + DATA="$TMP_DIR/${size}_data.txt" + [ -f "$DATA" ] || { printf " %10s" "N/A"; continue; } + cur=$(grep "^${phase}=" "$DATA" | cut -d= -f2) + if [ -z "$cur" ]; then + printf " %10s" "N/A" + else + # Get previous phase value to compute delta + if [ -z "$prev_phase" ]; then + delta="$cur" + else + prev_val=$(grep "^${prev_phase}=" "$DATA" | cut -d= -f2) + delta=$((cur - prev_val)) + fi + printf " %+10d" "$delta" + fi 
+ done + echo "" + prev_phase=$phase +done + +# Total/peak row +printf " %-22s" "──────────────────────" +for size in $PROGRAMS; do printf " %10s" "──────────"; done +echo "" +printf " %-22s" "Peak heap" +for size in $PROGRAMS; do + DATA="$TMP_DIR/${size}_data.txt" + peak=$(grep "^peak=" "$DATA" | cut -d= -f2) + printf " %10s" "${peak:-N/A}" +done +echo "" + +# Linear regression per phase +echo "" +echo -e "${BOLD}=== GROWTH RATE PER PHASE (MB per 1M steps) ===${NC}" +echo "" + +for phase in $PHASES; do + case $phase in + execute) label="Execute" ;; + trace_build) label="Trace build" ;; + air) label="AIR construction" ;; + pool_alloc) label="Pool allocation" ;; + main_commits) label="Main commits" ;; + aux_build) label="Aux build" ;; + aux_commit) label="Aux commit" ;; + rounds_2_4) label="Rounds 2-4" ;; + esac + + # Collect (steps_M, delta) pairs + PAIRS="" + prev_phase_key="" + case $phase in + execute) prev_phase_key="" ;; + trace_build) prev_phase_key="execute" ;; + air) prev_phase_key="trace_build" ;; + pool_alloc) prev_phase_key="air" ;; + main_commits) prev_phase_key="pool_alloc" ;; + aux_build) prev_phase_key="main_commits" ;; + aux_commit) prev_phase_key="aux_build" ;; + rounds_2_4) prev_phase_key="aux_commit" ;; + esac + + for size in $PROGRAMS; do + DATA="$TMP_DIR/${size}_data.txt" + [ -f "$DATA" ] || continue + steps=$(grep "^steps=" "$DATA" | cut -d= -f2) + cur=$(grep "^${phase}=" "$DATA" | cut -d= -f2) + [ -z "$cur" ] && continue + if [ -z "$prev_phase_key" ]; then + delta="$cur" + else + prev_val=$(grep "^${prev_phase_key}=" "$DATA" | cut -d= -f2) + delta=$((cur - prev_val)) + fi + steps_m=$(awk "BEGIN {printf \"%.2f\", $steps / 1000000}") + PAIRS="$PAIRS $steps_m $delta" + done + + # Linear regression: delta = a + b * steps_M + echo "$PAIRS" | awk -v label="$label" '{ + n = NF / 2 + if (n < 2) { printf " %-22s (insufficient data)\n", label; next } + for (i = 0; i < n; i++) { + x[i] = $(2*i+1); y[i] = $(2*i+2) + } + sx=0; sy=0; sxx=0; sxy=0 + for 
(i=0;i0) ? 1 - ss_res/ss_tot : 1 + printf " %-22s %+.0f MB/M steps (base: %.0f MB, R²=%.3f)\n", label, b, a, r2 + }' +done + +# Extrapolation +echo "" +echo -e "${BOLD}=== EXTRAPOLATED PEAK HEAP ===${NC}" +echo "" + +# Collect (steps_M, peak) for regression +PEAK_PAIRS="" +for size in $PROGRAMS; do + DATA="$TMP_DIR/${size}_data.txt" + [ -f "$DATA" ] || continue + steps=$(grep "^steps=" "$DATA" | cut -d= -f2) + peak=$(grep "^peak=" "$DATA" | cut -d= -f2) + [ -z "$peak" ] && continue + steps_m=$(awk "BEGIN {printf \"%.2f\", $steps / 1000000}") + PEAK_PAIRS="$PEAK_PAIRS $steps_m $peak" +done + +echo "$PEAK_PAIRS" | awk '{ + n = NF / 2 + if (n < 2) { print " (insufficient data)"; next } + for (i = 0; i < n; i++) { x[i] = $(2*i+1); y[i] = $(2*i+2) } + sx=0; sy=0; sxx=0; sxy=0 + for (i=0;i0) ? 1 - ss_res/ss_tot : 1 + printf " Model: peak = %.0f + %.0f * steps_M (R²=%.4f)\n\n", a, b, r2 + targets[0]=8; targets[1]=16; targets[2]=32; targets[3]=64 + labels[0]="8M"; labels[1]="16M"; labels[2]="32M"; labels[3]="64M" + for (t=0; t<4; t++) { + pred = a + b * targets[t] + printf " fib_iterative_%-6s ~%.0f MB (~%.0f GB)\n", labels[t], pred, pred/1024 + } +}' + +echo "" +echo "Raw data: $TMP_DIR/" From 567a235aaaad793979430f8d3b61d19f8b24f8f1 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 27 Mar 2026 20:42:50 -0300 Subject: [PATCH 017/231] Pre-allocate pool to max_lde_size and keep capacity between tables --- crypto/stark/src/prover.rs | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 7b580fb08..2bd29fab1 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1635,9 +1635,20 @@ pub trait IsStarkProver< let k_commit = 1_usize; #[cfg(not(feature = "disk-spill"))] let k_commit = k; + // With k_commit=1 (disk-spill), pre-allocate to max_lde_size so + // coset_lde_full_expand can resize in-place without reallocation spikes. 
+ // With k_commit>1 (no disk-spill), start empty and grow on demand. let mut pool_sets: Vec> = (0..k_commit) .map(|_| PoolSet { - main: (0..max_main_cols).map(|_| Vec::new()).collect(), + main: (0..max_main_cols) + .map(|_| { + if k_commit == 1 { + Vec::with_capacity(max_lde_size) + } else { + Vec::new() + } + }) + .collect(), aux: (0..max_aux_cols).map(|_| Vec::new()).collect(), }) .collect(); @@ -1696,12 +1707,13 @@ pub trait IsStarkProver< Self::commit_main_trace(*trace, domain, twiddles, &mut pool.main)? }; - // Spill LDE from pool to mmap while pool is still filled, - // then free pool buffers to reduce peak memory. + // Spill LDE from pool to mmap while pool is still filled. + // Pool buffers keep their capacity for reuse by the next table, + // avoiding resize reallocation spikes in coset_lde_full_expand. #[cfg(feature = "disk-spill")] let spilled = { let num_main_cols = trace.num_main_columns; - let s = LDETraceTable::spill_main_from_pool( + LDETraceTable::spill_main_from_pool( &pool.main, num_main_cols, air.step_size(), @@ -1711,11 +1723,7 @@ pub trait IsStarkProver< ProvingError::WrongParameter(format!( "disk-spill main LDE table {idx}: {e}" )) - })?; - for buf in pool.main.iter_mut() { - *buf = Vec::new(); - } - s + })? 
}; #[cfg(feature = "disk-spill")] From b566cc0a60d40fe8703a38b01e820c259671e860 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 27 Mar 2026 21:11:16 -0300 Subject: [PATCH 018/231] Spill aux traces to disk immediately after building --- crypto/stark/src/prover.rs | 7 ++++++- crypto/stark/src/trace.rs | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 2bd29fab1..1abd540e5 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1836,7 +1836,12 @@ pub trait IsStarkProver< let bus_inputs_vec: Vec>> = aux_iter .map(|(air, trace, _)| { if air.has_aux_trace() { - air.build_auxiliary_trace(*trace, &lookup_challenges) + let result = air.build_auxiliary_trace(*trace, &lookup_challenges); + #[cfg(feature = "disk-spill")] + trace + .spill_aux_to_disk() + .expect("disk-spill aux trace after build"); + result } else { None } diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 0e7c12640..fa8a2d2bd 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -162,6 +162,11 @@ where self.main_table.spill_to_disk() } + #[cfg(feature = "disk-spill")] + pub fn spill_aux_to_disk(&mut self) -> std::io::Result<()> { + self.aux_table.spill_to_disk() + } + pub fn compute_trace_polys_main(&self) -> Vec>> where S: IsFFTField + IsSubFieldOf, From bcdc7dabb3bf502496c4e25fd4fec14b9759349d Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 27 Mar 2026 21:53:16 -0300 Subject: [PATCH 019/231] Fix clippy unused_mut warnings when disk-spill is disabled --- crypto/stark/src/fri/mod.rs | 22 ++++++++++++++-------- crypto/stark/src/prover.rs | 10 ++++++---- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/crypto/stark/src/fri/mod.rs b/crypto/stark/src/fri/mod.rs index c2252f324..c2b5fc834 100644 --- a/crypto/stark/src/fri/mod.rs +++ b/crypto/stark/src/fri/mod.rs @@ -54,23 +54,29 @@ where .chunks_exact(2) .map(|chunk| [chunk[0].clone(), 
chunk[1].clone()]) .collect(); - let mut merkle_tree = FriLayerMerkleTree::build(&leaves) + let merkle_tree = FriLayerMerkleTree::build(&leaves) .expect("FRI commit: Merkle tree construction must succeed"); let root = merkle_tree.root; #[cfg(feature = "disk-spill")] - merkle_tree - .spill_nodes_to_disk() - .expect("disk-spill FRI layer Merkle tree"); - let mut layer = FriLayer::new( + let merkle_tree = { + let mut t = merkle_tree; + t.spill_nodes_to_disk() + .expect("disk-spill FRI layer Merkle tree"); + t + }; + let layer = FriLayer::new( &evals, merkle_tree, current_coset_offset.clone().to_extension(), current_domain_size, ); #[cfg(feature = "disk-spill")] - layer - .spill_evaluation_to_disk() - .expect("disk-spill FRI layer evaluation"); + let layer = { + let mut l = layer; + l.spill_evaluation_to_disk() + .expect("disk-spill FRI layer evaluation"); + l + }; fri_layer_list.push(layer); // >>>> Send commitment: [pₖ] diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 1abd540e5..03fc9fe7c 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -980,17 +980,19 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let t_sub = Instant::now(); - let Some((mut composition_poly_merkle_tree, composition_poly_root)) = + let Some((composition_poly_merkle_tree, composition_poly_root)) = Self::commit_composition_polynomial(&lde_composition_poly_parts_evaluations) else { return Err(ProvingError::EmptyCommitment); }; #[cfg(feature = "disk-spill")] - composition_poly_merkle_tree - .spill_nodes_to_disk() - .map_err(|e| { + let composition_poly_merkle_tree = { + let mut t = composition_poly_merkle_tree; + t.spill_nodes_to_disk().map_err(|e| { ProvingError::WrongParameter(format!("disk-spill composition Merkle tree: {e}")) })?; + t + }; #[cfg(feature = "instruments")] let merkle_dur = t_sub.elapsed(); From 5bb8c05e82b5633d5624dbe103ad1210da5a2a02 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 30 Mar 2026 11:42:35 -0300 
Subject: [PATCH 020/231] Add 16M-128M sizes to bench_heap_profile.sh --- scripts/bench_heap_profile.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/bench_heap_profile.sh b/scripts/bench_heap_profile.sh index 512e93113..eebcfc920 100755 --- a/scripts/bench_heap_profile.sh +++ b/scripts/bench_heap_profile.sh @@ -34,6 +34,7 @@ suffix_to_steps() { 160k) echo 160000 ;; 250k) echo 250000 ;; 372k) echo 372000 ;; 500k) echo 500000 ;; 1M) echo 1000000 ;; 1200k) echo 1200000 ;; 2M) echo 2000000 ;; 4M) echo 4000000 ;; 8M) echo 8000000 ;; + 16M) echo 16000000 ;; 32M) echo 32000000 ;; 64M) echo 64000000 ;; 128M) echo 128000000 ;; *) echo "Unknown: $1" >&2; exit 1 ;; esac } From 8d0d6a71c3b0e8e8cf7216b755a5677e81f14da5 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 31 Mar 2026 19:26:10 -0300 Subject: [PATCH 021/231] Default k_commit=4 for disk-spill Phase A/C to balance memory and parallelism --- crypto/stark/src/prover.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 03fc9fe7c..710a37503 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1629,12 +1629,18 @@ pub trait IsStarkProver< } // Number of tables to process concurrently. - // disk-spill: Phase A/C use k_commit=1 (one pool at a time, since each - // table's LDE already saturates all cores via column-parallel FFT). + // disk-spill: Phase A/C use a reduced k_commit (default 4) to limit + // concurrent pool memory while retaining some parallelism. + // Override with COMMIT_PARALLELISM env var (1 = minimum memory, k = maximum speed). // Rounds 2-4 use the full k (no pools needed, reads from mmap). 
let k = table_parallelism().min(num_airs).max(1); #[cfg(feature = "disk-spill")] - let k_commit = 1_usize; + let k_commit = std::env::var("COMMIT_PARALLELISM") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(4_usize) + .min(k) + .max(1); #[cfg(not(feature = "disk-spill"))] let k_commit = k; // With k_commit=1 (disk-spill), pre-allocate to max_lde_size so From e572b46b20c2a4264122d834db4b2b8d3f38b2a1 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 31 Mar 2026 20:25:18 -0300 Subject: [PATCH 022/231] Revert "Default k_commit=4 for disk-spill Phase A/C to balance memory and parallelism" This reverts commit 8d0d6a71c3b0e8e8cf7216b755a5677e81f14da5. --- crypto/stark/src/prover.rs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 710a37503..03fc9fe7c 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1629,18 +1629,12 @@ pub trait IsStarkProver< } // Number of tables to process concurrently. - // disk-spill: Phase A/C use a reduced k_commit (default 4) to limit - // concurrent pool memory while retaining some parallelism. - // Override with COMMIT_PARALLELISM env var (1 = minimum memory, k = maximum speed). + // disk-spill: Phase A/C use k_commit=1 (one pool at a time, since each + // table's LDE already saturates all cores via column-parallel FFT). // Rounds 2-4 use the full k (no pools needed, reads from mmap). 
let k = table_parallelism().min(num_airs).max(1); #[cfg(feature = "disk-spill")] - let k_commit = std::env::var("COMMIT_PARALLELISM") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(4_usize) - .min(k) - .max(1); + let k_commit = 1_usize; #[cfg(not(feature = "disk-spill"))] let k_commit = k; // With k_commit=1 (disk-spill), pre-allocate to max_lde_size so From f3ca018d46937e74d32c9ad3c6e844d9bfa8cd76 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 31 Mar 2026 22:08:44 -0300 Subject: [PATCH 023/231] Skip composition poly and FRI spills in Rounds 2-4 to reduce I/O overhead --- crypto/stark/src/fri/mod.rs | 14 -------------- crypto/stark/src/prover.rs | 18 ++++-------------- 2 files changed, 4 insertions(+), 28 deletions(-) diff --git a/crypto/stark/src/fri/mod.rs b/crypto/stark/src/fri/mod.rs index c2b5fc834..8d3724093 100644 --- a/crypto/stark/src/fri/mod.rs +++ b/crypto/stark/src/fri/mod.rs @@ -57,26 +57,12 @@ where let merkle_tree = FriLayerMerkleTree::build(&leaves) .expect("FRI commit: Merkle tree construction must succeed"); let root = merkle_tree.root; - #[cfg(feature = "disk-spill")] - let merkle_tree = { - let mut t = merkle_tree; - t.spill_nodes_to_disk() - .expect("disk-spill FRI layer Merkle tree"); - t - }; let layer = FriLayer::new( &evals, merkle_tree, current_coset_offset.clone().to_extension(), current_domain_size, ); - #[cfg(feature = "disk-spill")] - let layer = { - let mut l = layer; - l.spill_evaluation_to_disk() - .expect("disk-spill FRI layer evaluation"); - l - }; fri_layer_list.push(layer); // >>>> Send commitment: [pₖ] diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 03fc9fe7c..f53cb24db 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -985,14 +985,8 @@ pub trait IsStarkProver< else { return Err(ProvingError::EmptyCommitment); }; - #[cfg(feature = "disk-spill")] - let composition_poly_merkle_tree = { - let mut t = composition_poly_merkle_tree; - 
t.spill_nodes_to_disk().map_err(|e| { - ProvingError::WrongParameter(format!("disk-spill composition Merkle tree: {e}")) - })?; - t - }; + // Note: composition Merkle tree kept in RAM (spilling adds I/O overhead + // for only ~30 sparse queries; memory is freed after Rounds 2-4). #[cfg(feature = "instruments")] let merkle_dur = t_sub.elapsed(); @@ -1137,12 +1131,8 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let other_dur_1 = t_sub.elapsed(); - // Spill composition poly evaluations to disk after the dense read above. - // They are only needed sparsely in open_composition_poly (~30 queries). - #[cfg(feature = "disk-spill")] - round_2_result - .spill_evaluations_to_disk() - .expect("disk-spill composition poly evaluations"); + // Note: composition poly evaluations kept in RAM (only ~30 sparse queries + // remain; spilling adds I/O overhead that outweighs memory savings). // Extend N trace-coset evaluations to 2N LDE-coset evaluations via standard LDE. // deep_evals[i] = h(offset·ω_N^i) = f(ω_N^i) where f(x) = h(offset·x). From 75537a800e008a1c1045a224095f031d4cba0849 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 12:46:22 -0300 Subject: [PATCH 024/231] Remove max-rows CLI arg --- bin/cli/src/main.rs | 37 ++++--------------------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/bin/cli/src/main.rs b/bin/cli/src/main.rs index eb7c3db48..bc7cc57d1 100644 --- a/bin/cli/src/main.rs +++ b/bin/cli/src/main.rs @@ -123,10 +123,6 @@ enum Commands { /// Print timing breakdown #[arg(long)] time: bool, - - /// Maximum rows per table chunk (power of 2). Smaller = less memory, more chunks. 
- #[arg(long)] - max_rows: Option, }, /// Verify a proof bundle @@ -159,8 +155,7 @@ fn main() -> ExitCode { output, blowup, time, - max_rows, - } => cmd_prove(elf, output, blowup, time, max_rows), + } => cmd_prove(elf, output, blowup, time), Commands::Verify { proof, elf, @@ -254,13 +249,7 @@ fn cmd_execute(elf_path: PathBuf, flamegraph_path: Option) -> ExitCode ExitCode::SUCCESS } -fn cmd_prove( - elf_path: PathBuf, - output_path: PathBuf, - blowup: Option, - time: bool, - max_rows: Option, -) -> ExitCode { +fn cmd_prove(elf_path: PathBuf, output_path: PathBuf, blowup: Option, time: bool) -> ExitCode { eprintln!("Reading ELF file..."); let elf_data = match std::fs::read(&elf_path) { Ok(data) => data, @@ -283,24 +272,6 @@ fn cmd_prove( tikv_jemalloc_ctl::stats::allocated::read().unwrap_or(0) }); - let max_rows_config = match max_rows { - Some(mr) => { - eprintln!("Max rows per chunk: {mr}"); - prover::MaxRowsConfig { - cpu: mr, - memw: mr, - memw_aligned: mr, - dvrm: mr, - mul: mr, - lt: mr, - shift: mr, - load: mr, - branch: mr, - } - } - None => prover::MaxRowsConfig::default(), - }; - let start = Instant::now(); let proof = match blowup { Some(b) => { @@ -315,13 +286,13 @@ fn cmd_prove( "Generating proof (blowup={b}, queries={})...", opts.fri_number_of_queries ); - prover::prove_with_options(&elf_data, &opts, &max_rows_config) + prover::prove_with_options(&elf_data, &opts, &Default::default()) } None => { let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); eprintln!("Generating proof..."); - prover::prove_with_options(&elf_data, &opts, &max_rows_config) + prover::prove_with_options(&elf_data, &opts, &Default::default()) } }; let prove_elapsed = start.elapsed(); From abf8014bafaafe87c4bea38db8d196cbc62efcdb Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 12:51:26 -0300 Subject: [PATCH 025/231] Remove heap profiling infrastructure (moved to separate PR) --- bin/cli/src/main.rs | 6 - 
crypto/stark/src/instruments.rs | 40 ------ crypto/stark/src/prover.rs | 27 +--- prover/src/instruments.rs | 40 +----- prover/src/lib.rs | 15 -- scripts/bench_heap_profile.sh | 248 -------------------------------- 6 files changed, 6 insertions(+), 370 deletions(-) delete mode 100755 scripts/bench_heap_profile.sh diff --git a/bin/cli/src/main.rs b/bin/cli/src/main.rs index bc7cc57d1..886fbcca6 100644 --- a/bin/cli/src/main.rs +++ b/bin/cli/src/main.rs @@ -266,12 +266,6 @@ fn cmd_prove(elf_path: PathBuf, output_path: PathBuf, blowup: Option, time: eprintln!("Disk-spill: enabled"); } - #[cfg(all(feature = "jemalloc-stats", feature = "instruments"))] - stark::instruments::set_heap_reader(|| { - tikv_jemalloc_ctl::epoch::advance().ok(); - tikv_jemalloc_ctl::stats::allocated::read().unwrap_or(0) - }); - let start = Instant::now(); let proof = match blowup { Some(b) => { diff --git a/crypto/stark/src/instruments.rs b/crypto/stark/src/instruments.rs index beda186d8..3c13ef2e1 100644 --- a/crypto/stark/src/instruments.rs +++ b/crypto/stark/src/instruments.rs @@ -1,30 +1,7 @@ use std::cell::RefCell; -use std::sync::OnceLock; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::Duration; -// --------------------------------------------------------------------------- -// Heap reader callback: set by the binary (CLI) to provide jemalloc reads -// without coupling this crate to jemalloc. -// --------------------------------------------------------------------------- - -static HEAP_READER: OnceLock usize> = OnceLock::new(); - -/// Register a function that returns the current heap allocated bytes. -/// Call this once from the binary before proving starts. -pub fn set_heap_reader(f: fn() -> usize) { - let _ = HEAP_READER.set(f); -} - -/// Read current heap in bytes, or `None` if no reader was registered. 
-pub fn heap_bytes() -> Option { - HEAP_READER.get().map(|f| f()) -} - -fn heap_mb() -> Option { - heap_bytes().map(|b| b / (1024 * 1024)) -} - /// Sub-operation timing breakdown for a single table in Rounds 2-4. #[derive(Clone, Debug, Default)] pub struct TableSubOps { @@ -61,9 +38,6 @@ pub struct Round1SubOps { pub aux_merkle: Duration, } -/// Heap snapshot: (label, allocated_mb) at a phase boundary. -pub type HeapSnapshot = (&'static str, usize); - /// Timing data collected inside `multi_prove`. pub struct MultiProveTiming { pub prepass: Duration, @@ -75,20 +49,6 @@ pub struct MultiProveTiming { pub round1_sub: Round1SubOps, /// (name, rows, duration, sub_ops) per table for rounds 2-4. pub table_timings: Vec<(String, usize, Duration, TableSubOps)>, - /// Heap snapshots at phase boundaries (empty if no heap reader set). - pub heap_snapshots: Vec, -} - -/// Heap snapshots taken in `prove_with_options` (before multi_prove). -pub struct ProveHeapProfile { - pub after_execute: Option, - pub after_trace_build: Option, - pub after_air: Option, -} - -/// Take a heap snapshot, returning `(label, mb)` or `None`. -pub fn snap(label: &'static str) -> Option { - heap_mb().map(|mb| (label, mb)) } /// Round 1 sub-timings: atomics so parallel rayon workers can accumulate safely. 
diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index f849f6e10..271914b0a 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1588,12 +1588,6 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] crate::instruments::reset_all(); - #[cfg(feature = "instruments")] - let mut heap_snaps: Vec = Vec::new(); - #[cfg(feature = "instruments")] - if let Some(s) = crate::instruments::snap("entry") { - heap_snaps.push(s); - } let num_airs = air_trace_pairs.len(); @@ -1671,9 +1665,7 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let prepass_elapsed = phase_start.elapsed(); #[cfg(feature = "instruments")] - if let Some(s) = crate::instruments::snap("after pool alloc") { - heap_snaps.push(s); - } + if let Some(s) = crate::instruments::snap("after pool alloc") {} // ===================================================================== // Round 1, Phase A: Commit all main traces (parallel in chunks of K) @@ -1811,9 +1803,7 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let main_commits_elapsed = phase_start.elapsed(); #[cfg(feature = "instruments")] - if let Some(s) = crate::instruments::snap("after main commits") { - heap_snaps.push(s); - } + if let Some(s) = crate::instruments::snap("after main commits") {} // ===================================================================== // Round 1, Phase B: Sample shared LogUp challenges @@ -1866,9 +1856,7 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let aux_build_elapsed = phase_start.elapsed(); #[cfg(feature = "instruments")] - if let Some(s) = crate::instruments::snap("after aux build") { - heap_snaps.push(s); - } + if let Some(s) = crate::instruments::snap("after aux build") {} // Pass 2: Parallel fork transcript → extract → LDE → commit in chunks of K. // Each table gets its own transcript fork and pool set. 
@@ -2002,9 +1990,7 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let aux_commit_elapsed = phase_start.elapsed(); #[cfg(feature = "instruments")] - if let Some(s) = crate::instruments::snap("after aux commit") { - heap_snaps.push(s); - } + if let Some(s) = crate::instruments::snap("after aux commit") {} #[cfg(feature = "debug-checks")] { @@ -2271,9 +2257,7 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] { - if let Some(s) = crate::instruments::snap("after rounds 2-4") { - heap_snaps.push(s); - } + if let Some(s) = crate::instruments::snap("after rounds 2-4") {} // Store timing data for the top-level report in prove_with_options. // Uses a thread-local to avoid changing multi_prove's return type. crate::instruments::store(crate::instruments::MultiProveTiming { @@ -2284,7 +2268,6 @@ pub trait IsStarkProver< rounds_2_4: phase_start.elapsed(), round1_sub: crate::instruments::take_r1_sub(), table_timings, - heap_snapshots: heap_snaps, }); } diff --git a/prover/src/instruments.rs b/prover/src/instruments.rs index 96cb8b79c..e3db38b95 100644 --- a/prover/src/instruments.rs +++ b/prover/src/instruments.rs @@ -57,8 +57,6 @@ pub fn print_report( trace_build: Duration, air_construction: Duration, total: Duration, - heap_before: Option, - heap_profile: &stark::instruments::ProveHeapProfile, ) { let mp = stark::instruments::take(); @@ -71,7 +69,7 @@ pub fn print_report( row_top("Trace build", trace_build, total); row_top("AIR construction", air_construction, total); - if let Some(ref mp) = mp { + if let Some(mp) = mp { let round1 = mp.main_commits + mp.aux_build + mp.aux_commit; row_top("Pre-pass (domains/twiddles)", mp.prepass, total); @@ -216,40 +214,4 @@ pub fn print_report( eprintln!(" {}", "─".repeat(58)); eprintln!(" {:<36} {:>7.2}s", "TOTAL", total.as_secs_f64()); eprintln!(); - - // Heap profile - let mb = |b: usize| b / (1024 * 1024); - let has_heap = heap_before.is_some(); - if has_heap { - eprintln!("=== HEAP PROFILE (MB) ==="); - eprintln!(" 
{:<36} {:>8} {:>8}", "Phase", "Heap", "Delta"); - eprintln!(" {}", "─".repeat(56)); - - let mut prev = heap_before.unwrap(); - let mut print_row = |label: &str, val: Option| { - if let Some(v) = val { - let cur = mb(v); - let delta = mb(v) as isize - mb(prev) as isize; - eprintln!(" {:<36} {:>7} {:>+8}", label, cur, delta); - prev = v; - } - }; - - print_row("After execute", heap_profile.after_execute); - print_row("After trace build", heap_profile.after_trace_build); - print_row("After AIR construction", heap_profile.after_air); - - if let Some(ref mp_data) = mp { - for (label, snap_mb) in &mp_data.heap_snapshots { - let snap_bytes = snap_mb * (1024 * 1024); - let cur = *snap_mb; - let delta = cur as isize - mb(prev) as isize; - eprintln!(" {:<36} {:>7} {:>+8}", label, cur, delta); - prev = snap_bytes; - } - } - - eprintln!(" {}", "─".repeat(56)); - eprintln!(); - } } diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 62fde65e3..972cacab7 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -476,8 +476,6 @@ pub fn prove_with_options( ) -> Result { #[cfg(feature = "instruments")] let total_start = std::time::Instant::now(); - #[cfg(feature = "instruments")] - let heap_before = stark::instruments::heap_bytes(); // Phase 1: Execute (ELF load + run) #[cfg(feature = "instruments")] @@ -491,8 +489,6 @@ pub fn prove_with_options( #[cfg(feature = "instruments")] let execute_elapsed = phase_start.elapsed(); - #[cfg(feature = "instruments")] - let heap_after_execute = stark::instruments::heap_bytes(); // Phase 2: Trace build #[cfg(feature = "instruments")] @@ -511,8 +507,6 @@ pub fn prove_with_options( #[cfg(feature = "instruments")] let trace_build_elapsed = phase_start.elapsed(); - #[cfg(feature = "instruments")] - let heap_after_trace = stark::instruments::heap_bytes(); #[cfg(feature = "instruments")] let phase_start = std::time::Instant::now(); @@ -528,8 +522,6 @@ pub fn prove_with_options( #[cfg(feature = "instruments")] let air_elapsed = 
phase_start.elapsed(); - #[cfg(feature = "instruments")] - let heap_after_air = stark::instruments::heap_bytes(); let runtime_page_ranges = traces.runtime_page_ranges(); @@ -542,18 +534,11 @@ pub fn prove_with_options( #[cfg(feature = "instruments")] { - let heap_profile = stark::instruments::ProveHeapProfile { - after_execute: heap_after_execute, - after_trace_build: heap_after_trace, - after_air: heap_after_air, - }; instruments::print_report( execute_elapsed, trace_build_elapsed, air_elapsed, total_start.elapsed(), - heap_before, - &heap_profile, ); } diff --git a/scripts/bench_heap_profile.sh b/scripts/bench_heap_profile.sh deleted file mode 100755 index eebcfc920..000000000 --- a/scripts/bench_heap_profile.sh +++ /dev/null @@ -1,248 +0,0 @@ -#!/bin/bash -# Per-phase heap profile across program sizes. -# Shows where heap grows and how each phase scales with program size. -# -# Usage: bench_heap_profile.sh [--no-build] [--programs "500k 1M 2M 4M"] -# -# Requires: instruments + jemalloc-stats features. -# Peak heap is deterministic, so 1 run per size is enough. - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -ROOT_DIR="$(cd "$SCRIPT_DIR/.." 
&& pwd)" -TMP_DIR="/tmp/bench_heap_profile" -ELF_DIR="$ROOT_DIR/executor/program_artifacts/asm" - -GREEN='\033[0;32m' -BOLD='\033[1m' -NC='\033[0m' - -BUILD=true -PROGRAMS="500k 1M 2M 4M" - -while [[ $# -gt 0 ]]; do - case $1 in - --no-build) BUILD=false; shift ;; - --programs) PROGRAMS="$2"; shift 2 ;; - *) echo "Unknown arg: $1"; exit 1 ;; - esac -done - -suffix_to_steps() { - case $1 in - 160k) echo 160000 ;; 250k) echo 250000 ;; 372k) echo 372000 ;; - 500k) echo 500000 ;; 1M) echo 1000000 ;; 1200k) echo 1200000 ;; - 2M) echo 2000000 ;; 4M) echo 4000000 ;; 8M) echo 8000000 ;; - 16M) echo 16000000 ;; 32M) echo 32000000 ;; 64M) echo 64000000 ;; 128M) echo 128000000 ;; - *) echo "Unknown: $1" >&2; exit 1 ;; - esac -} - -rm -rf "$TMP_DIR" && mkdir -p "$TMP_DIR" - -if $BUILD; then - echo -e "${GREEN}Building CLI with instruments + jemalloc-stats...${NC}" - cargo build --release -p cli --features jemalloc-stats,instruments \ - --manifest-path "$ROOT_DIR/Cargo.toml" 2>&1 | tail -1 -fi -CLI="$ROOT_DIR/target/release/cli" - -# Phase labels we parse from stderr (order matters) -PHASES="execute trace_build air pool_alloc main_commits aux_build aux_commit rounds_2_4" - -for size in $PROGRAMS; do - ELF="$ELF_DIR/fib_iterative_${size}.elf" - [ -f "$ELF" ] || { echo "Missing: $ELF"; continue; } - steps=$(suffix_to_steps "$size") - echo -e "${GREEN}Running fib_iterative_${size}...${NC}" - - STDERR="$TMP_DIR/${size}_stderr.txt" - STDOUT="$TMP_DIR/${size}_stdout.txt" - "$CLI" prove "$ELF" -o "$TMP_DIR/proof.bin" --time >"$STDOUT" 2>"$STDERR" - rm -f "$TMP_DIR/proof.bin" - - # Parse absolute heap values (second-to-last column) from HEAP PROFILE section - HEAP_VALS=$(awk '/^=== HEAP PROFILE/,/^──/{ - if (/After execute/) printf "execute=%s\n", $(NF-1) - if (/After trace build/) printf "trace_build=%s\n", $(NF-1) - if (/After AIR/) printf "air=%s\n", $(NF-1) - if (/after pool alloc/) printf "pool_alloc=%s\n", $(NF-1) - if (/after main commits/) printf "main_commits=%s\n", $(NF-1) - 
if (/after aux build/) printf "aux_build=%s\n", $(NF-1) - if (/after aux commit/) printf "aux_commit=%s\n", $(NF-1) - if (/after rounds 2-4/) printf "rounds_2_4=%s\n", $(NF-1) - }' "$STDERR") - - PEAK=$(grep -o 'Peak heap: [0-9]*' "$STDOUT" | awk '{print $3}') - echo "steps=$steps" > "$TMP_DIR/${size}_data.txt" - echo "peak=$PEAK" >> "$TMP_DIR/${size}_data.txt" - echo "$HEAP_VALS" >> "$TMP_DIR/${size}_data.txt" -done - -echo "" -echo -e "${BOLD}=== HEAP PROFILE ACROSS SIZES ===${NC}" -echo "" - -# Print table: phases as rows, sizes as columns -# Header -printf " %-22s" "Phase (delta MB)" -for size in $PROGRAMS; do printf " %10s" "$size"; done -echo "" -printf " %-22s" "──────────────────────" -for size in $PROGRAMS; do printf " %10s" "──────────"; done -echo "" - -# For each phase, print the delta -prev_phase="" -for phase in $PHASES; do - case $phase in - execute) label="Execute" ;; - trace_build) label="Trace build" ;; - air) label="AIR construction" ;; - pool_alloc) label="Pool allocation" ;; - main_commits) label="Main commits" ;; - aux_build) label="Aux build" ;; - aux_commit) label="Aux commit" ;; - rounds_2_4) label="Rounds 2-4" ;; - esac - - printf " %-22s" "$label" - for size in $PROGRAMS; do - DATA="$TMP_DIR/${size}_data.txt" - [ -f "$DATA" ] || { printf " %10s" "N/A"; continue; } - cur=$(grep "^${phase}=" "$DATA" | cut -d= -f2) - if [ -z "$cur" ]; then - printf " %10s" "N/A" - else - # Get previous phase value to compute delta - if [ -z "$prev_phase" ]; then - delta="$cur" - else - prev_val=$(grep "^${prev_phase}=" "$DATA" | cut -d= -f2) - delta=$((cur - prev_val)) - fi - printf " %+10d" "$delta" - fi - done - echo "" - prev_phase=$phase -done - -# Total/peak row -printf " %-22s" "──────────────────────" -for size in $PROGRAMS; do printf " %10s" "──────────"; done -echo "" -printf " %-22s" "Peak heap" -for size in $PROGRAMS; do - DATA="$TMP_DIR/${size}_data.txt" - peak=$(grep "^peak=" "$DATA" | cut -d= -f2) - printf " %10s" "${peak:-N/A}" -done -echo "" 
- -# Linear regression per phase -echo "" -echo -e "${BOLD}=== GROWTH RATE PER PHASE (MB per 1M steps) ===${NC}" -echo "" - -for phase in $PHASES; do - case $phase in - execute) label="Execute" ;; - trace_build) label="Trace build" ;; - air) label="AIR construction" ;; - pool_alloc) label="Pool allocation" ;; - main_commits) label="Main commits" ;; - aux_build) label="Aux build" ;; - aux_commit) label="Aux commit" ;; - rounds_2_4) label="Rounds 2-4" ;; - esac - - # Collect (steps_M, delta) pairs - PAIRS="" - prev_phase_key="" - case $phase in - execute) prev_phase_key="" ;; - trace_build) prev_phase_key="execute" ;; - air) prev_phase_key="trace_build" ;; - pool_alloc) prev_phase_key="air" ;; - main_commits) prev_phase_key="pool_alloc" ;; - aux_build) prev_phase_key="main_commits" ;; - aux_commit) prev_phase_key="aux_build" ;; - rounds_2_4) prev_phase_key="aux_commit" ;; - esac - - for size in $PROGRAMS; do - DATA="$TMP_DIR/${size}_data.txt" - [ -f "$DATA" ] || continue - steps=$(grep "^steps=" "$DATA" | cut -d= -f2) - cur=$(grep "^${phase}=" "$DATA" | cut -d= -f2) - [ -z "$cur" ] && continue - if [ -z "$prev_phase_key" ]; then - delta="$cur" - else - prev_val=$(grep "^${prev_phase_key}=" "$DATA" | cut -d= -f2) - delta=$((cur - prev_val)) - fi - steps_m=$(awk "BEGIN {printf \"%.2f\", $steps / 1000000}") - PAIRS="$PAIRS $steps_m $delta" - done - - # Linear regression: delta = a + b * steps_M - echo "$PAIRS" | awk -v label="$label" '{ - n = NF / 2 - if (n < 2) { printf " %-22s (insufficient data)\n", label; next } - for (i = 0; i < n; i++) { - x[i] = $(2*i+1); y[i] = $(2*i+2) - } - sx=0; sy=0; sxx=0; sxy=0 - for (i=0;i0) ? 
1 - ss_res/ss_tot : 1 - printf " %-22s %+.0f MB/M steps (base: %.0f MB, R²=%.3f)\n", label, b, a, r2 - }' -done - -# Extrapolation -echo "" -echo -e "${BOLD}=== EXTRAPOLATED PEAK HEAP ===${NC}" -echo "" - -# Collect (steps_M, peak) for regression -PEAK_PAIRS="" -for size in $PROGRAMS; do - DATA="$TMP_DIR/${size}_data.txt" - [ -f "$DATA" ] || continue - steps=$(grep "^steps=" "$DATA" | cut -d= -f2) - peak=$(grep "^peak=" "$DATA" | cut -d= -f2) - [ -z "$peak" ] && continue - steps_m=$(awk "BEGIN {printf \"%.2f\", $steps / 1000000}") - PEAK_PAIRS="$PEAK_PAIRS $steps_m $peak" -done - -echo "$PEAK_PAIRS" | awk '{ - n = NF / 2 - if (n < 2) { print " (insufficient data)"; next } - for (i = 0; i < n; i++) { x[i] = $(2*i+1); y[i] = $(2*i+2) } - sx=0; sy=0; sxx=0; sxy=0 - for (i=0;i0) ? 1 - ss_res/ss_tot : 1 - printf " Model: peak = %.0f + %.0f * steps_M (R²=%.4f)\n\n", a, b, r2 - targets[0]=8; targets[1]=16; targets[2]=32; targets[3]=64 - labels[0]="8M"; labels[1]="16M"; labels[2]="32M"; labels[3]="64M" - for (t=0; t<4; t++) { - pred = a + b * targets[t] - printf " fib_iterative_%-6s ~%.0f MB (~%.0f GB)\n", labels[t], pred, pred/1024 - } -}' - -echo "" -echo "Raw data: $TMP_DIR/" From a2d0e4eda295ffb1a3c684995d56d13e1d2c29d2 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 13:04:55 -0300 Subject: [PATCH 026/231] Use prover::prove for default blowup path --- bin/cli/src/main.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bin/cli/src/main.rs b/bin/cli/src/main.rs index 886fbcca6..9514939a8 100644 --- a/bin/cli/src/main.rs +++ b/bin/cli/src/main.rs @@ -283,10 +283,8 @@ fn cmd_prove(elf_path: PathBuf, output_path: PathBuf, blowup: Option, time: prover::prove_with_options(&elf_data, &opts, &Default::default()) } None => { - let opts = - GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); eprintln!("Generating proof..."); - prover::prove_with_options(&elf_data, &opts, &Default::default()) + 
prover::prove(&elf_data) } }; let prove_elapsed = start.elapsed(); From 55cb1a3501ebabed8c202cb7dc82948ef49ea86c Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 13:08:41 -0300 Subject: [PATCH 027/231] Simplify MmapNodeBacking doc comment --- crypto/crypto/src/merkle_tree/merkle.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 9f5f68876..b4cfb00f4 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -24,9 +24,8 @@ impl std::error::Error for Error {} /// File-backed mmap storage for Merkle tree nodes. /// -/// After `spill_nodes_to_disk()`, the heap `Vec` is freed and all -/// node access goes through this mmap. The OS manages page eviction under -/// memory pressure — file-backed pages are evictable without swap. +/// After `spill_nodes_to_disk()`, the in-memory node vector is freed and +/// node access goes through this mmap instead. 
#[cfg(feature = "disk-spill")] pub(crate) struct MmapNodeBacking { mmap: memmap2::Mmap, From e465b38c2d5c223f40e6530151fcdd1692299fd8 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 13:11:46 -0300 Subject: [PATCH 028/231] Clarify clone assert message for spilled MerkleTree --- crypto/crypto/src/merkle_tree/merkle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index b4cfb00f4..33143b11e 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -57,7 +57,7 @@ impl Clone for MerkleTree { #[cfg(feature = "disk-spill")] assert!( self.mmap_backing.is_none(), - "cannot clone a spilled MerkleTree — nodes have been freed; use Arc instead" + "cannot clone a spilled MerkleTree: nodes are on disk, not in memory" ); Self { root: self.root.clone(), From c4326e6c135710ce7c0b716d2254d440d90af715 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 13:15:34 -0300 Subject: [PATCH 029/231] Disable Clone for MerkleTree and FriLayer when disk-spill is enabled --- crypto/crypto/src/merkle_tree/merkle.rs | 17 +---------------- crypto/stark/src/fri/fri_commitment.rs | 2 +- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 33143b11e..3df4b1bf3 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -43,6 +43,7 @@ pub(crate) struct MmapNodeBacking { /// leaf 1 leaf 2 leaf 3 leaf 4 /// The bottom leafs correspond to the hashes of the elements, while each upper /// layer contains the hash of the concatenation of the daughter nodes. 
+#[cfg_attr(not(feature = "disk-spill"), derive(Clone))] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct MerkleTree { pub root: B::Node, @@ -52,22 +53,6 @@ pub struct MerkleTree { mmap_backing: Option, } -impl Clone for MerkleTree { - fn clone(&self) -> Self { - #[cfg(feature = "disk-spill")] - assert!( - self.mmap_backing.is_none(), - "cannot clone a spilled MerkleTree: nodes are on disk, not in memory" - ); - Self { - root: self.root.clone(), - nodes: self.nodes.clone(), - #[cfg(feature = "disk-spill")] - mmap_backing: None, - } - } -} - const ROOT: usize = 0; impl MerkleTree diff --git a/crypto/stark/src/fri/fri_commitment.rs b/crypto/stark/src/fri/fri_commitment.rs index b7c070b0a..c20f1686b 100644 --- a/crypto/stark/src/fri/fri_commitment.rs +++ b/crypto/stark/src/fri/fri_commitment.rs @@ -4,7 +4,7 @@ use math::{ traits::AsBytes, }; -#[derive(Clone)] +#[cfg_attr(not(feature = "disk-spill"), derive(Clone))] pub struct FriLayer where F: IsField, From fdb37d279a4fedb00be10a4348eef403f71548fd Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 13:25:05 -0300 Subject: [PATCH 030/231] Simplify unsafe safety comment in node_get --- crypto/crypto/src/merkle_tree/merkle.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 3df4b1bf3..3910a7759 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -116,10 +116,8 @@ where #[cfg(feature = "disk-spill")] if let Some(ref backing) = self.mmap_backing { if idx < backing.node_count { - // SAFETY: B::Node is Copy (required by spill_nodes_to_disk's where clause). - // The mmap contains node_count × node_size contiguous bytes written from - // identical Node values on the same machine. The mmap base is page-aligned - // and node_size divides into page size for all concrete Node types ([u8; 32/64]). 
+ // SAFETY: The mmap contains node_count × node_size contiguous bytes + // written from identical Node values. B::Node is [u8; N] (align 1). let ptr = unsafe { backing.mmap.as_ptr().add(idx * backing.node_size) }; return Some(unsafe { &*(ptr as *const B::Node) }); } From f0b0d5e09a4c8575c7118125e1e8cb172dfd6f17 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 13:29:29 -0300 Subject: [PATCH 031/231] Add compile-time alignment assert for mmap node access --- crypto/crypto/src/merkle_tree/merkle.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 3910a7759..e4449153f 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -116,8 +116,8 @@ where #[cfg(feature = "disk-spill")] if let Some(ref backing) = self.mmap_backing { if idx < backing.node_count { - // SAFETY: The mmap contains node_count × node_size contiguous bytes - // written from identical Node values. B::Node is [u8; N] (align 1). + // SAFETY: spill_nodes_to_disk writes self.nodes as contiguous bytes + // to this mmap and asserts align_of::() == 1 at compile time. 
let ptr = unsafe { backing.mmap.as_ptr().add(idx * backing.node_size) }; return Some(unsafe { &*(ptr as *const B::Node) }); } @@ -282,6 +282,12 @@ where where B::Node: Copy, { + const { + assert!( + align_of::() == 1, + "B::Node must have alignment 1 for mmap safety" + ) + } use std::io::Write; if self.nodes.is_empty() { From 122c9866b07cdcd0d02c22cf825407f0a19c0ab8 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 13:34:29 -0300 Subject: [PATCH 032/231] Simplify spill_nodes_to_disk doc comment --- crypto/crypto/src/merkle_tree/merkle.rs | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index e4449153f..705457ccc 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -267,16 +267,8 @@ where auth_path_set.into_iter().rev().collect() } - /// Write tree nodes to a temp file, mmap it read-only, and free the heap Vec. - /// - /// After this call, all node access methods read from the mmap transparently. - /// The OS can evict mmap pages under memory pressure since they're file-backed. - /// - /// Requires `B::Node: Copy` to ensure nodes have a trivial byte representation - /// suitable for raw serialization and mmap casting. - /// - /// Note: the concrete `Node` type is `[u8; 32]` (Keccak hash), which has no - /// padding bytes. The raw byte round-trip is therefore well-defined. + /// Write tree nodes to a temp file, mmap it, and free the in-memory vector. + /// Node access methods read from the mmap after this call. 
#[cfg(feature = "disk-spill")] pub fn spill_nodes_to_disk(&mut self) -> std::io::Result<()> where From 461e45d079d08c83913ed084dbbcff66af1ab83a Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 13:35:52 -0300 Subject: [PATCH 033/231] Fix safety comment for node byte cast --- crypto/crypto/src/merkle_tree/merkle.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 705457ccc..f068883a7 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -294,8 +294,8 @@ where file.set_len(total_bytes as u64)?; { let mut writer = std::io::BufWriter::new(&file); - // SAFETY: B::Node is Copy, so its in-memory representation is a - // valid byte sequence. The Vec is contiguous. + // SAFETY: B::Node is [u8; N] (alignment 1, no padding). The Vec + // is contiguous so the cast to a byte slice is valid. let bytes = unsafe { core::slice::from_raw_parts(self.nodes.as_ptr() as *const u8, total_bytes) }; From ff63f9c5f5875d2bb5c6a697f84aec69e3c21346 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 14:03:17 -0300 Subject: [PATCH 034/231] Simplify safety comment --- crypto/crypto/src/merkle_tree/merkle.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index f068883a7..87b3c5b38 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -294,8 +294,8 @@ where file.set_len(total_bytes as u64)?; { let mut writer = std::io::BufWriter::new(&file); - // SAFETY: B::Node is [u8; N] (alignment 1, no padding). The Vec - // is contiguous so the cast to a byte slice is valid. + // SAFETY: B::Node is a plain byte array ([u8; N]), so casting + // the contiguous Vec to a byte slice is valid. 
let bytes = unsafe { core::slice::from_raw_parts(self.nodes.as_ptr() as *const u8, total_bytes) }; From bdba457aceb0773303f3ed38fde8b703688c12d2 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 14:04:31 -0300 Subject: [PATCH 035/231] Explain why tempfile is exclusively owned in safety comment --- crypto/crypto/src/merkle_tree/merkle.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 87b3c5b38..065554df9 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -303,7 +303,8 @@ where writer.flush()?; } - // SAFETY: We own the file exclusively; it won't be modified externally. + // SAFETY: tempfile() creates an anonymous file with no filesystem path, + // so no other process can open or modify it. let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; // Free the heap allocation From 3c88c30b48e2414d97f628f44baf7b0c73e68482 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 14:09:20 -0300 Subject: [PATCH 036/231] Document _file field in MmapNodeBacking --- crypto/crypto/src/merkle_tree/merkle.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 065554df9..f50897b0c 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -29,6 +29,8 @@ impl std::error::Error for Error {} #[cfg(feature = "disk-spill")] pub(crate) struct MmapNodeBacking { mmap: memmap2::Mmap, + /// Owns the file descriptor backing the mmap. Dropping it would close + /// the descriptor and invalidate the mmap. 
_file: std::fs::File, node_count: usize, node_size: usize, From 78ceae2c45dd90778a2b7f5101858a2af9f33d2e Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 14:12:02 -0300 Subject: [PATCH 037/231] Document _file field, remove unused _len from EvalMmapBacking --- crypto/stark/src/fri/fri_commitment.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/stark/src/fri/fri_commitment.rs b/crypto/stark/src/fri/fri_commitment.rs index c20f1686b..01f39cbe0 100644 --- a/crypto/stark/src/fri/fri_commitment.rs +++ b/crypto/stark/src/fri/fri_commitment.rs @@ -23,8 +23,9 @@ where #[derive(Clone)] struct EvalMmapBacking { mmap: std::sync::Arc, + /// Owns the file descriptor backing the mmap. Dropping it would close + /// the descriptor and invalidate the mmap. _file: std::sync::Arc, - _len: usize, elem_size: usize, } @@ -82,12 +83,10 @@ where writer.flush()?; } let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; - let len = self.evaluation.len(); self.evaluation = Vec::new(); self.eval_mmap = Some(EvalMmapBacking { mmap: std::sync::Arc::new(mmap), _file: std::sync::Arc::new(file), - _len: len, elem_size, }); Ok(()) From 63f26bda4af0d43e81b46b4c8bffcba26990a975 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 14:15:50 -0300 Subject: [PATCH 038/231] Document EvalMmapBacking struct --- crypto/stark/src/fri/fri_commitment.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/stark/src/fri/fri_commitment.rs b/crypto/stark/src/fri/fri_commitment.rs index 01f39cbe0..a6761db18 100644 --- a/crypto/stark/src/fri/fri_commitment.rs +++ b/crypto/stark/src/fri/fri_commitment.rs @@ -19,6 +19,9 @@ where eval_mmap: Option, } +/// File-backed mmap storage for FRI layer evaluations. +/// After `spill_evaluation_to_disk()`, the in-memory evaluation vector is freed +/// and element access goes through this mmap instead. 
#[cfg(feature = "disk-spill")] #[derive(Clone)] struct EvalMmapBacking { From eb5164d914501ae0c8cf21c462594c6f2d19da0b Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 14:17:08 -0300 Subject: [PATCH 039/231] Add safety comment to get_evaluation mmap access --- crypto/stark/src/fri/fri_commitment.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/stark/src/fri/fri_commitment.rs b/crypto/stark/src/fri/fri_commitment.rs index a6761db18..1b14d3241 100644 --- a/crypto/stark/src/fri/fri_commitment.rs +++ b/crypto/stark/src/fri/fri_commitment.rs @@ -59,6 +59,9 @@ where #[cfg(feature = "disk-spill")] if let Some(ref backing) = self.eval_mmap { let offset = index * backing.elem_size; + // SAFETY: spill_evaluation_to_disk writes self.evaluation as contiguous + // bytes to this mmap. FieldElement is #[repr(transparent)] over its + // base type, so the byte layout matches the original elements. return unsafe { &*(backing.mmap.as_ptr().add(offset) as *const FieldElement) }; } &self.evaluation[index] From a244ec88ba18aac5900450e7590c7af1ed59d7fd Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 14:18:05 -0300 Subject: [PATCH 040/231] Add safety comments to spill_evaluation_to_disk --- crypto/stark/src/fri/fri_commitment.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crypto/stark/src/fri/fri_commitment.rs b/crypto/stark/src/fri/fri_commitment.rs index 1b14d3241..f37d9fd57 100644 --- a/crypto/stark/src/fri/fri_commitment.rs +++ b/crypto/stark/src/fri/fri_commitment.rs @@ -82,12 +82,16 @@ where file.set_len(total_bytes as u64)?; { let mut writer = std::io::BufWriter::new(&file); + // SAFETY: FieldElement is #[repr(transparent)], so the Vec + // can be viewed as a contiguous byte slice. 
let bytes = unsafe { std::slice::from_raw_parts(self.evaluation.as_ptr() as *const u8, total_bytes) }; writer.write_all(bytes)?; writer.flush()?; } + // SAFETY: tempfile() creates an anonymous file with no filesystem path, + // so no other process can open or modify it. let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; self.evaluation = Vec::new(); self.eval_mmap = Some(EvalMmapBacking { From 9bc5da6488dee38a24bd2cc1cb0f4c4abb014f7f Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 14:20:52 -0300 Subject: [PATCH 041/231] Inline FriLayer::new back into push --- crypto/stark/src/fri/mod.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/stark/src/fri/mod.rs b/crypto/stark/src/fri/mod.rs index 8d3724093..377e03f6c 100644 --- a/crypto/stark/src/fri/mod.rs +++ b/crypto/stark/src/fri/mod.rs @@ -57,13 +57,12 @@ where let merkle_tree = FriLayerMerkleTree::build(&leaves) .expect("FRI commit: Merkle tree construction must succeed"); let root = merkle_tree.root; - let layer = FriLayer::new( + fri_layer_list.push(FriLayer::new( &evals, merkle_tree, current_coset_offset.clone().to_extension(), current_domain_size, - ); - fri_layer_list.push(layer); + )); // >>>> Send commitment: [pₖ] transcript.append_bytes(&root); From 6e0d5e981da058251c3d1c0d6b9237e4873d17c7 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 14:22:18 -0300 Subject: [PATCH 042/231] Document Round2EvalMmap struct --- crypto/stark/src/prover.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 271914b0a..99d8df15c 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -246,9 +246,14 @@ where eval_mmaps: Option>, } +/// File-backed mmap storage for a single composition polynomial part's LDE evaluations. +/// After `spill_evaluations_to_disk()`, elements are read from the mmap instead of +/// the in-memory vector. 
#[cfg(feature = "disk-spill")] struct Round2EvalMmap { mmap: memmap2::Mmap, + /// Owns the file descriptor backing the mmap. Dropping it would close + /// the descriptor and invalidate the mmap. _file: std::fs::File, len: usize, elem_size: usize, From 04b7e3e38ce673f28563b7e17c915b7882761f01 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 14:23:27 -0300 Subject: [PATCH 043/231] Add safety comment to get_composition_eval mmap access --- crypto/stark/src/prover.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 99d8df15c..c512a05b1 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -278,6 +278,8 @@ where if let Some(ref mmaps) = self.eval_mmaps { let m = &mmaps[part]; let offset = index * m.elem_size; + // SAFETY: spill_evaluations_to_disk writes the evaluations as contiguous + // bytes to this mmap. FieldElement is #[repr(transparent)]. return unsafe { &*(m.mmap.as_ptr().add(offset) as *const FieldElement) }; } &self.lde_composition_poly_evaluations[part][index] From 6df4e40b5aaec6ebb49ee8a8bf83477235790b83 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 14:25:30 -0300 Subject: [PATCH 044/231] Add safety comments to spill_evaluations_to_disk --- crypto/stark/src/prover.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index c512a05b1..2bd75cf7d 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -310,11 +310,15 @@ where file.set_len(total_bytes as u64)?; { let mut writer = std::io::BufWriter::new(&file); + // SAFETY: FieldElement is #[repr(transparent)], so the Vec + // can be viewed as a contiguous byte slice. 
let bytes = unsafe { std::slice::from_raw_parts(part.as_ptr() as *const u8, total_bytes) }; writer.write_all(bytes)?; writer.flush()?; } + // SAFETY: tempfile() creates an anonymous file with no filesystem path, + // so no other process can open or modify it. let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; let len = part.len(); drop(part); From 6152c9109f4c972fc18014cb2cdd3ca96241cd25 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 14:30:23 -0300 Subject: [PATCH 045/231] Add comments explaining advise_drop_cache calls --- crypto/stark/src/prover.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 2bd75cf7d..e517d09dd 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -564,6 +564,8 @@ pub trait IsStarkProver< { let num_cols = trace.num_main_columns; trace.extract_columns_main_into(main_pool); + // Data is now in the pool buffers. Evict the mmap pages from the OS + // page cache so the same data doesn't occupy RAM in both places. #[cfg(feature = "disk-spill")] trace.main_table.advise_drop_cache(); #[cfg(feature = "instruments")] @@ -609,6 +611,8 @@ pub trait IsStarkProver< { let num_cols = trace.num_main_columns; trace.extract_columns_main_into(main_pool); + // Data is now in the pool buffers. Evict the mmap pages from the OS + // page cache so the same data doesn't occupy RAM in both places. 
#[cfg(feature = "disk-spill")] trace.main_table.advise_drop_cache(); #[cfg(feature = "instruments")] From abbcbe11554d82bae08f0ba7e5b0d4448cf5fc1f Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 14:34:16 -0300 Subject: [PATCH 046/231] Remove leftover spill comments --- crypto/stark/src/prover.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index e517d09dd..1eddff77a 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1020,8 +1020,6 @@ pub trait IsStarkProver< else { return Err(ProvingError::EmptyCommitment); }; - // Note: composition Merkle tree kept in RAM (spilling adds I/O overhead - // for only ~30 sparse queries; memory is freed after Rounds 2-4). #[cfg(feature = "instruments")] let merkle_dur = t_sub.elapsed(); @@ -1166,9 +1164,6 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let other_dur_1 = t_sub.elapsed(); - // Note: composition poly evaluations kept in RAM (only ~30 sparse queries - // remain; spilling adds I/O overhead that outweighs memory savings). - // Extend N trace-coset evaluations to 2N LDE-coset evaluations via standard LDE. // deep_evals[i] = h(offset·ω_N^i) = f(ω_N^i) where f(x) = h(offset·x). // Standard iFFT+FFT recovers f and evaluates on the 2N-th roots: f(Ω^j) = h(offset·Ω^j). 
From 5e227537c10399c6a9b073ffa90b2297829223ce Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 18:10:59 -0300 Subject: [PATCH 047/231] Remove unnecessary &mut on round_2_result --- crypto/stark/src/prover.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 1eddff77a..53ed80b63 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1114,7 +1114,7 @@ pub trait IsStarkProver< air: &dyn AIR, domain: &Domain, round_1_result: &Round1, - round_2_result: &mut Round2, + round_2_result: &Round2, round_3_result: &Round3, z: &FieldElement, transcript: &mut impl IsStarkTranscript, @@ -2347,7 +2347,7 @@ pub trait IsStarkProver< coefficients.drain(..num_transition_constraints).collect(); let boundary_coefficients = coefficients; - let mut round_2_result = Self::round_2_compute_composition_polynomial( + let round_2_result = Self::round_2_compute_composition_polynomial( air, pub_inputs, domain, @@ -2405,7 +2405,7 @@ pub trait IsStarkProver< air, domain, round_1_result, - &mut round_2_result, + &round_2_result, &round_3_result, &z, transcript, From 5fbbe1c9de897ba67ea0e3bfbf19e420854a7aa2 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 18:23:35 -0300 Subject: [PATCH 048/231] Simplify trace spill comment --- crypto/stark/src/prover.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 53ed80b63..7b7daea56 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1635,8 +1635,8 @@ pub trait IsStarkProver< } // Spill all main trace tables to mmap before allocating pool buffers. - // This frees the heap-allocated main trace data (~120 cols × N rows × 8 bytes each), - // making room for the LDE pool buffers which are much larger (blowup_factor × N). 
+ // This frees the heap-allocated trace data, making room for the LDE pool + // buffers which are much larger (blowup_factor × trace size). #[cfg(feature = "disk-spill")] for (_, trace, _) in air_trace_pairs.iter_mut() { trace From 28cb672171ce2e91708e3330af8192a6774ba634 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 18:30:15 -0300 Subject: [PATCH 049/231] Clean up k_commit and pool allocation comments --- crypto/stark/src/prover.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 7b7daea56..2f6676fc9 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1646,17 +1646,15 @@ pub trait IsStarkProver< } // Number of tables to process concurrently. - // disk-spill: Phase A/C use k_commit=1 (one pool at a time, since each - // table's LDE already saturates all cores via column-parallel FFT). - // Rounds 2-4 use the full k (no pools needed, reads from mmap). + // disk-spill: Phase A/C commit one table at a time to limit pool memory. + // Rounds 2-4 use full parallelism (no pools, reads from mmap). let k = table_parallelism().min(num_airs).max(1); #[cfg(feature = "disk-spill")] let k_commit = 1_usize; #[cfg(not(feature = "disk-spill"))] let k_commit = k; - // With k_commit=1 (disk-spill), pre-allocate to max_lde_size so - // coset_lde_full_expand can resize in-place without reallocation spikes. - // With k_commit>1 (no disk-spill), start empty and grow on demand. + // k_commit=1: pre-allocate pool to max_lde_size to avoid reallocation. + // k_commit>1: start empty and grow on demand. 
let mut pool_sets: Vec> = (0..k_commit) .map(|_| PoolSet { main: (0..max_main_cols) From b162db0c932e340fa4734f2b3a7b2252d7f70b68 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 18:41:23 -0300 Subject: [PATCH 050/231] Remove leftover empty snap calls --- crypto/stark/src/prover.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 2f6676fc9..7e346f9b2 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1672,8 +1672,6 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let prepass_elapsed = phase_start.elapsed(); - #[cfg(feature = "instruments")] - if let Some(s) = crate::instruments::snap("after pool alloc") {} // ===================================================================== // Round 1, Phase A: Commit all main traces (parallel in chunks of K) @@ -1810,13 +1808,10 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let main_commits_elapsed = phase_start.elapsed(); - #[cfg(feature = "instruments")] - if let Some(s) = crate::instruments::snap("after main commits") {} // ===================================================================== // Round 1, Phase B: Sample shared LogUp challenges // ===================================================================== - let lookup_challenges: Vec> = if needs_lookup_challenges { (0..LOGUP_NUM_CHALLENGES) .map(|_| transcript.sample_field_element()) @@ -1863,8 +1858,6 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let aux_build_elapsed = phase_start.elapsed(); - #[cfg(feature = "instruments")] - if let Some(s) = crate::instruments::snap("after aux build") {} // Pass 2: Parallel fork transcript → extract → LDE → commit in chunks of K. // Each table gets its own transcript fork and pool set. 
@@ -1997,8 +1990,6 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let aux_commit_elapsed = phase_start.elapsed(); - #[cfg(feature = "instruments")] - if let Some(s) = crate::instruments::snap("after aux commit") {} #[cfg(feature = "debug-checks")] { @@ -2265,7 +2256,6 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] { - if let Some(s) = crate::instruments::snap("after rounds 2-4") {} // Store timing data for the top-level report in prove_with_options. // Uses a thread-local to avoid changing multi_prove's return type. crate::instruments::store(crate::instruments::MultiProveTiming { From 6a7e22fad4a4c5e7d53f8b725886b13287abe5e0 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 18:44:12 -0300 Subject: [PATCH 051/231] Simplify spilled_ldes comment --- crypto/stark/src/prover.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 7e346f9b2..55f58a034 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1684,9 +1684,8 @@ pub trait IsStarkProver< let mut main_commits: Vec> = Vec::with_capacity(num_airs); - // Spilled LDE trace tables: one per AIR, populated during Phase A (main) and Phase C (aux). - // In Rounds 2-4 these replace the reconstruct_round1 flow — LDE data is read from mmap - // instead of being recomputed from the trace. + // One mmap-backed LDE table per AIR. Filled during Phase A (main) and + // Phase C (aux), then read from mmap in Rounds 2-4. 
#[cfg(feature = "disk-spill")] let mut spilled_ldes: Vec>> = (0..num_airs).map(|_| None).collect(); From 852b150bf8948f7cd37c1fbbcfaf2b61bd5787b3 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 18:53:36 -0300 Subject: [PATCH 052/231] Simplify Rounds 2-4 section comment --- crypto/stark/src/prover.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 55f58a034..552135f31 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -2011,12 +2011,8 @@ pub trait IsStarkProver< // ===================================================================== // Rounds 2-4: Parallel per-table proving in chunks of K // ===================================================================== - // Each chunk of K tables is processed in parallel. Each worker gets its - // own pool set and transcript fork. Pool sets are reused across chunks. - // - // disk-spill path: LDE data is read from mmap-backed spilled_ldes instead - // of being recomputed via reconstruct_round1. This avoids the peak memory - // spike of holding both the trace and its LDE in RAM simultaneously. + // disk-spill: reads LDE data from mmap (spilled_ldes). + // non-disk-spill: recomputes LDE from the trace (reconstruct_round1). 
#[cfg(feature = "instruments")] let phase_start = Instant::now(); From b91a9e8af037d6e85506044c21866a8ac5d3c050 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 19:06:50 -0300 Subject: [PATCH 053/231] Remove 'original flow' from comment --- crypto/stark/src/prover.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 552135f31..924a47733 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -2146,7 +2146,7 @@ pub trait IsStarkProver< } } - // ----- non-disk-spill path: reconstruct LDE from trace (original flow) ----- + // ----- non-disk-spill path: recompute LDE from trace ----- #[cfg(not(feature = "disk-spill"))] { for chunk_start in (0..num_airs).step_by(k) { From 27b598653ecc63fa886977546f5adbdde3b3b82a Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 19:08:28 -0300 Subject: [PATCH 054/231] Remove end-of-block comment --- crypto/stark/src/prover.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 924a47733..e94362a5c 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -2247,7 +2247,7 @@ pub trait IsStarkProver< proofs.push(result?); } } - } // end #[cfg(not(feature = "disk-spill"))] + } #[cfg(feature = "instruments")] { From c1ad3eeb903c890661999a9d77534c55ae7000fd Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 19:16:04 -0300 Subject: [PATCH 055/231] Clarify why TableMmapBacking implements Clone --- crypto/stark/src/table.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 9a65cff3e..c12d8c048 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -20,8 +20,9 @@ pub(crate) struct TableMmapBacking { elem_size: usize, } -// Manual trait impls so Table can keep its derive macros. 
-// Spilled tables should not be cloned during proving. +// Table derives Clone, which requires all fields to implement Clone. +// TableMmapBacking implements Clone to satisfy this, but panics because +// mmap-backed data cannot be cloned. #[cfg(feature = "disk-spill")] impl Clone for TableMmapBacking { fn clone(&self) -> Self { From 44a52e6831212bb7c19811183fe74f61cd3fb54b Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 19:19:37 -0300 Subject: [PATCH 056/231] Remove unnecessary Default impl for TableMmapBacking --- crypto/stark/src/table.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index c12d8c048..13827edd6 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -30,12 +30,6 @@ impl Clone for TableMmapBacking { } } -#[cfg(feature = "disk-spill")] -impl Default for TableMmapBacking { - fn default() -> Self { - panic!("TableMmapBacking has no default — use None") - } -} #[cfg(feature = "disk-spill")] impl std::fmt::Debug for TableMmapBacking { From a825766c23da6eb4e0d1dc14663e2725c9440c61 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 19:20:50 -0300 Subject: [PATCH 057/231] Fix safety comment in get_row --- crypto/stark/src/table.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 13827edd6..55ccabf1c 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -131,8 +131,9 @@ impl Table { backing.height ); let offset = row_idx * backing.width * backing.elem_size; - // SAFETY: Row-major layout means width elements are contiguous. - // Same repr(transparent) + page-aligned guarantees as get(). + // SAFETY: spill_to_disk writes the table in row-major layout, so + // width elements at this offset are contiguous. FieldElement + // is #[repr(transparent)]. 
return unsafe { std::slice::from_raw_parts( backing.mmap.as_ptr().add(offset) as *const FieldElement, From 900150774e6251b339a375d067a29c28906c8918 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 19:22:35 -0300 Subject: [PATCH 058/231] Remove obvious comment in extract_columns_into --- crypto/stark/src/table.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 55ccabf1c..bae2a3975 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -173,7 +173,6 @@ impl Table { let iter = output[..self.width].par_iter_mut().enumerate(); #[cfg(not(feature = "parallel"))] let iter = output[..self.width].iter_mut().enumerate(); - // Use get() which transparently reads from mmap or data Vec iter.for_each(|(col_idx, buf)| { buf.clear(); buf.reserve(self.height.saturating_sub(buf.capacity())); From dac2b125a8657bd9eaedab9e5d05b872c3133ed6 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 19:25:56 -0300 Subject: [PATCH 059/231] Fix mmap safety comment in spill_to_disk --- crypto/stark/src/table.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index bae2a3975..dba1cfe53 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -249,7 +249,8 @@ impl Table { writer.flush()?; } - // SAFETY: We own the file exclusively. + // SAFETY: tempfile() creates an anonymous file with no filesystem path, + // so no other process can open or modify it. let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? 
}; self.mmap_backing = Some(TableMmapBacking { From a9e7ca745968ece41c3eda171620715230a3c7d3 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 19:26:56 -0300 Subject: [PATCH 060/231] Document TableMmapBacking fields --- crypto/stark/src/table.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index dba1cfe53..6d0ec8cc4 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -14,9 +14,14 @@ use rayon::prelude::*; #[cfg(feature = "disk-spill")] pub(crate) struct TableMmapBacking { mmap: memmap2::Mmap, + /// Owns the file descriptor backing the mmap. Dropping it would close + /// the descriptor and invalidate the mmap. _file: std::fs::File, + /// Number of columns per row. width: usize, + /// Number of rows. height: usize, + /// Size in bytes of a single element. elem_size: usize, } From 6fb5a02f02e05b97e5c4c7fd3a4ce3d9273138c4 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 19:28:48 -0300 Subject: [PATCH 061/231] Add safety comment and fix doc for advise_drop_cache --- crypto/stark/src/table.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 6d0ec8cc4..d23c486e4 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -273,11 +273,14 @@ impl Table { } /// Advise the kernel to drop mmap pages from the page cache. - /// Call after reading spilled data into pool buffers to free ~37GB of - /// cached pages that would otherwise persist under memory pressure. + /// Call after reading spilled data into pool buffers so the same + /// data doesn't occupy RAM in both places. #[cfg(feature = "disk-spill")] pub fn advise_drop_cache(&self) { if let Some(ref backing) = self.mmap_backing { + // SAFETY: the pointer and length come from a valid mmap. + // MADV_DONTNEED is advisory — it cannot cause UB, only + // tells the kernel these pages can be reclaimed. 
unsafe { libc::madvise( backing.mmap.as_ptr() as *mut libc::c_void, From 431af172bba6c384f2e4be2556eefd82d28c0205 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 19:31:57 -0300 Subject: [PATCH 062/231] Shorten advise_drop_cache safety comment --- crypto/stark/src/table.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index d23c486e4..09cbe3623 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -278,9 +278,8 @@ impl Table { #[cfg(feature = "disk-spill")] pub fn advise_drop_cache(&self) { if let Some(ref backing) = self.mmap_backing { - // SAFETY: the pointer and length come from a valid mmap. - // MADV_DONTNEED is advisory — it cannot cause UB, only - // tells the kernel these pages can be reclaimed. + // SAFETY: pointer and length are from a valid mmap. + // MADV_DONTNEED is advisory and cannot cause UB. unsafe { libc::madvise( backing.mmap.as_ptr() as *mut libc::c_void, From 5128e8620a987665801bd73c6d6b3b9e9522e2a9 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 19:36:29 -0300 Subject: [PATCH 063/231] Restore original halt timestamp comment --- prover/src/tables/trace_builder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 717171388..4fe97ca84 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -1941,7 +1941,7 @@ impl Traces { bitwise::update_multiplicities(&mut bitwise_table, &bitwise_ops); drop(bitwise_ops); - // --- Extract halt timestamp (needs cpu_ops) --- + // Extract halt timestamp from the last ECALL instruction let halt_op = cpu_ops .iter() .rev() From b6b3283d36e57b362f223a086681d84cfaee6538 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 19:39:08 -0300 Subject: [PATCH 064/231] Restore Phase 3 comment in prove_with_options --- prover/src/lib.rs | 1 + 1 file 
changed, 1 insertion(+) diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 972cacab7..a1db9a9d9 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -508,6 +508,7 @@ pub fn prove_with_options( #[cfg(feature = "instruments")] let trace_build_elapsed = phase_start.elapsed(); + // Phase 3: AIR construction #[cfg(feature = "instruments")] let phase_start = std::time::Instant::now(); From c3cae82906a4e5841a6ec6efebf046989697919a Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 19:42:53 -0300 Subject: [PATCH 065/231] Simplify spill_main_to_disk doc --- crypto/stark/src/trace.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 29e7dc306..b383d2c01 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -153,10 +153,8 @@ where self.num_aux_columns = num_aux_columns; } - /// Spill the main trace data to disk via mmap. - /// After this call, `main_table.data` is freed but all accessors - /// (`get_main`, `columns_main`, `extract_columns_main_into`) continue - /// to work transparently through mmap. + /// Write main trace data to a temp file and free the in-memory vector. + /// Accessors read from the mmap after this call. #[cfg(feature = "disk-spill")] pub fn spill_main_to_disk(&mut self) -> std::io::Result<()> { self.main_table.spill_to_disk() From 6441b46bb363affd3a51965425e1494ddda8d68c Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 19:45:28 -0300 Subject: [PATCH 066/231] Simplify MmapBacking doc and document _file fields --- crypto/stark/src/trace.rs | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index b383d2c01..3ee15bce0 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -223,22 +223,16 @@ where pub(crate) mmap_backing: Option, } -/// File-backed mmap storage for LDE column data. 
-/// -/// Columns are stored in separate files for main and aux (since they may be -/// spilled at different times during Phase A and Phase B of proving). -/// Each file has column-major layout: -/// ```text -/// [col_0][col_1]...[col_N] -/// ``` -/// Each column occupies `num_rows * elem_size` contiguous bytes. -/// Elements are stored as their native in-memory representation, -/// which is valid because `FieldElement` is `#[repr(transparent)]`. +/// File-backed mmap storage for LDE column data (column-major layout). +/// Main and aux columns are in separate files since they are spilled +/// at different times (Phase A and Phase C). #[cfg(feature = "disk-spill")] pub(crate) struct MmapBacking { main_mmap: memmap2::Mmap, + /// Owns the file descriptor backing main_mmap. _main_file: std::fs::File, aux_mmap: Option, + /// Owns the file descriptor backing aux_mmap. _aux_file: Option, num_rows: usize, num_main_cols: usize, From fcb8a3591c71df1f66c247a93fd93f84786020cf Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 20:02:57 -0300 Subject: [PATCH 067/231] Fix safety comments in get_main and get_aux --- crypto/stark/src/trace.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 3ee15bce0..17009211d 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -313,10 +313,8 @@ where backing.num_main_cols ); let offset = (col * backing.num_rows + row) * backing.main_elem_size; - // SAFETY: FieldElement is #[repr(transparent)] over F::BaseType. - // The mmap is page-aligned and elements are contiguously packed at - // multiples of main_elem_size, so alignment is satisfied. - // The data was written from identical types on the same machine. + // SAFETY: spill_main_from_pool writes columns contiguously to this + // mmap. FieldElement is #[repr(transparent)] over F::BaseType. 
return unsafe { &*(backing.main_mmap.as_ptr().add(offset) as *const FieldElement) }; } &self.main_columns[col][row] @@ -338,7 +336,7 @@ where .as_ref() .expect("aux mmap must exist when accessing aux columns"); let offset = (col * backing.num_rows + row) * backing.aux_elem_size; - // SAFETY: Same as get_main — repr(transparent) + page-aligned mmap. + // SAFETY: same layout as get_main, see comment there. return unsafe { &*(aux_mmap.as_ptr().add(offset) as *const FieldElement) }; } &self.aux_columns[col][row] From 0a2a9bfac4bcd939281c219de5469a025150502e Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 20:07:45 -0300 Subject: [PATCH 068/231] Inline safety comment for get_aux --- crypto/stark/src/trace.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 17009211d..4cbe22600 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -336,7 +336,8 @@ where .as_ref() .expect("aux mmap must exist when accessing aux columns"); let offset = (col * backing.num_rows + row) * backing.aux_elem_size; - // SAFETY: same layout as get_main, see comment there. + // SAFETY: add_aux_from_pool writes columns contiguously to this + // mmap. FieldElement is #[repr(transparent)] over E::BaseType. 
return unsafe { &*(aux_mmap.as_ptr().add(offset) as *const FieldElement) }; } &self.aux_columns[col][row] From 853c0fa86ccd06d6e5f365cd448b9832ab2f80fa Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 20:09:36 -0300 Subject: [PATCH 069/231] Simplify spill_main_from_pool doc --- crypto/stark/src/trace.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 4cbe22600..c86aa192c 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -381,12 +381,8 @@ where self.lde_step_size * step } - /// Write pool column data to a temp file, mmap it, and return an mmap-backed - /// LDETraceTable. The pool buffers are NOT consumed — they keep their capacity - /// for reuse by the next chunk. - /// - /// This is used during Phase A to snapshot the main LDE columns from the pool - /// before the pool is overwritten by the next chunk. + /// Write pool column data to a temp file and return an mmap-backed + /// LDETraceTable. Pool buffers keep their capacity for reuse. #[cfg(feature = "disk-spill")] pub fn spill_main_from_pool( main_pool: &[Vec>], From 3a349842b22cf4cc05d2728badecaf805801dac3 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 20:11:37 -0300 Subject: [PATCH 070/231] Simplify write_pool_columns_to_mmap doc --- crypto/stark/src/trace.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index c86aa192c..aeb8dc783 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -451,13 +451,8 @@ where Ok(()) } - /// Write borrowed pool columns to a temp file and mmap them. - /// Does NOT consume the pool — columns keep their capacity. - /// - /// Note: the concrete element types are `FieldElement` (8 bytes, - /// `#[repr(transparent)]` over `u64`) and `FieldElement` - /// (24 bytes, `#[repr(transparent)]` over `[u64; 3]`). 
Neither has padding, - /// so the raw byte round-trip is well-defined. + /// Write pool columns to a temp file and return the mmap + file handle. + /// Pool buffers keep their capacity for reuse. #[cfg(feature = "disk-spill")] fn write_pool_columns_to_mmap( columns: &[Vec], From 8150c298fdb304713a71aeee80b5bd8d6303fe3e Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 20:12:51 -0300 Subject: [PATCH 071/231] Fix mmap safety comment in write_pool_columns_to_mmap --- crypto/stark/src/trace.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index aeb8dc783..cd133a881 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -482,7 +482,8 @@ where } writer.flush()?; } - // SAFETY: We own the file exclusively. + // SAFETY: tempfile() creates an anonymous file with no filesystem path, + // so no other process can open or modify it. let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; Ok((mmap, file)) } From 2d76233c0a40425d23211c6f0022d5b74e9b9567 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 20:14:16 -0300 Subject: [PATCH 072/231] Fix generic type name in safety comment --- crypto/stark/src/trace.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index cd133a881..63d6ae1cd 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -473,7 +473,7 @@ where { let mut writer = std::io::BufWriter::new(&file); for col in columns { - // SAFETY: FieldElement is #[repr(transparent)] over BaseType, + // SAFETY: T is a FieldElement which is #[repr(transparent)], // so the Vec has the same byte layout as a contiguous array. 
let bytes: &[u8] = unsafe { std::slice::from_raw_parts(col.as_ptr() as *const u8, col.len() * elem_size) From 39647bf257fa856a0a3c949ccc9bddb82dde7927 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 20:25:23 -0300 Subject: [PATCH 073/231] Fix formatting --- crypto/stark/src/table.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 09cbe3623..376ea4b13 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -35,7 +35,6 @@ impl Clone for TableMmapBacking { } } - #[cfg(feature = "disk-spill")] impl std::fmt::Debug for TableMmapBacking { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { From 535fa5aefabf52be782cb7a64d1f45799e66150f Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 1 Apr 2026 20:29:05 -0300 Subject: [PATCH 074/231] Add comment for TLS instruments pattern --- crypto/stark/src/prover.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index e94362a5c..7b285688e 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -2114,6 +2114,7 @@ pub trait IsStarkProver< domain, )?; + // Collect per-table sub-op timing via TLS. 
#[cfg(feature = "instruments")] { let sub_ops = From 63d1c054fa02395acaf140388756ceee5593918c Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 8 Apr 2026 17:31:56 -0300 Subject: [PATCH 075/231] Extract round1_from_lde to deduplicate disk-spill Round1 construction --- crypto/stark/src/prover.rs | 126 ++++++++++++++++--------------------- crypto/stark/src/table.rs | 1 + 2 files changed, 55 insertions(+), 72 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 7b285688e..62f094c5c 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -648,30 +648,17 @@ pub trait IsStarkProver< )) } - /// Reconstruct a full Round1 struct by recomputing LDE evaluations and using - /// the stored Merkle trees from metadata. Uses pool buffers to avoid allocation. - /// - /// The Merkle trees were already built during Phase A/C and are reused here, - /// eliminating redundant Keccak hashing. - fn reconstruct_round1( + /// Build a Round1 from a pre-built LDETraceTable and stored metadata. + /// Reuses Merkle trees from Phase A/C via Arc (pointer copy, no deep clone). 
+ fn round1_from_lde( air: &dyn AIR, - trace: &TraceTable, - domain: &Domain, + lde_trace: LDETraceTable, metadata: &Round1Metadata, - twiddles: &LdeTwiddles, - main_pool: &mut [Vec>], - aux_pool: &mut [Vec>], - ) -> Result, ProvingError> + ) -> Round1 where FieldElement: AsBytes, FieldElement: AsBytes, { - // Recompute main LDE into pool buffers (extract columns directly, no T1 transpose) - let num_main_cols = trace.num_main_columns; - trace.extract_columns_main_into(main_pool); - Self::expand_pool_to_lde::(main_pool, num_main_cols, domain, twiddles); - - // Use stored Merkle trees from Phase A/C via Arc (pointer copy, no deep clone) let main = Round1CommitmentData:: { lde_trace_merkle_tree: Arc::clone(&metadata.main_merkle_tree), lde_trace_merkle_root: metadata.main_merkle_root, @@ -680,13 +667,8 @@ pub trait IsStarkProver< num_precomputed_cols: metadata.num_precomputed_cols, }; - // Recompute aux LDE into pool buffers, use stored aux Merkle tree - let (aux, num_aux_cols) = if air.has_aux_trace() { - let n_aux = trace.num_aux_columns; - trace.extract_columns_aux_into(aux_pool); - Self::expand_pool_to_lde::(aux_pool, n_aux, domain, twiddles); - // Safe: has_aux_trace() is true only when Phase C stored aux tree/root - let aux_commitment = Round1CommitmentData:: { + let aux = if air.has_aux_trace() { + Some(Round1CommitmentData:: { lde_trace_merkle_tree: Arc::clone( metadata .aux_merkle_tree @@ -699,10 +681,51 @@ pub trait IsStarkProver< precomputed_merkle_tree: None, precomputed_merkle_root: None, num_precomputed_cols: 0, - }; - (Some(aux_commitment), n_aux) + }) + } else { + None + }; + + Round1 { + lde_trace, + main, + aux, + rap_challenges: metadata.rap_challenges.clone(), + bus_public_inputs: metadata.bus_public_inputs.clone(), + } + } + + /// Reconstruct a full Round1 struct by recomputing LDE evaluations and using + /// the stored Merkle trees from metadata. Uses pool buffers to avoid allocation. 
+ /// + /// The Merkle trees were already built during Phase A/C and are reused here, + /// eliminating redundant Keccak hashing. + fn reconstruct_round1( + air: &dyn AIR, + trace: &TraceTable, + domain: &Domain, + metadata: &Round1Metadata, + twiddles: &LdeTwiddles, + main_pool: &mut [Vec>], + aux_pool: &mut [Vec>], + ) -> Result, ProvingError> + where + FieldElement: AsBytes, + FieldElement: AsBytes, + { + // Recompute main LDE into pool buffers (extract columns directly, no T1 transpose) + let num_main_cols = trace.num_main_columns; + trace.extract_columns_main_into(main_pool); + Self::expand_pool_to_lde::(main_pool, num_main_cols, domain, twiddles); + + // Recompute aux LDE into pool buffers + let num_aux_cols = if air.has_aux_trace() { + let n_aux = trace.num_aux_columns; + trace.extract_columns_aux_into(aux_pool); + Self::expand_pool_to_lde::(aux_pool, n_aux, domain, twiddles); + n_aux } else { - (None, 0) + 0 }; // Take column Vecs from pool (zero-copy move) instead of cloning. @@ -718,13 +741,7 @@ pub trait IsStarkProver< let lde_trace = LDETraceTable::from_columns(main_cols, aux_cols, air.step_size(), domain.blowup_factor); - Ok(Round1 { - lde_trace, - main, - aux, - rap_challenges: metadata.rap_challenges.clone(), - bus_public_inputs: metadata.bus_public_inputs.clone(), - }) + Ok(Self::round1_from_lde(air, lde_trace, metadata)) } /// Reconstruct Round1 for every table, print the bus balance report, and @@ -2064,43 +2081,8 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let table_start = Instant::now(); - let main = Round1CommitmentData:: { - lde_trace_merkle_tree: Arc::clone(&metadata.main_merkle_tree), - lde_trace_merkle_root: metadata.main_merkle_root, - precomputed_merkle_tree: metadata - .precomputed_merkle_tree - .as_ref() - .map(Arc::clone), - precomputed_merkle_root: metadata.precomputed_merkle_root, - num_precomputed_cols: metadata.num_precomputed_cols, - }; - - let aux = if air.has_aux_trace() { - Some(Round1CommitmentData:: { - 
lde_trace_merkle_tree: Arc::clone( - metadata - .aux_merkle_tree - .as_ref() - .expect("aux tree must exist when has_aux_trace"), - ), - lde_trace_merkle_root: metadata - .aux_merkle_root - .expect("aux root must exist when has_aux_trace"), - precomputed_merkle_tree: None, - precomputed_merkle_root: None, - num_precomputed_cols: 0, - }) - } else { - None - }; - - let round_1_result = Round1 { - lde_trace, - main, - aux, - rap_challenges: metadata.rap_challenges.clone(), - bus_public_inputs: metadata.bus_public_inputs.clone(), - }; + let round_1_result = + Self::round1_from_lde(*air, lde_trace, metadata); if let Some(ref bpi) = round_1_result.bus_public_inputs { table_transcript.append_field_element(&bpi.table_contribution); diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 376ea4b13..0b1003826 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -46,6 +46,7 @@ impl std::fmt::Debug for TableMmapBacking { } } +/// NOTE: compares all mmap bytes, O(n) in table size. Only used by Table's PartialEq derive. 
#[cfg(feature = "disk-spill")] impl PartialEq for TableMmapBacking { fn eq(&self, other: &Self) -> bool { From 7ec6dc696c850fbb30b576f04246d8fe69b59d79 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 8 Apr 2026 17:35:51 -0300 Subject: [PATCH 076/231] Fix precomputed tree spill error propagation, remove dead spill code --- crypto/stark/src/fri/fri_commitment.rs | 60 ----------------- crypto/stark/src/prover.rs | 93 +++----------------------- 2 files changed, 9 insertions(+), 144 deletions(-) diff --git a/crypto/stark/src/fri/fri_commitment.rs b/crypto/stark/src/fri/fri_commitment.rs index f37d9fd57..347df1940 100644 --- a/crypto/stark/src/fri/fri_commitment.rs +++ b/crypto/stark/src/fri/fri_commitment.rs @@ -15,21 +15,6 @@ where pub merkle_tree: MerkleTree, pub coset_offset: FieldElement, pub domain_size: usize, - #[cfg(feature = "disk-spill")] - eval_mmap: Option, -} - -/// File-backed mmap storage for FRI layer evaluations. -/// After `spill_evaluation_to_disk()`, the in-memory evaluation vector is freed -/// and element access goes through this mmap instead. -#[cfg(feature = "disk-spill")] -#[derive(Clone)] -struct EvalMmapBacking { - mmap: std::sync::Arc, - /// Owns the file descriptor backing the mmap. Dropping it would close - /// the descriptor and invalidate the mmap. - _file: std::sync::Arc, - elem_size: usize, } impl FriLayer @@ -49,56 +34,11 @@ where merkle_tree, coset_offset, domain_size, - #[cfg(feature = "disk-spill")] - eval_mmap: None, } } #[inline] pub fn get_evaluation(&self, index: usize) -> &FieldElement { - #[cfg(feature = "disk-spill")] - if let Some(ref backing) = self.eval_mmap { - let offset = index * backing.elem_size; - // SAFETY: spill_evaluation_to_disk writes self.evaluation as contiguous - // bytes to this mmap. FieldElement is #[repr(transparent)] over its - // base type, so the byte layout matches the original elements. 
- return unsafe { &*(backing.mmap.as_ptr().add(offset) as *const FieldElement) }; - } &self.evaluation[index] } - - #[cfg(feature = "disk-spill")] - pub fn spill_evaluation_to_disk(&mut self) -> std::io::Result<()> { - use std::io::Write; - - if self.evaluation.is_empty() || self.eval_mmap.is_some() { - return Ok(()); - } - - let elem_size = std::mem::size_of::>(); - let total_bytes = self.evaluation.len() * elem_size; - - let file = tempfile::tempfile()?; - file.set_len(total_bytes as u64)?; - { - let mut writer = std::io::BufWriter::new(&file); - // SAFETY: FieldElement is #[repr(transparent)], so the Vec - // can be viewed as a contiguous byte slice. - let bytes = unsafe { - std::slice::from_raw_parts(self.evaluation.as_ptr() as *const u8, total_bytes) - }; - writer.write_all(bytes)?; - writer.flush()?; - } - // SAFETY: tempfile() creates an anonymous file with no filesystem path, - // so no other process can open or modify it. - let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; - self.evaluation = Vec::new(); - self.eval_mmap = Some(EvalMmapBacking { - mmap: std::sync::Arc::new(mmap), - _file: std::sync::Arc::new(file), - elem_size, - }); - Ok(()) - } } diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 62f094c5c..523b07492 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -242,21 +242,6 @@ where pub(crate) composition_poly_merkle_tree: BatchedMerkleTree, /// The commitment to the composition polynomial parts. pub(crate) composition_poly_root: Commitment, - #[cfg(feature = "disk-spill")] - eval_mmaps: Option>, -} - -/// File-backed mmap storage for a single composition polynomial part's LDE evaluations. -/// After `spill_evaluations_to_disk()`, elements are read from the mmap instead of -/// the in-memory vector. -#[cfg(feature = "disk-spill")] -struct Round2EvalMmap { - mmap: memmap2::Mmap, - /// Owns the file descriptor backing the mmap. 
Dropping it would close - /// the descriptor and invalidate the mmap. - _file: std::fs::File, - len: usize, - elem_size: usize, } impl Round2 @@ -265,74 +250,17 @@ where FieldElement: AsBytes, { pub fn num_composition_parts(&self) -> usize { - #[cfg(feature = "disk-spill")] - if let Some(ref mmaps) = self.eval_mmaps { - return mmaps.len(); - } self.lde_composition_poly_evaluations.len() } #[inline] pub fn get_composition_eval(&self, part: usize, index: usize) -> &FieldElement { - #[cfg(feature = "disk-spill")] - if let Some(ref mmaps) = self.eval_mmaps { - let m = &mmaps[part]; - let offset = index * m.elem_size; - // SAFETY: spill_evaluations_to_disk writes the evaluations as contiguous - // bytes to this mmap. FieldElement is #[repr(transparent)]. - return unsafe { &*(m.mmap.as_ptr().add(offset) as *const FieldElement) }; - } &self.lde_composition_poly_evaluations[part][index] } pub fn composition_eval_len(&self, part: usize) -> usize { - #[cfg(feature = "disk-spill")] - if let Some(ref mmaps) = self.eval_mmaps { - return mmaps[part].len; - } self.lde_composition_poly_evaluations[part].len() } - - #[cfg(feature = "disk-spill")] - pub fn spill_evaluations_to_disk(&mut self) -> std::io::Result<()> { - use std::io::Write; - - if self.lde_composition_poly_evaluations.is_empty() || self.eval_mmaps.is_some() { - return Ok(()); - } - - let elem_size = std::mem::size_of::>(); - let mut mmaps = Vec::with_capacity(self.lde_composition_poly_evaluations.len()); - - for part in self.lde_composition_poly_evaluations.drain(..) { - let total_bytes = part.len() * elem_size; - let file = tempfile::tempfile()?; - file.set_len(total_bytes as u64)?; - { - let mut writer = std::io::BufWriter::new(&file); - // SAFETY: FieldElement is #[repr(transparent)], so the Vec - // can be viewed as a contiguous byte slice. 
- let bytes = - unsafe { std::slice::from_raw_parts(part.as_ptr() as *const u8, total_bytes) }; - writer.write_all(bytes)?; - writer.flush()?; - } - // SAFETY: tempfile() creates an anonymous file with no filesystem path, - // so no other process can open or modify it. - let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; - let len = part.len(); - drop(part); - mmaps.push(Round2EvalMmap { - mmap, - _file: file, - len, - elem_size, - }); - } - - self.eval_mmaps = Some(mmaps); - Ok(()) - } } /// A container for the results of the third round of the STARK Prove protocol. @@ -1047,8 +975,6 @@ pub trait IsStarkProver< lde_composition_poly_evaluations: lde_composition_poly_parts_evaluations, composition_poly_merkle_tree, composition_poly_root, - #[cfg(feature = "disk-spill")] - eval_mmaps: None, }) } @@ -1795,10 +1721,10 @@ pub trait IsStarkProver< })?; } - let precomputed_tree = pre_tree.map(|t| { - let mut arc = Arc::new(t); - #[cfg(feature = "disk-spill")] - { + let precomputed_tree = match pre_tree { + Some(t) => { + let mut arc = Arc::new(t); + #[cfg(feature = "disk-spill")] Arc::get_mut(&mut arc) .expect("sole Arc owner") .spill_nodes_to_disk() @@ -1806,11 +1732,11 @@ pub trait IsStarkProver< ProvingError::WrongParameter(format!( "disk-spill precomputed Merkle tree: {e}" )) - }) - .unwrap(); + })?; + Some(arc) } - arc - }); + None => None, + }; main_commits.push(MainCommitData { main_tree, @@ -2081,8 +2007,7 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let table_start = Instant::now(); - let round_1_result = - Self::round1_from_lde(*air, lde_trace, metadata); + let round_1_result = Self::round1_from_lde(*air, lde_trace, metadata); if let Some(ref bpi) = round_1_result.bus_public_inputs { table_transcript.append_field_element(&bpi.table_contribution); From 289cb8352eeb8734e8b2aa068dbb2ae7fdc72102 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 8 Apr 2026 17:37:26 -0300 Subject: [PATCH 077/231] Remove trivial FriLayer::get_evaluation 
wrapper --- crypto/stark/src/fri/fri_commitment.rs | 5 ----- crypto/stark/src/fri/mod.rs | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/crypto/stark/src/fri/fri_commitment.rs b/crypto/stark/src/fri/fri_commitment.rs index 347df1940..b0b3188b2 100644 --- a/crypto/stark/src/fri/fri_commitment.rs +++ b/crypto/stark/src/fri/fri_commitment.rs @@ -36,9 +36,4 @@ where domain_size, } } - - #[inline] - pub fn get_evaluation(&self, index: usize) -> &FieldElement { - &self.evaluation[index] - } } diff --git a/crypto/stark/src/fri/mod.rs b/crypto/stark/src/fri/mod.rs index 377e03f6c..87ab66a5b 100644 --- a/crypto/stark/src/fri/mod.rs +++ b/crypto/stark/src/fri/mod.rs @@ -103,7 +103,7 @@ where let mut index = *iota_s; for layer in fri_layers { // symmetric element - let evaluation_sym = layer.get_evaluation(index ^ 1).clone(); + let evaluation_sym = layer.evaluation[index ^ 1].clone(); let auth_path_sym = layer.merkle_tree.get_proof_by_pos(index >> 1).unwrap(); layers_evaluations_sym.push(evaluation_sym); layers_auth_paths_sym.push(auth_path_sym); From 914b632f9ffc5cd63d15d61bebdf67ae8244bccc Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 8 Apr 2026 17:39:04 -0300 Subject: [PATCH 078/231] Remove trivial Round2 wrapper methods --- crypto/stark/src/prover.rs | 39 ++++++++------------------------------ 1 file changed, 8 insertions(+), 31 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 523b07492..c2bfa9652 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -244,25 +244,6 @@ where pub(crate) composition_poly_root: Commitment, } -impl Round2 -where - F: IsField, - FieldElement: AsBytes, -{ - pub fn num_composition_parts(&self) -> usize { - self.lde_composition_poly_evaluations.len() - } - - #[inline] - pub fn get_composition_eval(&self, part: usize, index: usize) -> &FieldElement { - &self.lde_composition_poly_evaluations[part][index] - } - - pub fn composition_eval_len(&self, part: usize) 
-> usize { - self.lde_composition_poly_evaluations[part].len() - } -} - /// A container for the results of the third round of the STARK Prove protocol. pub struct Round3 { /// Evaluations of the trace polynomials, main ans auxiliary, at the out-of-domain challenge. @@ -990,7 +971,7 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, { - let num_parts = round_2_result.num_composition_parts(); + let num_parts = round_2_result.lde_composition_poly_evaluations.len(); let z_power = z.pow(num_parts); let domain_size = domain.interpolation_domain_size; let blowup_factor = domain.blowup_factor; @@ -1071,7 +1052,7 @@ pub trait IsStarkProver< let gamma = transcript.sample_field_element(); - let n_terms_composition_poly = round_2_result.num_composition_parts(); + let n_terms_composition_poly = round_2_result.lde_composition_poly_evaluations.len(); let num_terms_trace = air.context().transition_offsets.len() * air.step_size() * air.context().trace_columns; @@ -1211,7 +1192,7 @@ pub trait IsStarkProver< { let domain_size = domain.interpolation_domain_size; let blowup_factor = domain.blowup_factor; - let num_parts = round_2_result.num_composition_parts(); + let num_parts = round_2_result.lde_composition_poly_evaluations.len(); let z_power = z.pow(num_parts); // pole for H terms // Number of evaluation points per trace column (= transition_offsets.len() * step_size) @@ -1272,7 +1253,7 @@ pub trait IsStarkProver< // H terms: Σ_j γ_j * (H_j(x_i) - H_j(z^K)) * inv_h[i] let mut result = FieldElement::::zero(); for j in 0..num_parts { - let h_j_val = round_2_result.get_composition_eval(j, row_idx); + let h_j_val = &round_2_result.lde_composition_poly_evaluations[j][row_idx]; let h_j_ood = &h_ood[j]; let numerator = h_j_val - h_j_ood; result += &composition_poly_gammas[j] * numerator * &inv_h[i]; @@ -1318,17 +1299,13 @@ pub trait IsStarkProver< .get_proof_by_pos(index) .unwrap(); - let num_parts = round_2_result.num_composition_parts(); + let num_parts = 
round_2_result.lde_composition_poly_evaluations.len(); let lde_composition_poly_parts_evaluation: Vec<_> = (0..num_parts) .flat_map(|j| { - let part_len = round_2_result.composition_eval_len(j) as u64; + let part = &round_2_result.lde_composition_poly_evaluations[j]; vec![ - round_2_result - .get_composition_eval(j, reverse_index(index * 2, part_len)) - .clone(), - round_2_result - .get_composition_eval(j, reverse_index(index * 2 + 1, part_len)) - .clone(), + part[reverse_index(index * 2, part.len() as u64)].clone(), + part[reverse_index(index * 2 + 1, part.len() as u64)].clone(), ] }) .collect(); From 3aac1143a55272d28f9d8e907ffa0cac58a52bde Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 14 Apr 2026 16:53:48 -0300 Subject: [PATCH 079/231] Document repr(transparent) requirement for mmap casts --- crypto/math/src/field/element.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crypto/math/src/field/element.rs b/crypto/math/src/field/element.rs index fb2019df4..7e51d7950 100644 --- a/crypto/math/src/field/element.rs +++ b/crypto/math/src/field/element.rs @@ -39,6 +39,10 @@ use serde::ser::{Serialize, SerializeStruct, Serializer}; use super::traits::{IsPrimeField, IsSubFieldOf, LegendreSymbol}; /// A field element with operations algorithms defined in `F` +/// +/// `#[repr(transparent)]` is required by the disk-spill code in +/// `crypto/stark` (`table.rs`, `trace.rs`), which casts mmap bytes to +/// `FieldElement`. 
#[allow(clippy::derived_hash_with_manual_eq)] #[repr(transparent)] #[derive(Debug, Clone, Hash, Copy)] From 059292cfddb6652b344b8e905fecb18e277ad4df Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 14 Apr 2026 16:55:04 -0300 Subject: [PATCH 080/231] Assert mmap bounds in release builds --- crypto/stark/src/table.rs | 8 ++++++-- crypto/stark/src/trace.rs | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 0b1003826..38b459362 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -130,7 +130,9 @@ impl Table { pub fn get_row(&self, row_idx: usize) -> &[FieldElement] { #[cfg(feature = "disk-spill")] if let Some(ref backing) = self.mmap_backing { - debug_assert!( + // Guard the unsafe pointer math below; matches the non-spill + // path's checked indexing so release builds don't drop the check. + assert!( row_idx < backing.height, "Table::get_row out of bounds: row={row_idx}, height={}", backing.height @@ -192,7 +194,9 @@ impl Table { pub fn get(&self, row: usize, col: usize) -> &FieldElement { #[cfg(feature = "disk-spill")] if let Some(ref backing) = self.mmap_backing { - debug_assert!( + // Guard the unsafe pointer math below; matches the non-spill + // path's checked indexing so release builds don't drop the check. + assert!( row < backing.height && col < backing.width, "Table::get out of bounds: row={row}, col={col}, height={}, width={}", backing.height, diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 63d6ae1cd..80730eab6 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -306,7 +306,9 @@ where pub fn get_main(&self, row: usize, col: usize) -> &FieldElement { #[cfg(feature = "disk-spill")] if let Some(ref backing) = self.mmap_backing { - debug_assert!( + // Guard the unsafe pointer math below; matches the non-spill + // path's checked indexing so release builds don't drop the check. 
+ assert!( row < backing.num_rows && col < backing.num_main_cols, "get_main out of bounds: row={row}, col={col}, num_rows={}, num_main_cols={}", backing.num_rows, @@ -325,7 +327,9 @@ where pub fn get_aux(&self, row: usize, col: usize) -> &FieldElement { #[cfg(feature = "disk-spill")] if let Some(ref backing) = self.mmap_backing { - debug_assert!( + // Guard the unsafe pointer math below; matches the non-spill + // path's checked indexing so release builds don't drop the check. + assert!( row < backing.num_rows && col < backing.num_aux_cols, "get_aux out of bounds: row={row}, col={col}, num_rows={}, num_aux_cols={}", backing.num_rows, From 156ad8ac3f32455a5e00f9037a20dc51a0f32663 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 14 Apr 2026 16:56:43 -0300 Subject: [PATCH 081/231] Close mmap-backing file descriptors after creation --- crypto/crypto/src/merkle_tree/merkle.rs | 7 +++---- crypto/stark/src/table.rs | 7 +++---- crypto/stark/src/trace.rs | 23 ++++++++++------------- 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index f50897b0c..d16210339 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -29,9 +29,6 @@ impl std::error::Error for Error {} #[cfg(feature = "disk-spill")] pub(crate) struct MmapNodeBacking { mmap: memmap2::Mmap, - /// Owns the file descriptor backing the mmap. Dropping it would close - /// the descriptor and invalidate the mmap. - _file: std::fs::File, node_count: usize, node_size: usize, } @@ -307,6 +304,9 @@ where // SAFETY: tempfile() creates an anonymous file with no filesystem path, // so no other process can open or modify it. + // The mapping keeps its own reference to the underlying object + // (Unix: kernel VMA; Windows: duplicated handle in memmap2), so the + // `file` local can drop at end of scope without invalidating it. 
let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; // Free the heap allocation @@ -314,7 +314,6 @@ where self.mmap_backing = Some(MmapNodeBacking { mmap, - _file: file, node_count, node_size, }); diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 38b459362..6f94071f0 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -14,9 +14,6 @@ use rayon::prelude::*; #[cfg(feature = "disk-spill")] pub(crate) struct TableMmapBacking { mmap: memmap2::Mmap, - /// Owns the file descriptor backing the mmap. Dropping it would close - /// the descriptor and invalidate the mmap. - _file: std::fs::File, /// Number of columns per row. width: usize, /// Number of rows. @@ -260,11 +257,13 @@ impl Table { // SAFETY: tempfile() creates an anonymous file with no filesystem path, // so no other process can open or modify it. + // The mapping keeps its own reference to the underlying object + // (Unix: kernel VMA; Windows: duplicated handle in memmap2), so the + // `file` local can drop at end of scope without invalidating it. let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; self.mmap_backing = Some(TableMmapBacking { mmap, - _file: file, width: self.width, height: self.height, elem_size, diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 80730eab6..6360e3494 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -229,11 +229,7 @@ where #[cfg(feature = "disk-spill")] pub(crate) struct MmapBacking { main_mmap: memmap2::Mmap, - /// Owns the file descriptor backing main_mmap. - _main_file: std::fs::File, aux_mmap: Option, - /// Owns the file descriptor backing aux_mmap. 
- _aux_file: Option, num_rows: usize, num_main_cols: usize, num_aux_cols: usize, @@ -401,7 +397,7 @@ where }; let main_elem_size = std::mem::size_of::>(); - let (main_mmap, main_file) = + let main_mmap = Self::write_pool_columns_to_mmap(&main_pool[..num_main_cols], main_elem_size)?; let lde_step_size = trace_step_size * blowup_factor; @@ -414,9 +410,7 @@ where blowup_factor, mmap_backing: Some(MmapBacking { main_mmap, - _main_file: main_file, aux_mmap: None, - _aux_file: None, num_rows, num_main_cols, num_aux_cols: 0, @@ -441,27 +435,27 @@ where } let aux_elem_size = std::mem::size_of::>(); - let (aux_mmap, aux_file) = - Self::write_pool_columns_to_mmap(&aux_pool[..num_aux_cols], aux_elem_size)?; + let aux_mmap = Self::write_pool_columns_to_mmap(&aux_pool[..num_aux_cols], aux_elem_size)?; let backing = self .mmap_backing .as_mut() .expect("add_aux_from_pool requires main already spilled"); backing.aux_mmap = Some(aux_mmap); - backing._aux_file = Some(aux_file); backing.num_aux_cols = num_aux_cols; Ok(()) } - /// Write pool columns to a temp file and return the mmap + file handle. + /// Write pool columns to a temp file and return the mmap. + /// The file descriptor is closed before returning; the mapping keeps + /// its own reference to the underlying object. /// Pool buffers keep their capacity for reuse. #[cfg(feature = "disk-spill")] fn write_pool_columns_to_mmap( columns: &[Vec], elem_size: usize, - ) -> std::io::Result<(memmap2::Mmap, std::fs::File)> { + ) -> std::io::Result { use std::io::Write; let num_cols = columns.len(); @@ -488,8 +482,11 @@ where } // SAFETY: tempfile() creates an anonymous file with no filesystem path, // so no other process can open or modify it. + // The mapping keeps its own reference to the underlying object + // (Unix: kernel VMA; Windows: duplicated handle in memmap2), so the + // `file` local can drop at end of scope without invalidating it. let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? 
}; - Ok((mmap, file)) + Ok(mmap) } } From 563cca2eeac003915a3f2997eefd404041ca8fc0 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 14 Apr 2026 16:57:17 -0300 Subject: [PATCH 082/231] Check spill byte counts for overflow --- crypto/stark/src/table.rs | 21 +++++++++++++++++---- crypto/stark/src/trace.rs | 12 +++++++++++- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 6f94071f0..b9824e186 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -241,16 +241,29 @@ impl Table { } let elem_size = std::mem::size_of::>(); - let total_bytes = self.data.len() * elem_size; + let total_bytes = (self.data.len() as u64) + .checked_mul(elem_size as u64) + .ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "spill_to_disk: byte count overflows u64", + ) + })?; let file = tempfile::tempfile()?; - file.set_len(total_bytes as u64)?; + file.set_len(total_bytes)?; { let mut writer = std::io::BufWriter::new(&file); // SAFETY: FieldElement is #[repr(transparent)] over F::BaseType. // The Vec has the same byte layout as a contiguous array. - let bytes: &[u8] = - unsafe { std::slice::from_raw_parts(self.data.as_ptr() as *const u8, total_bytes) }; + // `self.data.len() * elem_size` fits in usize because Vec allocations + // are bounded by isize::MAX bytes. 
+ let bytes: &[u8] = unsafe { + std::slice::from_raw_parts( + self.data.as_ptr() as *const u8, + self.data.len() * elem_size, + ) + }; writer.write_all(bytes)?; writer.flush()?; } diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 6360e3494..1ae5fafc6 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -464,7 +464,15 @@ where columns.iter().all(|c| c.len() == num_rows), "all columns must have the same length" ); - let total_bytes = (num_cols * num_rows * elem_size) as u64; + let total_bytes = (num_cols as u64) + .checked_mul(num_rows as u64) + .and_then(|n| n.checked_mul(elem_size as u64)) + .ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "write_pool_columns_to_mmap: byte count overflows u64", + ) + })?; let file = tempfile::tempfile()?; file.set_len(total_bytes)?; @@ -473,6 +481,8 @@ where for col in columns { // SAFETY: T is a FieldElement which is #[repr(transparent)], // so the Vec has the same byte layout as a contiguous array. + // `col.len() * elem_size` fits in usize because Vec allocations + // are bounded by isize::MAX bytes. 
let bytes: &[u8] = unsafe { std::slice::from_raw_parts(col.as_ptr() as *const u8, col.len() * elem_size) }; From 3280720ee76be995ba303fd30763516ba2f99fc9 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 14 Apr 2026 16:57:35 -0300 Subject: [PATCH 083/231] Validate elem_size matches size_of in write_pool_columns_to_mmap --- crypto/stark/src/trace.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 1ae5fafc6..3b8c5b0b2 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -458,6 +458,12 @@ where ) -> std::io::Result { use std::io::Write; + debug_assert_eq!( + elem_size, + std::mem::size_of::(), + "elem_size must match size_of::(); the `col.len() * elem_size` byte count below assumes it" + ); + let num_cols = columns.len(); let num_rows = if num_cols > 0 { columns[0].len() } else { 0 }; debug_assert!( From b357c8be8f1eb29006381a77a36264dad9ba431f Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 14 Apr 2026 16:58:06 -0300 Subject: [PATCH 084/231] Compare tables by field-element equality --- crypto/stark/src/table.rs | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index b9824e186..a05cddc61 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -43,26 +43,12 @@ impl std::fmt::Debug for TableMmapBacking { } } -/// NOTE: compares all mmap bytes, O(n) in table size. Only used by Table's PartialEq derive. -#[cfg(feature = "disk-spill")] -impl PartialEq for TableMmapBacking { - fn eq(&self, other: &Self) -> bool { - self.width == other.width - && self.height == other.height - && self.elem_size == other.elem_size - && self.mmap[..] == other.mmap[..] - } -} - -#[cfg(feature = "disk-spill")] -impl Eq for TableMmapBacking {} - /// A two-dimensional Table holding field elements, arranged in a row-major order. 
/// This is the basic underlying data structure used for any two-dimensional component in the /// the STARK protocol implementation, such as the `TraceTable` and the `EvaluationFrame`. /// Since this struct is a representation of a two-dimensional table, all rows should have the same /// length. -#[derive(Clone, Default, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[derive(Clone, Default, Debug, serde::Serialize, serde::Deserialize)] #[serde(bound = "")] pub struct Table { pub data: Vec>, @@ -73,6 +59,26 @@ pub struct Table { pub(crate) mmap_backing: Option, } +/// Element-wise comparison via `get()`, so spilled tables compare by field +/// equality (canonicalized per `F::eq`) rather than raw mmap bytes. +impl PartialEq for Table { + fn eq(&self, other: &Self) -> bool { + if self.width != other.width || self.height != other.height { + return false; + } + for row in 0..self.height { + for col in 0..self.width { + if self.get(row, col) != other.get(row, col) { + return false; + } + } + } + true + } +} + +impl Eq for Table {} + impl Table { /// Crates a new Table instance from a one-dimensional array in row major order /// and the intended width of the table. 
From 5e8a8ffb086961ae799ba8e0788c89b7aebd58cb Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 14 Apr 2026 16:58:38 -0300 Subject: [PATCH 085/231] Propagate aux trace spill errors through ProvingError --- crypto/stark/src/prover.rs | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index c2bfa9652..ee2226002 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1761,19 +1761,20 @@ pub trait IsStarkProver< #[cfg(not(feature = "parallel"))] let aux_iter = air_trace_pairs.iter_mut(); let bus_inputs_vec: Vec>> = aux_iter - .map(|(air, trace, _)| { - if air.has_aux_trace() { + .map( + |(air, trace, _)| -> Result>, ProvingError> { + if !air.has_aux_trace() { + return Ok(None); + } let result = air.build_auxiliary_trace(*trace, &lookup_challenges); #[cfg(feature = "disk-spill")] - trace - .spill_aux_to_disk() - .expect("disk-spill aux trace after build"); - result - } else { - None - } - }) - .collect(); + trace.spill_aux_to_disk().map_err(|e| { + ProvingError::WrongParameter(format!("disk-spill aux trace: {e}")) + })?; + Ok(result) + }, + ) + .collect::, _>>()?; #[cfg(feature = "instruments")] let aux_build_elapsed = phase_start.elapsed(); From 52d431dd35632df760fc47dda3c17f07e3ee872b Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 14 Apr 2026 16:59:15 -0300 Subject: [PATCH 086/231] Advise page cache drop for aux mmap after extract --- crypto/stark/src/prover.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index ee2226002..1231e978e 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1821,6 +1821,8 @@ pub trait IsStarkProver< if air.has_aux_trace() { let num_aux_cols = trace.num_aux_columns; trace.extract_columns_aux_into(&mut pool.aux); + #[cfg(feature = "disk-spill")] + trace.aux_table.advise_drop_cache(); #[cfg(feature = "instruments")] let t_sub = 
Instant::now(); Self::expand_pool_to_lde::( From fca8dba6adc0b89b6ee75fca17a33164dc5724b3 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 14 Apr 2026 16:59:50 -0300 Subject: [PATCH 087/231] Run disk-spill tests in CI --- .github/workflows/pr_main.yaml | 48 +++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr_main.yaml b/.github/workflows/pr_main.yaml index 393491084..60a30bddf 100644 --- a/.github/workflows/pr_main.yaml +++ b/.github/workflows/pr_main.yaml @@ -105,16 +105,18 @@ jobs: test: name: Test if: always() - needs: [test-executor, test-prover] + needs: [test-executor, test-prover, test-disk-spill] runs-on: ubuntu-latest steps: - name: Check results run: | executor="${{ needs.test-executor.result }}" prover="${{ needs.test-prover.result }}" + disk_spill="${{ needs.test-disk-spill.result }}" echo "test-executor: $executor" echo "test-prover: $prover" + echo "test-disk-spill: $disk_spill" # Allow "success" or "skipped" (skipped on merge queue pushes) if [[ "$executor" != "success" && "$executor" != "skipped" ]]; then @@ -123,6 +125,50 @@ jobs: if [[ "$prover" != "success" && "$prover" != "skipped" ]]; then exit 1 fi + if [[ "$disk_spill" != "success" && "$disk_spill" != "skipped" ]]; then + exit 1 + fi + + test-disk-spill: + name: Disk-spill tests + runs-on: ubuntu-latest + if: github.event_name != 'push' || github.actor != 'github-merge-queue[bot]' + steps: + - name: Checkout sources + uses: actions/checkout@v4 + + - name: Setup Rust Environment + uses: ./.github/actions/setup-rust + + - name: Cache cargo build artifacts + uses: Swatinem/rust-cache@v2 + with: + shared-key: "lambda-vm-disk-spill" + cache-all-crates: "true" + + - name: Cache compiled ASM ELF artifacts + id: cache-asm-elfs + uses: actions/cache@v4 + with: + path: executor/program_artifacts/asm + key: asm-elf-artifacts-${{ hashFiles('executor/programs/asm/**') }} + + - name: Install clang and lld + if: 
steps.cache-asm-elfs.outputs.cache-hit != 'true' + run: sudo apt-get update && sudo apt-get install -y clang lld + + - name: Compile ASM programs to ELF + if: steps.cache-asm-elfs.outputs.cache-hit != 'true' + run: | + make compile-programs-asm + + - name: Run stark disk-spill tests + run: | + cargo test --release -p stark --features disk-spill disk_spill + + - name: Run prover disk-spill tests + run: | + cargo test --release -p lambda-vm-prover --features disk-spill disk_spill -- --test-threads=1 build-prover-tests: name: Build prover tests From eb76d401728a28c6479c50f9001a4465a51e9516 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 20 Apr 2026 18:07:03 -0300 Subject: [PATCH 088/231] Cache LDE in RAM instead of spilling to disk --- crypto/stark/src/prover.rs | 852 ++++++++++++++----------------------- crypto/stark/src/table.rs | 20 + crypto/stark/src/trace.rs | 308 +------------- 3 files changed, 342 insertions(+), 838 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 8842ab799..a8847f86a 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -158,6 +158,68 @@ where bus_public_inputs: Option>, } +/// LDE columns for main (Phase A) and auxiliary (Phase C) traces, consumed by value in Phase D. +/// +/// Memory trade-off: all N tables' LDE columns are live simultaneously between Phase A/C +/// and Phase D (O(N × cols × lde_size)). +struct Lde { + main: Vec>>, + aux: Vec>>, +} + +impl Round1Commitments +where + Field: IsFFTField + IsSubFieldOf + Send + Sync, + FieldExtension: IsField + Send + Sync, + FieldElement: AsBytes, + FieldElement: AsBytes, +{ + /// Build a `Round1` by consuming a `Lde` and borrowing commitment data. 
+ fn build_round1( + &self, + lde: Lde, + step_size: usize, + blowup_factor: usize, + has_aux_trace: bool, + ) -> Round1 { + let lde_trace = LDETraceTable::from_columns(lde.main, lde.aux, step_size, blowup_factor); + + let main = Round1CommitmentData:: { + lde_trace_merkle_tree: Arc::clone(&self.main_merkle_tree), + lde_trace_merkle_root: self.main_merkle_root, + precomputed_merkle_tree: self.precomputed_merkle_tree.as_ref().map(Arc::clone), + precomputed_merkle_root: self.precomputed_merkle_root, + num_precomputed_cols: self.num_precomputed_cols, + }; + + let aux = if has_aux_trace { + Some(Round1CommitmentData:: { + lde_trace_merkle_tree: Arc::clone( + self.aux_merkle_tree + .as_ref() + .expect("aux tree must exist when has_aux_trace"), + ), + lde_trace_merkle_root: self + .aux_merkle_root + .expect("aux root must exist when has_aux_trace"), + precomputed_merkle_tree: None, + precomputed_merkle_root: None, + num_precomputed_cols: 0, + }) + } else { + None + }; + + Round1 { + lde_trace, + main, + aux, + rap_challenges: self.rap_challenges.clone(), + bus_public_inputs: self.bus_public_inputs.clone(), + } + } +} + /// Pre-computed twiddle factors and coset weights for a given domain size. /// /// Shared across all columns of the same table, and across all phases (A, C, Rounds 2-4) @@ -166,7 +228,7 @@ where /// /// The `coset_weights` vector stores `[n_inv, n_inv*g, n_inv*g², ..., n_inv*g^{n-1}]` /// where `g` is the coset offset and `n_inv = 1/n`. These are used in the iFFT+coset-shift -/// step of `expand_pool_to_lde`. +/// step of `expand_columns_to_lde`. pub struct LdeTwiddles { inv: LayerTwiddles, fwd: LayerTwiddles, @@ -225,12 +287,6 @@ fn table_parallelism() -> usize { } } -/// A set of LDE column buffer pools for one concurrent table slot. -struct PoolSet { - main: Vec>>, - aux: Vec>>, -} - /// A container for the results of the second round of the STARK Prove protocol. 
pub struct Round2 where @@ -416,14 +472,12 @@ pub trait IsStarkProver< .expect("coset LDE computation") } - /// Expand pool buffers in-place from N column evaluations to N×blowup LDE evaluations. + /// Expand each column in-place from N evaluations to N×blowup LDE evaluations. /// - /// The pool buffers already contain column data extracted via `extract_columns_*_into`. - /// This performs iFFT + coset shift + FFT in-place, eliminating the T1 transpose copy. - /// Coset weights are pre-cached in `LdeTwiddles` to avoid recomputation across phases. - fn expand_pool_to_lde( - pool: &mut [Vec>], - num_cols: usize, + /// Performs iFFT + coset shift + FFT in place. Coset weights are pre-cached in + /// `LdeTwiddles` to avoid recomputation across phases. + fn expand_columns_to_lde( + columns: &mut [Vec>], domain: &Domain, twiddles: &LdeTwiddles, ) where @@ -431,14 +485,14 @@ pub trait IsStarkProver< E: IsSubFieldOf + IsField + Send + Sync, FieldElement: Send + Sync, { - if num_cols == 0 { + if columns.is_empty() { return; } #[cfg(feature = "parallel")] - let iter = pool[..num_cols].par_iter_mut(); + let iter = columns.par_iter_mut(); #[cfg(not(feature = "parallel"))] - let iter = pool[..num_cols].iter_mut(); + let iter = columns.iter_mut(); iter.for_each(|buf| { Polynomial::coset_lde_full_expand::( buf, @@ -451,14 +505,13 @@ pub trait IsStarkProver< }); } - /// Compute main LDE, commit, return tree and root. - /// Uses the provided pool buffers to avoid allocation; the pool retains capacity for reuse. + /// Compute main LDE, commit, and return the Merkle tree/root along with the + /// owned LDE columns (consumed later in Phase D). 
#[allow(clippy::type_complexity)] fn commit_main_trace( trace: &TraceTable, domain: &Domain, twiddles: &LdeTwiddles, - main_pool: &mut [Vec>], ) -> Result< ( BatchedMerkleTree, @@ -466,6 +519,7 @@ pub trait IsStarkProver< Option>, Option, usize, + Vec>>, ), ProvingError, > @@ -473,31 +527,35 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, { - let num_cols = trace.num_main_columns; - trace.extract_columns_main_into(main_pool); - // Data is now in the pool buffers. Evict the mmap pages from the OS - // page cache so the same data doesn't occupy RAM in both places. + let lde_size = domain.interpolation_domain_size * domain.blowup_factor; + let mut columns = trace.extract_columns_main(lde_size); + // Data is now in `columns`. Evict the mmap pages from the OS page + // cache so the same data doesn't occupy RAM in both places. #[cfg(feature = "disk-spill")] trace.main_table.advise_drop_cache(); #[cfg(feature = "instruments")] let t_sub = Instant::now(); - Self::expand_pool_to_lde::(main_pool, num_cols, domain, twiddles); + Self::expand_columns_to_lde::(&mut columns, domain, twiddles); #[cfg(feature = "instruments")] let main_lde_dur = t_sub.elapsed(); #[cfg(feature = "instruments")] let t_sub = Instant::now(); - let (tree, root) = Self::commit_columns_bit_reversed(&main_pool[..num_cols]) - .ok_or(ProvingError::EmptyCommitment)?; + #[allow(unused_mut)] + let (mut tree, root) = + Self::commit_columns_bit_reversed(&columns).ok_or(ProvingError::EmptyCommitment)?; #[cfg(feature = "instruments")] crate::instruments::accum_r1_main(main_lde_dur, t_sub.elapsed()); - // Pool buffers retain capacity; tree is returned to caller - Ok((tree, root, None, None, 0)) + #[cfg(feature = "disk-spill")] + tree.spill_nodes_to_disk().map_err(|e| { + ProvingError::WrongParameter(format!("disk-spill main Merkle tree: {e}")) + })?; + + Ok((tree, root, None, None, 0, columns)) } /// Commit preprocessed trace: precomputed and multiplicity columns get separate trees. 
- /// Uses pool buffers to avoid allocation. #[allow(clippy::type_complexity)] fn commit_preprocessed_trace( trace: &TraceTable, @@ -505,7 +563,6 @@ pub trait IsStarkProver< precomputed_commitment: Commitment, num_precomputed_cols: usize, twiddles: &LdeTwiddles, - main_pool: &mut [Vec>], ) -> Result< ( BatchedMerkleTree, @@ -513,6 +570,7 @@ pub trait IsStarkProver< Option>, Option, usize, + Vec>>, ), ProvingError, > @@ -520,26 +578,26 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, { - let num_cols = trace.num_main_columns; - trace.extract_columns_main_into(main_pool); - // Data is now in the pool buffers. Evict the mmap pages from the OS - // page cache so the same data doesn't occupy RAM in both places. + let lde_size = domain.interpolation_domain_size * domain.blowup_factor; + let mut columns = trace.extract_columns_main(lde_size); #[cfg(feature = "disk-spill")] trace.main_table.advise_drop_cache(); #[cfg(feature = "instruments")] let t_sub = Instant::now(); - Self::expand_pool_to_lde::(main_pool, num_cols, domain, twiddles); + Self::expand_columns_to_lde::(&mut columns, domain, twiddles); #[cfg(feature = "instruments")] let main_lde_dur = t_sub.elapsed(); #[cfg(feature = "instruments")] let t_sub = Instant::now(); - let (precomputed_tree, precomputed_root) = - Self::commit_columns_bit_reversed(&main_pool[..num_precomputed_cols]) + #[allow(unused_mut)] + let (mut precomputed_tree, precomputed_root) = + Self::commit_columns_bit_reversed(&columns[..num_precomputed_cols]) .ok_or(ProvingError::EmptyCommitment)?; - let (mult_tree, mult_root) = - Self::commit_columns_bit_reversed(&main_pool[num_precomputed_cols..num_cols]) + #[allow(unused_mut)] + let (mut mult_tree, mult_root) = + Self::commit_columns_bit_reversed(&columns[num_precomputed_cols..]) .ok_or(ProvingError::EmptyCommitment)?; #[cfg(feature = "instruments")] crate::instruments::accum_r1_main(main_lde_dur, t_sub.elapsed()); @@ -549,110 +607,60 @@ pub trait IsStarkProver< "Prover's 
precomputed commitment doesn't match hardcoded AIR commitment" ); - // Pool buffers retain capacity; trees are returned to caller + #[cfg(feature = "disk-spill")] + { + precomputed_tree.spill_nodes_to_disk().map_err(|e| { + ProvingError::WrongParameter(format!("disk-spill precomputed Merkle tree: {e}")) + })?; + mult_tree.spill_nodes_to_disk().map_err(|e| { + ProvingError::WrongParameter(format!("disk-spill main Merkle tree: {e}")) + })?; + } + Ok(( mult_tree, mult_root, Some(precomputed_tree), Some(precomputed_root), num_precomputed_cols, + columns, )) } - /// Build a Round1 from a pre-built LDETraceTable and stored metadata. - /// Reuses Merkle trees from Phase A/C via Arc (pointer copy, no deep clone). - fn round1_from_lde( - air: &dyn AIR, - lde_trace: LDETraceTable, - metadata: &Round1Commitments, - ) -> Round1 - where - FieldElement: AsBytes, - FieldElement: AsBytes, - { - let main = Round1CommitmentData:: { - lde_trace_merkle_tree: Arc::clone(&metadata.main_merkle_tree), - lde_trace_merkle_root: metadata.main_merkle_root, - precomputed_merkle_tree: metadata.precomputed_merkle_tree.as_ref().map(Arc::clone), - precomputed_merkle_root: metadata.precomputed_merkle_root, - num_precomputed_cols: metadata.num_precomputed_cols, - }; - - let aux = if air.has_aux_trace() { - Some(Round1CommitmentData:: { - lde_trace_merkle_tree: Arc::clone( - metadata - .aux_merkle_tree - .as_ref() - .expect("aux tree must exist when has_trace_interaction"), - ), - lde_trace_merkle_root: metadata - .aux_merkle_root - .expect("aux root must exist when has_trace_interaction"), - precomputed_merkle_tree: None, - precomputed_merkle_root: None, - num_precomputed_cols: 0, - }) - } else { - None - }; - - Round1 { - lde_trace, - main, - aux, - rap_challenges: metadata.rap_challenges.clone(), - bus_public_inputs: metadata.bus_public_inputs.clone(), - } - } - - /// Reconstruct a full Round1 struct by recomputing LDE evaluations and using - /// the stored Merkle trees from metadata. 
Uses pool buffers to avoid allocation. + /// Recompute Round1 from the trace, reusing the Merkle trees stored in commitments. /// - /// The Merkle trees were already built during Phase A/C and are reused here, - /// eliminating redundant Keccak hashing. + /// Only used by `run_debug_checks` — Phase D consumes the cached LDE + /// directly and does not go through this path. + #[cfg(feature = "debug-checks")] fn reconstruct_round1( air: &dyn AIR, trace: &TraceTable, domain: &Domain, - metadata: &Round1Commitments, + commitment: &Round1Commitments, twiddles: &LdeTwiddles, - main_pool: &mut [Vec>], - aux_pool: &mut [Vec>], ) -> Result, ProvingError> where FieldElement: AsBytes, FieldElement: AsBytes, { - // Recompute main LDE into pool buffers (extract columns directly, no T1 transpose) - let num_main_cols = trace.num_main_columns; - trace.extract_columns_main_into(main_pool); - Self::expand_pool_to_lde::(main_pool, num_main_cols, domain, twiddles); - - // Recompute aux LDE into pool buffers - let num_aux_cols = if air.has_aux_trace() { - let n_aux = trace.num_aux_columns; - trace.extract_columns_aux_into(aux_pool); - Self::expand_pool_to_lde::(aux_pool, n_aux, domain, twiddles); - n_aux + let lde_size = domain.interpolation_domain_size * domain.blowup_factor; + let mut main = trace.extract_columns_main(lde_size); + Self::expand_columns_to_lde::(&mut main, domain, twiddles); + + let aux = if air.has_aux_trace() { + let mut aux = trace.extract_columns_aux(lde_size); + Self::expand_columns_to_lde::(&mut aux, domain, twiddles); + aux } else { - 0 + Vec::new() }; - // Take column Vecs from pool (zero-copy move) instead of cloning. - // After prove_rounds_2_to_4, columns are returned to the pool via into_columns. 
- let main_cols: Vec<_> = main_pool[..num_main_cols] - .iter_mut() - .map(std::mem::take) - .collect(); - let aux_cols: Vec<_> = aux_pool[..num_aux_cols] - .iter_mut() - .map(std::mem::take) - .collect(); - let lde_trace = - LDETraceTable::from_columns(main_cols, aux_cols, air.step_size(), domain.blowup_factor); - - Ok(Self::round1_from_lde(air, lde_trace, metadata)) + Ok(commitment.build_round1( + Lde { main, aux }, + air.step_size(), + domain.blowup_factor, + air.has_aux_trace(), + )) } /// Reconstruct Round1 for every table, print the bus balance report, and @@ -660,11 +668,9 @@ pub trait IsStarkProver< #[cfg(feature = "debug-checks")] fn run_debug_checks( air_trace_pairs: &[AirTracePair<'_, Field, FieldExtension, PI>], - metadatas: &[Round1Commitments], + commitments: &[Round1Commitments], domains: &[Domain], twiddle_caches: &[LdeTwiddles], - main_pool: &mut [Vec>], - aux_pool: &mut [Vec>], ) where FieldElement: AsBytes, FieldElement: AsBytes, @@ -672,15 +678,13 @@ pub trait IsStarkProver< { let mut temp_results: Vec> = Vec::with_capacity(air_trace_pairs.len()); - for (((air, trace, _), metadata), (domain, twiddles)) in air_trace_pairs + for (((air, trace, _), commitment), (domain, twiddles)) in air_trace_pairs .iter() - .zip(metadatas.iter()) + .zip(commitments.iter()) .zip(domains.iter().zip(twiddle_caches.iter())) { - let result = Self::reconstruct_round1( - *air, *trace, domain, metadata, twiddles, main_pool, aux_pool, - ) - .expect("reconstruct_round1 failed in debug-checks"); + let result = Self::reconstruct_round1(*air, *trace, domain, commitment, twiddles) + .expect("reconstruct_round1 failed in debug-checks"); temp_results.push(result); } @@ -1292,22 +1296,21 @@ pub trait IsStarkProver< /// at the domain value corresponding to the FRI query challenge `index` and its symmetric /// element. 
fn open_composition_poly( - round_2_result: &Round2, + composition_poly_merkle_tree: &BatchedMerkleTree, + lde_composition_poly_evaluations: &[Vec>], index: usize, ) -> PolynomialOpenings where FieldElement: AsBytes + Sync + Send, FieldElement: AsBytes + Sync + Send, { - let proof = round_2_result - .composition_poly_merkle_tree + let proof = composition_poly_merkle_tree .get_proof_by_pos(index) .unwrap(); - let num_parts = round_2_result.lde_composition_poly_evaluations.len(); - let lde_composition_poly_parts_evaluation: Vec<_> = (0..num_parts) - .flat_map(|j| { - let part = &round_2_result.lde_composition_poly_evaluations[j]; + let lde_composition_poly_parts_evaluation: Vec<_> = lde_composition_poly_evaluations + .iter() + .flat_map(|part| { vec![ part[reverse_index(index * 2, part.len() as u64)].clone(), part[reverse_index(index * 2 + 1, part.len() as u64)].clone(), @@ -1468,7 +1471,11 @@ pub trait IsStarkProver< ) }); - let composition_openings = Self::open_composition_poly(round_2_result, *index); + let composition_openings = Self::open_composition_poly( + &round_2_result.composition_poly_merkle_tree, + &round_2_result.lde_composition_poly_evaluations, + *index, + ); let aux_trace_polys = round_1_result.aux.as_ref().map(|aux| { Self::open_trace_polys_aux( @@ -1490,7 +1497,7 @@ pub trait IsStarkProver< openings } - // TODO: propagate errors instead of unwrap() in commit_columns, reconstruct_round1, and expand_pool_to_lde + // TODO: propagate errors instead of unwrap() in commit_columns, reconstruct_round1, and expand_columns_to_lde /// Generates STARK proofs for one or more AIRs with a shared transcript. 
/// /// # Multi-Table Proving with LogUp @@ -1532,7 +1539,7 @@ pub trait IsStarkProver< .any(|(air, _, _)| air.has_aux_trace()); // ===================================================================== - // Pre-pass: compute domains, twiddles, and max dimensions for pool allocation + // Pre-pass: compute domains and twiddles // ===================================================================== #[cfg(feature = "instruments")] @@ -1540,60 +1547,32 @@ pub trait IsStarkProver< let mut domains = Vec::with_capacity(num_airs); let mut twiddle_caches: Vec> = Vec::with_capacity(num_airs); - let mut max_main_cols = 0usize; - let mut max_aux_cols = 0usize; - let mut max_lde_size = 0usize; for (air, trace, _pub_inputs) in &*air_trace_pairs { let trace_length = trace.num_rows(); let domain = new_domain(*air, trace_length); - - let lde_size = domain.interpolation_domain_size * domain.blowup_factor; let twiddles = LdeTwiddles::new(&domain); - max_main_cols = max_main_cols.max(trace.num_main_columns); - max_aux_cols = max_aux_cols.max(air.num_auxiliary_rap_columns()); - max_lde_size = max_lde_size.max(lde_size); - domains.push(domain); twiddle_caches.push(twiddles); } - // Spill all main trace tables to mmap before allocating pool buffers. - // This frees the heap-allocated trace data, making room for the LDE pool - // buffers which are much larger (blowup_factor × trace size). - #[cfg(feature = "disk-spill")] - for (_, trace, _) in air_trace_pairs.iter_mut() { - trace - .main_table - .spill_to_disk() - .map_err(|e| ProvingError::WrongParameter(format!("disk-spill early main: {e}")))?; - } - - // Number of tables to process concurrently. - // disk-spill: Phase A/C commit one table at a time to limit pool memory. - // Rounds 2-4 use full parallelism (no pools, reads from mmap). let k = table_parallelism().min(num_airs).max(1); + + // Spill all main trace tables to mmap before any Round 1 LDE work. + // Freeing heap makes room for LDE columns built below. 
#[cfg(feature = "disk-spill")] - let k_commit = 1_usize; - #[cfg(not(feature = "disk-spill"))] - let k_commit = k; - // k_commit=1: pre-allocate pool to max_lde_size to avoid reallocation. - // k_commit>1: start empty and grow on demand. - let mut pool_sets: Vec> = (0..k_commit) - .map(|_| PoolSet { - main: (0..max_main_cols) - .map(|_| { - if k_commit == 1 { - Vec::with_capacity(max_lde_size) - } else { - Vec::new() - } - }) - .collect(), - aux: (0..max_aux_cols).map(|_| Vec::new()).collect(), - }) - .collect(); + { + #[cfg(feature = "parallel")] + let spill_iter = air_trace_pairs.par_iter_mut(); + #[cfg(not(feature = "parallel"))] + let spill_iter = air_trace_pairs.iter_mut(); + spill_iter.try_for_each(|(_, trace, _)| { + trace.main_table.spill_to_disk().map_err(|e| { + ProvingError::WrongParameter(format!("disk-spill early main: {e}")) + }) + })?; + } #[cfg(feature = "instruments")] let prepass_elapsed = phase_start.elapsed(); @@ -1602,131 +1581,58 @@ pub trait IsStarkProver< // Round 1, Phase A: Commit all main traces (parallel in chunks of K) // ===================================================================== // All main trace commitments must be in the transcript before sampling - // LogUp challenges. Pool buffers are reused across chunks. + // LogUp challenges. #[cfg(feature = "instruments")] let phase_start = Instant::now(); let mut main_commits: Vec> = Vec::with_capacity(num_airs); + let mut main_ldes: Vec>>> = Vec::with_capacity(num_airs); - // One mmap-backed LDE table per AIR. Filled during Phase A (main) and - // Phase C (aux), then read from mmap in Rounds 2-4. 
- #[cfg(feature = "disk-spill")] - let mut spilled_ldes: Vec>> = - (0..num_airs).map(|_| None).collect(); - - for chunk_start in (0..num_airs).step_by(k_commit) { - let chunk_end = (chunk_start + k_commit).min(num_airs); - let chunk_size = chunk_end - chunk_start; + for chunk_start in (0..num_airs).step_by(k) { + let chunk_end = (chunk_start + k).min(num_airs); + let chunk_range = chunk_start..chunk_end; #[cfg(feature = "parallel")] - let iter = pool_sets[..chunk_size].par_iter_mut().enumerate(); + let iter = chunk_range.into_par_iter(); #[cfg(not(feature = "parallel"))] - let iter = pool_sets[..chunk_size].iter_mut().enumerate(); + let iter = chunk_range; let chunk_results: Vec> = iter - .map(|(j, pool)| { - let idx = chunk_start + j; + .map(|idx| { let (air, trace, _) = &air_trace_pairs[idx]; let domain = &domains[idx]; let twiddles = &twiddle_caches[idx]; - let (tree, root, pre_tree, pre_root, n_pre) = if air.is_preprocessed() { + if air.is_preprocessed() { Self::commit_preprocessed_trace( *trace, domain, air.precomputed_commitment(), air.num_precomputed_columns(), twiddles, - &mut pool.main, - )? - } else { - Self::commit_main_trace(*trace, domain, twiddles, &mut pool.main)? - }; - - // Spill LDE from pool to mmap while pool is still filled. - // Pool buffers keep their capacity for reuse by the next table, - // avoiding resize reallocation spikes in coset_lde_full_expand. - #[cfg(feature = "disk-spill")] - let spilled = { - let num_main_cols = trace.num_main_columns; - LDETraceTable::spill_main_from_pool( - &pool.main, - num_main_cols, - air.step_size(), - domain.blowup_factor, ) - .map_err(|e| { - ProvingError::WrongParameter(format!( - "disk-spill main LDE table {idx}: {e}" - )) - })? 
- }; - - #[cfg(feature = "disk-spill")] - return Ok((tree, root, pre_tree, pre_root, n_pre, spilled)); - #[cfg(not(feature = "disk-spill"))] - Ok((tree, root, pre_tree, pre_root, n_pre)) + } else { + Self::commit_main_trace(*trace, domain, twiddles) + } }) .collect(); // Sequential: append roots to shared transcript (Fiat-Shamir ordering) - #[allow(unused_variables, unused_mut)] - for (j, result) in chunk_results.into_iter().enumerate() { - #[cfg(feature = "disk-spill")] - let (tree, root, pre_tree, pre_root, n_pre, spilled) = result?; - #[cfg(not(feature = "disk-spill"))] - let (tree, root, pre_tree, pre_root, n_pre) = result?; - + for result in chunk_results { + let (tree, root, pre_tree, pre_root, n_pre, cached_main) = result?; if let Some(ref pre_r) = pre_root { transcript.append_bytes(pre_r); } transcript.append_bytes(&root); - - #[cfg(feature = "disk-spill")] - { - let idx = chunk_start + j; - spilled_ldes[idx] = Some(spilled); - } - - #[allow(unused_mut)] - let mut main_tree = Arc::new(tree); - #[cfg(feature = "disk-spill")] - { - Arc::get_mut(&mut main_tree) - .expect("sole Arc owner") - .spill_nodes_to_disk() - .map_err(|e| { - ProvingError::WrongParameter(format!( - "disk-spill main Merkle tree: {e}" - )) - })?; - } - - let precomputed_tree = match pre_tree { - Some(t) => { - let mut arc = Arc::new(t); - #[cfg(feature = "disk-spill")] - Arc::get_mut(&mut arc) - .expect("sole Arc owner") - .spill_nodes_to_disk() - .map_err(|e| { - ProvingError::WrongParameter(format!( - "disk-spill precomputed Merkle tree: {e}" - )) - })?; - Some(arc) - } - None => None, - }; - main_commits.push(MainCommitData { - main_tree, + main_tree: Arc::new(tree), main_root: root, - precomputed_tree, + precomputed_tree: pre_tree.map(Arc::new), precomputed_root: pre_root, num_precomputed_cols: n_pre, }); + main_ldes.push(cached_main); } } @@ -1736,6 +1642,7 @@ pub trait IsStarkProver< // ===================================================================== // Round 1, Phase B: Sample 
shared LogUp challenges // ===================================================================== + let lookup_challenges: Vec> = if needs_lookup_challenges { (0..LOGUP_NUM_CHALLENGES) .map(|_| transcript.sample_field_element()) @@ -1753,7 +1660,7 @@ pub trait IsStarkProver< // // Split into two passes for parallelism: // Pass 1 (parallel): Build all auxiliary traces (fingerprint + batch inversion) - // Pass 2 (sequential): Fork transcript → extract → LDE → commit (shared pool) + // Pass 2 (parallel): Fork transcript → extract → LDE → commit // Pass 1: Build aux traces in parallel. // Each build_auxiliary_trace has internal parallelism (batch_inverse, par_chunks), @@ -1766,26 +1673,37 @@ pub trait IsStarkProver< #[cfg(not(feature = "parallel"))] let aux_iter = air_trace_pairs.iter_mut(); let bus_inputs_vec: Vec>> = aux_iter - .map( - |(air, trace, _)| -> Result>, ProvingError> { - if !air.has_aux_trace() { - return Ok(None); - } - let result = air.build_auxiliary_trace(*trace, &lookup_challenges); - #[cfg(feature = "disk-spill")] + .map(|(air, trace, _)| { + if air.has_aux_trace() { + air.build_auxiliary_trace(*trace, &lookup_challenges) + } else { + None + } + }) + .collect(); + + // Spill all aux trace tables to mmap before any Round 1 aux LDE work. + #[cfg(feature = "disk-spill")] + { + #[cfg(feature = "parallel")] + let spill_iter = air_trace_pairs.par_iter_mut(); + #[cfg(not(feature = "parallel"))] + let spill_iter = air_trace_pairs.iter_mut(); + spill_iter.try_for_each(|(air, trace, _)| { + if air.has_aux_trace() { trace.spill_aux_to_disk().map_err(|e| { ProvingError::WrongParameter(format!("disk-spill aux trace: {e}")) })?; - Ok(result) - }, - ) - .collect::, _>>()?; + } + Ok(()) + })?; + } #[cfg(feature = "instruments")] let aux_build_elapsed = phase_start.elapsed(); // Pass 2: Parallel fork transcript → extract → LDE → commit in chunks of K. - // Each table gets its own transcript fork and pool set. + // Each table gets its own transcript fork. 
#[cfg(feature = "instruments")] let phase_start = Instant::now(); @@ -1805,34 +1723,33 @@ pub trait IsStarkProver< let mut aux_results: Vec<( Option>>, Option, + Vec>>, )> = Vec::with_capacity(num_airs); - for chunk_start in (0..num_airs).step_by(k_commit) { - let chunk_end = (chunk_start + k_commit).min(num_airs); - let chunk_size = chunk_end - chunk_start; + for chunk_start in (0..num_airs).step_by(k) { + let chunk_end = (chunk_start + k).min(num_airs); + let chunk_range = chunk_start..chunk_end; #[cfg(feature = "parallel")] - let iter = pool_sets[..chunk_size].par_iter_mut().enumerate(); + let iter = chunk_range.into_par_iter(); #[cfg(not(feature = "parallel"))] - let iter = pool_sets[..chunk_size].iter_mut().enumerate(); + let iter = chunk_range; let chunk_aux: Vec> = iter - .map(|(j, pool)| { - let idx = chunk_start + j; + .map(|idx| { let (air, trace, _) = &air_trace_pairs[idx]; let domain = &domains[idx]; let twiddles = &twiddle_caches[idx]; if air.has_aux_trace() { - let num_aux_cols = trace.num_aux_columns; - trace.extract_columns_aux_into(&mut pool.aux); + let lde_size = domain.interpolation_domain_size * domain.blowup_factor; + let mut columns = trace.extract_columns_aux(lde_size); #[cfg(feature = "disk-spill")] trace.aux_table.advise_drop_cache(); #[cfg(feature = "instruments")] let t_sub = Instant::now(); - Self::expand_pool_to_lde::( - &mut pool.aux, - num_aux_cols, + Self::expand_columns_to_lde::( + &mut columns, domain, twiddles, ); @@ -1840,72 +1757,50 @@ pub trait IsStarkProver< let aux_lde_dur = t_sub.elapsed(); #[cfg(feature = "instruments")] let t_sub = Instant::now(); - let (tree, root) = - Self::commit_columns_bit_reversed(&pool.aux[..num_aux_cols]) - .ok_or(ProvingError::EmptyCommitment)?; + #[allow(unused_mut)] + let (mut tree, root) = Self::commit_columns_bit_reversed(&columns) + .ok_or(ProvingError::EmptyCommitment)?; #[cfg(feature = "instruments")] crate::instruments::accum_r1_aux(aux_lde_dur, t_sub.elapsed()); - 
Ok((Some(Arc::new(tree)), Some(root))) + + #[cfg(feature = "disk-spill")] + tree.spill_nodes_to_disk().map_err(|e| { + ProvingError::WrongParameter(format!("disk-spill aux Merkle tree: {e}")) + })?; + + Ok((Some(Arc::new(tree)), Some(root), columns)) } else { - Ok((None, None)) + Ok((None, None, Vec::new())) } }) .collect(); // Sequential: append aux roots to forked transcripts - #[allow(unused_variables)] for (j, result) in chunk_aux.into_iter().enumerate() { - #[allow(unused_mut)] - let (mut aux_tree, aux_root) = result?; + let (aux_tree, aux_root, cached_aux) = result?; if let Some(ref root) = aux_root { table_transcripts[chunk_start + j].append_bytes(root); } - - // Spill aux LDE columns from pool and aux Merkle tree nodes to disk. - #[cfg(feature = "disk-spill")] - { - let idx = chunk_start + j; - let (air, trace, _) = &air_trace_pairs[idx]; - if air.has_aux_trace() { - let num_aux_cols = trace.num_aux_columns; - if let Some(ref mut spilled) = spilled_ldes[idx] { - spilled - .add_aux_from_pool(&pool_sets[j].aux, num_aux_cols) - .map_err(|e| { - ProvingError::WrongParameter(format!( - "disk-spill aux LDE table {idx}: {e}" - )) - })?; - } - } - if let Some(ref mut tree_arc) = aux_tree { - Arc::get_mut(tree_arc) - .expect("sole Arc owner") - .spill_nodes_to_disk() - .map_err(|e| { - ProvingError::WrongParameter(format!( - "disk-spill aux Merkle tree {idx}: {e}" - )) - })?; - } - } - - aux_results.push((aux_tree, aux_root)); + aux_results.push((aux_tree, aux_root, cached_aux)); } } - // Build metadata sequentially from main_commits + aux_results + bus_inputs - let mut metadatas: Vec> = + // Build commitments and cached LDEs as separate vecs: + // commitments are borrowed in Phase D, LDEs are consumed by value. 
+ let mut commitments: Vec> = Vec::with_capacity(num_airs); - for ((main_commit, (aux_tree, aux_root)), bus_public_inputs) in main_commits - .into_iter() - .zip(aux_results) - .zip(bus_inputs_vec) + let mut cached_ldes: Vec> = Vec::with_capacity(num_airs); + for (((main_commit, main_lde), (aux_tree, aux_root, cached_aux)), bus_public_inputs) in + main_commits + .into_iter() + .zip(main_ldes) + .zip(aux_results) + .zip(bus_inputs_vec) { - metadatas.push(Round1Commitments { - main_merkle_tree: Arc::clone(&main_commit.main_tree), + commitments.push(Round1Commitments { + main_merkle_tree: main_commit.main_tree, main_merkle_root: main_commit.main_root, - precomputed_merkle_tree: main_commit.precomputed_tree.as_ref().map(Arc::clone), + precomputed_merkle_tree: main_commit.precomputed_tree, precomputed_merkle_root: main_commit.precomputed_root, num_precomputed_cols: main_commit.num_precomputed_cols, aux_merkle_tree: aux_tree, @@ -1913,34 +1808,24 @@ pub trait IsStarkProver< rap_challenges: lookup_challenges.clone(), bus_public_inputs, }); + cached_ldes.push(Lde { + main: main_lde, + aux: cached_aux, + }); } #[cfg(feature = "instruments")] let aux_commit_elapsed = phase_start.elapsed(); #[cfg(feature = "debug-checks")] - { - let debug_pool = &mut pool_sets[0]; - Self::run_debug_checks( - &air_trace_pairs, - &metadatas, - &domains, - &twiddle_caches, - &mut debug_pool.main, - &mut debug_pool.aux, - ); - } - - // Free pool buffers — the disk-spill Rounds 2-4 path reads from - // spilled LDEs (mmap), so pool buffers are no longer needed. - #[cfg(feature = "disk-spill")] - drop(pool_sets); + Self::run_debug_checks(&air_trace_pairs, &commitments, &domains, &twiddle_caches); // ===================================================================== // Rounds 2-4: Parallel per-table proving in chunks of K // ===================================================================== - // disk-spill: reads LDE data from mmap (spilled_ldes). 
- // non-disk-spill: recomputes LDE from the trace (reconstruct_round1). + // Each chunk of K tables is processed in parallel. Cached LDE columns + // from Phase A/C are consumed here (zero-copy move), eliminating the + // expensive reconstruct_round1 recomputation. #[cfg(feature = "instruments")] let phase_start = Instant::now(); @@ -1953,192 +1838,86 @@ pub trait IsStarkProver< )> = Vec::with_capacity(num_airs); let mut proofs = Vec::with_capacity(num_airs); + let mut lde_drain = cached_ldes.into_iter(); + for chunk_start in (0..num_airs).step_by(k) { + let chunk_end = (chunk_start + k).min(num_airs); + let chunk_size = chunk_end - chunk_start; - // ----- disk-spill path: read from spilled LDEs ----- - #[cfg(feature = "disk-spill")] - { - for chunk_start in (0..num_airs).step_by(k) { - let chunk_end = (chunk_start + k).min(num_airs); - - // Pre-take spilled LDEs for the chunk (needs &mut, can't do inside par_iter) - let chunk_ldes: Vec<_> = (chunk_start..chunk_end) - .map(|i| { - spilled_ldes[i] - .take() - .expect("spilled LDE must exist for every AIR") - }) - .collect(); - - let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; - - #[cfg(feature = "parallel")] - let iter = chunk_ldes - .into_par_iter() - .zip(chunk_transcripts.par_iter_mut()) - .enumerate(); - #[cfg(not(feature = "parallel"))] - let iter = chunk_ldes - .into_iter() - .zip(chunk_transcripts.iter_mut()) - .enumerate(); - - let chunk_results: Vec> = iter - .map(|(j, (lde_trace, table_transcript))| { - let idx = chunk_start + j; - let (air, _trace, pub_inputs) = &air_trace_pairs[idx]; - let metadata = &metadatas[idx]; - let domain = &domains[idx]; - - #[cfg(feature = "instruments")] - let table_start = Instant::now(); - - let round_1_result = Self::round1_from_lde(*air, lde_trace, metadata); - - if let Some(ref bpi) = round_1_result.bus_public_inputs { - table_transcript.append_field_element(&bpi.table_contribution); - } + let chunk_ldes: Vec> = + 
lde_drain.by_ref().take(chunk_size).collect(); + let chunk_commitments = &commitments[chunk_start..chunk_end]; + let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; - let proof = Self::prove_rounds_2_to_4( - *air, - *pub_inputs, - &round_1_result, - table_transcript, - domain, - )?; + #[cfg(feature = "parallel")] + let iter = chunk_ldes + .into_par_iter() + .zip(chunk_commitments.par_iter()) + .zip(chunk_transcripts.par_iter_mut()) + .enumerate(); + #[cfg(not(feature = "parallel"))] + let iter = chunk_ldes + .into_iter() + .zip(chunk_commitments.iter()) + .zip(chunk_transcripts.iter_mut()) + .enumerate(); - // Collect per-table sub-op timing via TLS. - #[cfg(feature = "instruments")] - { - let sub_ops = - crate::instruments::take_round_sub_ops().unwrap_or_default(); - return Ok(( - proof, - ( - air.name().to_string(), - air_trace_pairs[idx].1.num_rows(), - table_start.elapsed(), - sub_ops, - ), - )); - } - #[cfg(not(feature = "instruments"))] - Ok(proof) - }) - .collect(); + let chunk_results: Vec> = iter + .map(|(j, ((lde, commitment), table_transcript))| { + let idx = chunk_start + j; + let (air, trace, pub_inputs) = &air_trace_pairs[idx]; + let _ = trace; // used by instruments + let domain = &domains[idx]; - for result in chunk_results { #[cfg(feature = "instruments")] - { - let (proof, timing) = result?; - proofs.push(proof); - table_timings.push(timing); - } - #[cfg(not(feature = "instruments"))] - proofs.push(result?); - } - } - } + let table_start = Instant::now(); - // ----- non-disk-spill path: recompute LDE from trace ----- - #[cfg(not(feature = "disk-spill"))] - { - for chunk_start in (0..num_airs).step_by(k) { - let chunk_end = (chunk_start + k).min(num_airs); - let chunk_size = chunk_end - chunk_start; - - let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; - - #[cfg(feature = "parallel")] - let iter = pool_sets[..chunk_size] - .par_iter_mut() - .zip(chunk_transcripts.par_iter_mut()) - .enumerate(); - 
#[cfg(not(feature = "parallel"))] - let iter = pool_sets[..chunk_size] - .iter_mut() - .zip(chunk_transcripts.iter_mut()) - .enumerate(); - - let chunk_results: Vec> = iter - .map(|(j, (pool, table_transcript))| { - let idx = chunk_start + j; - let (air, trace, pub_inputs) = &air_trace_pairs[idx]; - let metadata = &metadatas[idx]; - let domain = &domains[idx]; - let twiddles = &twiddle_caches[idx]; - - #[cfg(feature = "instruments")] - let table_start = Instant::now(); - - #[cfg(feature = "instruments")] - let lde_start = Instant::now(); - let round_1_result = Self::reconstruct_round1( - *air, - *trace, - domain, - metadata, - twiddles, - &mut pool.main, - &mut pool.aux, - )?; - #[cfg(feature = "instruments")] - let lde_dur = lde_start.elapsed(); - - if let Some(ref bpi) = round_1_result.bus_public_inputs { - table_transcript.append_field_element(&bpi.table_contribution); - } + // Build Round1 from cached LDE (consumed by value, no recomputation). + let round_1_result = commitment.build_round1( + lde, + air.step_size(), + domain.blowup_factor, + air.has_aux_trace(), + ); - let proof = Self::prove_rounds_2_to_4( - *air, - *pub_inputs, - &round_1_result, - table_transcript, - domain, - )?; + if let Some(ref bpi) = round_1_result.bus_public_inputs { + table_transcript.append_field_element(&bpi.table_contribution); + } - // Collect per-table sub-op timing via TLS. - // Both the store (inside prove_rounds_2_to_4) and this take run on the - // same rayon worker thread, so sub-ops are valid in both sequential and - // parallel mode. 
- #[cfg(feature = "instruments")] - let table_timing = { - let mut sub_ops = - crate::instruments::take_round_sub_ops().unwrap_or_default(); - sub_ops.trace_lde += lde_dur; - ( - air.name().to_string(), - trace.num_rows(), - table_start.elapsed(), - sub_ops, - ) - }; - - // Return column Vecs to pool (zero-copy move back) - let (main_cols, aux_cols) = round_1_result.lde_trace.into_columns(); - for (slot, col) in pool.main.iter_mut().zip(main_cols) { - *slot = col; - } - for (slot, col) in pool.aux.iter_mut().zip(aux_cols) { - *slot = col; - } + let proof = Self::prove_rounds_2_to_4( + *air, + *pub_inputs, + &round_1_result, + table_transcript, + domain, + )?; - #[cfg(feature = "instruments")] - return Ok((proof, table_timing)); - #[cfg(not(feature = "instruments"))] - Ok(proof) - }) - .collect(); + #[cfg(feature = "instruments")] + let table_timing = { + let sub_ops = crate::instruments::take_round_sub_ops().unwrap_or_default(); + ( + air.name().to_string(), + trace.num_rows(), + table_start.elapsed(), + sub_ops, + ) + }; - for result in chunk_results { #[cfg(feature = "instruments")] - { - let (proof, timing) = result?; - proofs.push(proof); - table_timings.push(timing); - } + return Ok((proof, table_timing)); #[cfg(not(feature = "instruments"))] - proofs.push(result?); + Ok(proof) + }) + .collect(); + + for result in chunk_results { + #[cfg(feature = "instruments")] + { + let (proof, timing) = result?; + proofs.push(proof); + table_timings.push(timing); } + #[cfg(not(feature = "instruments"))] + proofs.push(result?); } } @@ -2295,7 +2074,6 @@ pub trait IsStarkProver< let (r4_fft, r4_merkle, r4_deep_comp, r4_queries) = crate::instruments::take_r4_sub().unwrap_or((zero, zero, zero, zero)); crate::instruments::store_round_sub_ops(crate::instruments::TableSubOps { - trace_lde: std::time::Duration::ZERO, // added by caller from lde_dur constraints: r2_constraints, comp_decompose: r2_fft, comp_commit: r2_merkle, diff --git a/crypto/stark/src/table.rs 
b/crypto/stark/src/table.rs index a05cddc61..3a2cb5d30 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -167,6 +167,26 @@ impl Table { .collect() } + /// Extract columns as owned vectors, with each allocated at `capacity`. + /// + /// `capacity` is a hint sized for downstream LDE expansion so the FFT grows + /// in place without a second allocation. + pub fn extract_columns(&self, capacity: usize) -> Vec>> { + let capacity = capacity.max(self.height); + #[cfg(feature = "parallel")] + let iter = (0..self.width).into_par_iter(); + #[cfg(not(feature = "parallel"))] + let iter = 0..self.width; + iter.map(|col_idx| { + let mut buf = Vec::with_capacity(capacity); + for row_idx in 0..self.height { + buf.push(self.get(row_idx, col_idx).clone()); + } + buf + }) + .collect() + } + /// Extract columns directly into pre-allocated output buffers. /// /// Each `output[col_idx]` is cleared and filled with the column data. diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 3b8c5b0b2..141912711 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -182,20 +182,15 @@ where .unwrap() } - /// Extract main columns directly into pre-allocated output buffers. - /// - /// Eliminates the T1 transpose allocation that `columns_main()` performs. - /// When `output` buffers have sufficient capacity, no heap allocation occurs. - pub fn extract_columns_main_into(&self, output: &mut [Vec>]) { - self.main_table.extract_columns_into(output); + /// Extract main columns as owned vectors, each allocated at `capacity`. + /// Pass the LDE size so downstream FFT expansion is in-place. + pub fn extract_columns_main(&self, capacity: usize) -> Vec>> { + self.main_table.extract_columns(capacity) } - /// Extract auxiliary columns directly into pre-allocated output buffers. - /// - /// Eliminates the T1 transpose allocation that `columns_aux()` performs. - /// When `output` buffers have sufficient capacity, no heap allocation occurs. 
- pub fn extract_columns_aux_into(&self, output: &mut [Vec>]) { - self.aux_table.extract_columns_into(output); + /// Extract auxiliary columns as owned vectors, each allocated at `capacity`. + pub fn extract_columns_aux(&self, capacity: usize) -> Vec>> { + self.aux_table.extract_columns(capacity) } } /// Column-major LDE trace table. @@ -216,25 +211,6 @@ where pub(crate) aux_columns: Vec>>, pub(crate) lde_step_size: usize, pub(crate) blowup_factor: usize, - /// When `disk-spill` is enabled and data has been spilled to disk, - /// this holds the mmap backing. Access methods read from here instead - /// of `main_columns`/`aux_columns` (which are empty after spill). - #[cfg(feature = "disk-spill")] - pub(crate) mmap_backing: Option, -} - -/// File-backed mmap storage for LDE column data (column-major layout). -/// Main and aux columns are in separate files since they are spilled -/// at different times (Phase A and Phase C). -#[cfg(feature = "disk-spill")] -pub(crate) struct MmapBacking { - main_mmap: memmap2::Mmap, - aux_mmap: Option, - num_rows: usize, - num_main_cols: usize, - num_aux_cols: usize, - main_elem_size: usize, - aux_elem_size: usize, } impl LDETraceTable @@ -257,39 +233,24 @@ where aux_columns, lde_step_size, blowup_factor, - #[cfg(feature = "disk-spill")] - mmap_backing: None, } } /// Consume self and return the owned column vectors. - /// When mmap-backed (disk-spill), returns empty Vecs since columns were freed. 
#[allow(clippy::type_complexity)] pub fn into_columns(self) -> (Vec>>, Vec>>) { (self.main_columns, self.aux_columns) } pub fn num_main_cols(&self) -> usize { - #[cfg(feature = "disk-spill")] - if let Some(ref backing) = self.mmap_backing { - return backing.num_main_cols; - } self.main_columns.len() } pub fn num_aux_cols(&self) -> usize { - #[cfg(feature = "disk-spill")] - if let Some(ref backing) = self.mmap_backing { - return backing.num_aux_cols; - } self.aux_columns.len() } pub fn num_rows(&self) -> usize { - #[cfg(feature = "disk-spill")] - if let Some(ref backing) = self.mmap_backing { - return backing.num_rows; - } if self.main_columns.is_empty() { 0 } else { @@ -300,46 +261,12 @@ where /// Get a single main-trace element by (row, col). #[inline] pub fn get_main(&self, row: usize, col: usize) -> &FieldElement { - #[cfg(feature = "disk-spill")] - if let Some(ref backing) = self.mmap_backing { - // Guard the unsafe pointer math below; matches the non-spill - // path's checked indexing so release builds don't drop the check. - assert!( - row < backing.num_rows && col < backing.num_main_cols, - "get_main out of bounds: row={row}, col={col}, num_rows={}, num_main_cols={}", - backing.num_rows, - backing.num_main_cols - ); - let offset = (col * backing.num_rows + row) * backing.main_elem_size; - // SAFETY: spill_main_from_pool writes columns contiguously to this - // mmap. FieldElement is #[repr(transparent)] over F::BaseType. - return unsafe { &*(backing.main_mmap.as_ptr().add(offset) as *const FieldElement) }; - } &self.main_columns[col][row] } /// Get a single aux-trace element by (row, col). #[inline] pub fn get_aux(&self, row: usize, col: usize) -> &FieldElement { - #[cfg(feature = "disk-spill")] - if let Some(ref backing) = self.mmap_backing { - // Guard the unsafe pointer math below; matches the non-spill - // path's checked indexing so release builds don't drop the check. 
- assert!( - row < backing.num_rows && col < backing.num_aux_cols, - "get_aux out of bounds: row={row}, col={col}, num_rows={}, num_aux_cols={}", - backing.num_rows, - backing.num_aux_cols - ); - let aux_mmap = backing - .aux_mmap - .as_ref() - .expect("aux mmap must exist when accessing aux columns"); - let offset = (col * backing.num_rows + row) * backing.aux_elem_size; - // SAFETY: add_aux_from_pool writes columns contiguously to this - // mmap. FieldElement is #[repr(transparent)] over E::BaseType. - return unsafe { &*(aux_mmap.as_ptr().add(offset) as *const FieldElement) }; - } &self.aux_columns[col][row] } @@ -380,130 +307,6 @@ where pub fn step_to_row(&self, step: usize) -> usize { self.lde_step_size * step } - - /// Write pool column data to a temp file and return an mmap-backed - /// LDETraceTable. Pool buffers keep their capacity for reuse. - #[cfg(feature = "disk-spill")] - pub fn spill_main_from_pool( - main_pool: &[Vec>], - num_main_cols: usize, - trace_step_size: usize, - blowup_factor: usize, - ) -> std::io::Result { - let num_rows = if num_main_cols > 0 { - main_pool[0].len() - } else { - 0 - }; - - let main_elem_size = std::mem::size_of::>(); - let main_mmap = - Self::write_pool_columns_to_mmap(&main_pool[..num_main_cols], main_elem_size)?; - - let lde_step_size = trace_step_size * blowup_factor; - let aux_elem_size = std::mem::size_of::>(); - - Ok(Self { - main_columns: Vec::new(), - aux_columns: Vec::new(), - lde_step_size, - blowup_factor, - mmap_backing: Some(MmapBacking { - main_mmap, - aux_mmap: None, - num_rows, - num_main_cols, - num_aux_cols: 0, - main_elem_size, - aux_elem_size, - }), - }) - } - - /// Add aux LDE columns from the pool to an already-spilled LDETraceTable. - /// - /// Used during Phase B to attach aux data to a table whose main LDE was - /// already spilled in Phase A. 
- #[cfg(feature = "disk-spill")] - pub fn add_aux_from_pool( - &mut self, - aux_pool: &[Vec>], - num_aux_cols: usize, - ) -> std::io::Result<()> { - if num_aux_cols == 0 { - return Ok(()); - } - - let aux_elem_size = std::mem::size_of::>(); - let aux_mmap = Self::write_pool_columns_to_mmap(&aux_pool[..num_aux_cols], aux_elem_size)?; - - let backing = self - .mmap_backing - .as_mut() - .expect("add_aux_from_pool requires main already spilled"); - backing.aux_mmap = Some(aux_mmap); - backing.num_aux_cols = num_aux_cols; - - Ok(()) - } - - /// Write pool columns to a temp file and return the mmap. - /// The file descriptor is closed before returning; the mapping keeps - /// its own reference to the underlying object. - /// Pool buffers keep their capacity for reuse. - #[cfg(feature = "disk-spill")] - fn write_pool_columns_to_mmap( - columns: &[Vec], - elem_size: usize, - ) -> std::io::Result { - use std::io::Write; - - debug_assert_eq!( - elem_size, - std::mem::size_of::(), - "elem_size must match size_of::(); the `col.len() * elem_size` byte count below assumes it" - ); - - let num_cols = columns.len(); - let num_rows = if num_cols > 0 { columns[0].len() } else { 0 }; - debug_assert!( - columns.iter().all(|c| c.len() == num_rows), - "all columns must have the same length" - ); - let total_bytes = (num_cols as u64) - .checked_mul(num_rows as u64) - .and_then(|n| n.checked_mul(elem_size as u64)) - .ok_or_else(|| { - std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "write_pool_columns_to_mmap: byte count overflows u64", - ) - })?; - - let file = tempfile::tempfile()?; - file.set_len(total_bytes)?; - { - let mut writer = std::io::BufWriter::new(&file); - for col in columns { - // SAFETY: T is a FieldElement which is #[repr(transparent)], - // so the Vec has the same byte layout as a contiguous array. - // `col.len() * elem_size` fits in usize because Vec allocations - // are bounded by isize::MAX bytes. 
- let bytes: &[u8] = unsafe { - std::slice::from_raw_parts(col.as_ptr() as *const u8, col.len() * elem_size) - }; - writer.write_all(bytes)?; - } - writer.flush()?; - } - // SAFETY: tempfile() creates an anonymous file with no filesystem path, - // so no other process can open or modify it. - // The mapping keeps its own reference to the underlying object - // (Unix: kernel VMA; Windows: duplicated handle in memmap2), so the - // `file` local can drop at end of scope without invalidating it. - let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; - Ok(mmap) - } } /// Given a slice of trace polynomials, an evaluation point `x`, the frame offsets @@ -691,103 +494,6 @@ where Table::new(table_data, table_width) } -#[cfg(all(test, feature = "disk-spill"))] -mod disk_spill_tests { - use super::*; - use math::field::extensions_goldilocks::Degree3GoldilocksExtensionField; - use math::field::goldilocks::GoldilocksField; - - type F = GoldilocksField; - type E = Degree3GoldilocksExtensionField; - - /// Spill main LDE columns from a simulated pool, then verify `get_main()` - /// returns the correct values from the mmap backing. 
- #[test] - fn test_lde_spill_main_roundtrip() { - let num_cols = 3; - let num_rows = 16; - - // Simulate pool: column-major Vec> - let pool: Vec>> = (0..num_cols) - .map(|c| { - (0..num_rows) - .map(|r| FieldElement::::from((c * num_rows + r) as u64)) - .collect() - }) - .collect(); - - let lde = LDETraceTable::::spill_main_from_pool( - &pool, num_cols, /*trace_step_size=*/ 1, /*blowup_factor=*/ 1, - ) - .expect("spill_main_from_pool failed"); - - assert_eq!(lde.num_main_cols(), num_cols); - assert_eq!(lde.num_rows(), num_rows); - assert!( - lde.main_columns.is_empty(), - "main_columns should be empty after spill" - ); - - // Verify every element - for (c, pool_col) in pool.iter().enumerate() { - for (r, pool_val) in pool_col.iter().enumerate() { - assert_eq!( - lde.get_main(r, c), - pool_val, - "mismatch at (row={r}, col={c})" - ); - } - } - } - - /// Spill main + aux LDE columns and verify both are accessible. - #[test] - fn test_lde_spill_main_and_aux_roundtrip() { - let num_main = 2; - let num_aux = 2; - let num_rows = 8; - - let main_pool: Vec>> = (0..num_main) - .map(|c| { - (0..num_rows) - .map(|r| FieldElement::::from((c * num_rows + r) as u64)) - .collect() - }) - .collect(); - - let aux_pool: Vec>> = (0..num_aux) - .map(|c| { - (0..num_rows) - .map(|r| FieldElement::::from((100 + c * num_rows + r) as u64)) - .collect() - }) - .collect(); - - let mut lde = LDETraceTable::::spill_main_from_pool(&main_pool, num_main, 1, 1) - .expect("spill_main_from_pool failed"); - - lde.add_aux_from_pool(&aux_pool, num_aux) - .expect("add_aux_from_pool failed"); - - assert_eq!(lde.num_main_cols(), num_main); - assert_eq!(lde.num_aux_cols(), num_aux); - - // Verify main - for (c, main_col) in main_pool.iter().enumerate() { - for (r, main_val) in main_col.iter().enumerate() { - assert_eq!(lde.get_main(r, c), main_val); - } - } - - // Verify aux - for (c, aux_col) in aux_pool.iter().enumerate() { - for (r, aux_val) in aux_col.iter().enumerate() { - assert_eq!(lde.get_aux(r, 
c), aux_val); - } - } - } -} - pub fn columns2rows(columns: Vec>) -> Vec> where F: Clone, From 9f9950317100038035b22c1aa31bc535a9ec4c5f Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 20 Apr 2026 18:22:32 -0300 Subject: [PATCH 089/231] Fix non-parallel disk-spill build by declaring spill_iter mut --- crypto/stark/src/prover.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index a8847f86a..73308b07a 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1566,7 +1566,7 @@ pub trait IsStarkProver< #[cfg(feature = "parallel")] let spill_iter = air_trace_pairs.par_iter_mut(); #[cfg(not(feature = "parallel"))] - let spill_iter = air_trace_pairs.iter_mut(); + let mut spill_iter = air_trace_pairs.iter_mut(); spill_iter.try_for_each(|(_, trace, _)| { trace.main_table.spill_to_disk().map_err(|e| { ProvingError::WrongParameter(format!("disk-spill early main: {e}")) @@ -1688,7 +1688,7 @@ pub trait IsStarkProver< #[cfg(feature = "parallel")] let spill_iter = air_trace_pairs.par_iter_mut(); #[cfg(not(feature = "parallel"))] - let spill_iter = air_trace_pairs.iter_mut(); + let mut spill_iter = air_trace_pairs.iter_mut(); spill_iter.try_for_each(|(air, trace, _)| { if air.has_aux_trace() { trace.spill_aux_to_disk().map_err(|e| { From 0485cb613ffc539590fae3f365949edab4cdcd41 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 20 Apr 2026 18:32:21 -0300 Subject: [PATCH 090/231] Drop trace_lde from TableSubOps --- crypto/stark/src/instruments.rs | 2 -- prover/src/instruments.rs | 8 +------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/crypto/stark/src/instruments.rs b/crypto/stark/src/instruments.rs index 3c13ef2e1..dd566b1ae 100644 --- a/crypto/stark/src/instruments.rs +++ b/crypto/stark/src/instruments.rs @@ -5,8 +5,6 @@ use std::time::Duration; /// Sub-operation timing breakdown for a single table in Rounds 2-4. 
#[derive(Clone, Debug, Default)] pub struct TableSubOps { - /// reconstruct_round1 (expand_pool_to_lde) - pub trace_lde: Duration, /// evaluator.evaluate() pub constraints: Duration, /// decompose_and_extend_d2 diff --git a/prover/src/instruments.rs b/prover/src/instruments.rs index e3db38b95..ac517e85b 100644 --- a/prover/src/instruments.rs +++ b/prover/src/instruments.rs @@ -96,7 +96,6 @@ pub fn print_report( entry.total_dur += *dur; entry.total_rows += rows; entry.count += 1; - entry.sub_ops.trace_lde += sub_ops.trace_lde; entry.sub_ops.constraints += sub_ops.constraints; entry.sub_ops.comp_decompose += sub_ops.comp_decompose; entry.sub_ops.comp_commit += sub_ops.comp_commit; @@ -134,7 +133,6 @@ pub fn print_report( } // Sub-operation totals across all tables - let mut total_trace_lde = Duration::ZERO; let mut total_constraints = Duration::ZERO; let mut total_comp_decompose = Duration::ZERO; let mut total_comp_commit = Duration::ZERO; @@ -144,7 +142,6 @@ pub fn print_report( let mut total_fri_commit = Duration::ZERO; let mut total_queries = Duration::ZERO; for (_, t) in &sorted { - total_trace_lde += t.sub_ops.trace_lde; total_constraints += t.sub_ops.constraints; total_comp_decompose += t.sub_ops.comp_decompose; total_comp_commit += t.sub_ops.comp_commit; @@ -155,8 +152,7 @@ pub fn print_report( total_queries += t.sub_ops.queries; } - let sub_ops_sum = total_trace_lde - + total_constraints + let sub_ops_sum = total_constraints + total_comp_decompose + total_comp_commit + total_ood @@ -166,7 +162,6 @@ pub fn print_report( + total_queries; if sub_ops_sum > Duration::ZERO { let mut sub_ops: Vec<(&str, Duration)> = vec![ - ("R1 expand_pool_to_lde", total_trace_lde), ("R2 evaluate", total_constraints), ("R2 decompose_and_extend_d2", total_comp_decompose), ("R2 commit_composition_poly", total_comp_commit), @@ -189,7 +184,6 @@ pub fn print_report( // Cross-round totals: all FFT work and all Merkle work let total_fft = mp.round1_sub.main_lde + mp.round1_sub.aux_lde - + 
total_trace_lde + total_comp_decompose + total_deep_extend; let total_merkle = mp.round1_sub.main_merkle From 73fd963787800b9b5907f2489bc2479dd2184b9b Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 20 Apr 2026 19:16:45 -0300 Subject: [PATCH 091/231] Add fib_iterative_24M program --- executor/programs/asm/fib_iterative_24M.s | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 executor/programs/asm/fib_iterative_24M.s diff --git a/executor/programs/asm/fib_iterative_24M.s b/executor/programs/asm/fib_iterative_24M.s new file mode 100644 index 000000000..8e17fe693 --- /dev/null +++ b/executor/programs/asm/fib_iterative_24M.s @@ -0,0 +1,24 @@ + .attribute 5, "rv64i2p1_m2p0" + .globl main +main: + # Iterative Fibonacci - pure register arithmetic + # ~24M steps + # + # Loop body: 5 instructions per iteration + # 4800000 iterations × 5 = 24000000 + setup/teardown + + li t0, 0 # a = fib(0) = 0 + li t1, 1 # b = fib(1) = 1 + li a0, 4800000 # iteration count + +.loop: + add t2, t0, t1 # t2 = a + b + mv t0, t1 # a = b + mv t1, t2 # b = t2 + addi a0, a0, -1 # n-- + bnez a0, .loop # loop if n != 0 + + mv a0, t1 # result = b + li a0, 0 + li a7, 93 + ecall # halt with result in a0 From ba2903e96f90510a47b18f87cac9ad40648e888b Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 20 Apr 2026 19:20:06 -0300 Subject: [PATCH 092/231] Remove fib_iterative_32M/64M/128M programs --- executor/programs/asm/fib_iterative_128M.s | 24 ---------------------- executor/programs/asm/fib_iterative_32M.s | 24 ---------------------- executor/programs/asm/fib_iterative_64M.s | 24 ---------------------- 3 files changed, 72 deletions(-) delete mode 100644 executor/programs/asm/fib_iterative_128M.s delete mode 100644 executor/programs/asm/fib_iterative_32M.s delete mode 100644 executor/programs/asm/fib_iterative_64M.s diff --git a/executor/programs/asm/fib_iterative_128M.s b/executor/programs/asm/fib_iterative_128M.s deleted file mode 100644 index 
b7eb30470..000000000 --- a/executor/programs/asm/fib_iterative_128M.s +++ /dev/null @@ -1,24 +0,0 @@ - .attribute 5, "rv64i2p1_m2p0" - .globl main -main: - # Iterative Fibonacci - pure register arithmetic - # ~128M steps - # - # Loop body: 5 instructions per iteration - # 25600000 iterations × 5 = 128000000 + setup/teardown - - li t0, 0 # a = fib(0) = 0 - li t1, 1 # b = fib(1) = 1 - li a0, 25600000 # iteration count - -.loop: - add t2, t0, t1 # t2 = a + b - mv t0, t1 # a = b - mv t1, t2 # b = t2 - addi a0, a0, -1 # n-- - bnez a0, .loop # loop if n != 0 - - mv a0, t1 # result = b - li a0, 0 - li a7, 93 - ecall # halt with result in a0 diff --git a/executor/programs/asm/fib_iterative_32M.s b/executor/programs/asm/fib_iterative_32M.s deleted file mode 100644 index df6644193..000000000 --- a/executor/programs/asm/fib_iterative_32M.s +++ /dev/null @@ -1,24 +0,0 @@ - .attribute 5, "rv64i2p1_m2p0" - .globl main -main: - # Iterative Fibonacci - pure register arithmetic - # ~32M steps - # - # Loop body: 5 instructions per iteration - # 6400000 iterations × 5 = 32000000 + setup/teardown - - li t0, 0 # a = fib(0) = 0 - li t1, 1 # b = fib(1) = 1 - li a0, 6400000 # iteration count - -.loop: - add t2, t0, t1 # t2 = a + b - mv t0, t1 # a = b - mv t1, t2 # b = t2 - addi a0, a0, -1 # n-- - bnez a0, .loop # loop if n != 0 - - mv a0, t1 # result = b - li a0, 0 - li a7, 93 - ecall # halt with result in a0 diff --git a/executor/programs/asm/fib_iterative_64M.s b/executor/programs/asm/fib_iterative_64M.s deleted file mode 100644 index af232577b..000000000 --- a/executor/programs/asm/fib_iterative_64M.s +++ /dev/null @@ -1,24 +0,0 @@ - .attribute 5, "rv64i2p1_m2p0" - .globl main -main: - # Iterative Fibonacci - pure register arithmetic - # ~64M steps - # - # Loop body: 5 instructions per iteration - # 12800000 iterations × 5 = 64000000 + setup/teardown - - li t0, 0 # a = fib(0) = 0 - li t1, 1 # b = fib(1) = 1 - li a0, 12800000 # iteration count - -.loop: - add t2, t0, t1 # t2 = a + b - 
mv t0, t1 # a = b - mv t1, t2 # b = t2 - addi a0, a0, -1 # n-- - bnez a0, .loop # loop if n != 0 - - mv a0, t1 # result = b - li a0, 0 - li a7, 93 - ecall # halt with result in a0 From d768316b12b3083920e6d43fc530abd84d53765f Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 21 Apr 2026 13:12:54 -0300 Subject: [PATCH 093/231] Revert unrelated fib_iterative_2M tweak --- executor/programs/asm/fib_iterative_2M.s | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/executor/programs/asm/fib_iterative_2M.s b/executor/programs/asm/fib_iterative_2M.s index 96cdf68e2..e224db769 100644 --- a/executor/programs/asm/fib_iterative_2M.s +++ b/executor/programs/asm/fib_iterative_2M.s @@ -5,11 +5,11 @@ main: # ~2M steps # # Loop body: 5 instructions per iteration - # 400000 iterations × 5 = 2000000 + 4 setup/teardown ≈ 2000004 + # 399999 iterations × 5 = 1999995 + 4 setup/teardown = 1999999 li t0, 0 # a = fib(0) = 0 li t1, 1 # b = fib(1) = 1 - li a0, 400000 # iteration count + li a0, 399999 # iteration count .loop: add t2, t0, t1 # t2 = a + b @@ -19,6 +19,5 @@ main: bnez a0, .loop # loop if n != 0 mv a0, t1 # result = b - li a0, 0 li a7, 93 ecall # halt with result in a0 From 97526a55152965afa08ae91245d293ea388faed3 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 21 Apr 2026 13:13:02 -0300 Subject: [PATCH 094/231] Tighten FieldElement repr invariant doc --- crypto/math/src/field/element.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crypto/math/src/field/element.rs b/crypto/math/src/field/element.rs index 7e51d7950..919ccfbad 100644 --- a/crypto/math/src/field/element.rs +++ b/crypto/math/src/field/element.rs @@ -40,9 +40,12 @@ use super::traits::{IsPrimeField, IsSubFieldOf, LegendreSymbol}; /// A field element with operations algorithms defined in `F` /// -/// `#[repr(transparent)]` is required by the disk-spill code in -/// `crypto/stark` (`table.rs`, `trace.rs`), which casts mmap bytes to -/// `FieldElement`. 
+/// # Layout invariant +/// +/// `#[repr(transparent)]` is load-bearing: the `disk-spill` feature in +/// `crypto/stark` casts raw mmap bytes to `*const FieldElement` +/// (see `table.rs::get`, `trace.rs`). Changing the `repr`, adding +/// fields, or introducing padding silently makes those casts UB. #[allow(clippy::derived_hash_with_manual_eq)] #[repr(transparent)] #[derive(Debug, Clone, Hash, Copy)] From 40c86c7967a1ff83c0142b60ee3faeb4005c030a Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 21 Apr 2026 13:13:10 -0300 Subject: [PATCH 095/231] Harden disk-spill Table safety and portability --- crypto/stark/src/table.rs | 80 +++++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 12 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 3a2cb5d30..1290c4a2d 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -22,16 +22,6 @@ pub(crate) struct TableMmapBacking { elem_size: usize, } -// Table derives Clone, which requires all fields to implement Clone. -// TableMmapBacking implements Clone to satisfy this, but panics because -// mmap-backed data cannot be cloned. -#[cfg(feature = "disk-spill")] -impl Clone for TableMmapBacking { - fn clone(&self) -> Self { - panic!("TableMmapBacking cannot be cloned — spilled tables should not be cloned") - } -} - #[cfg(feature = "disk-spill")] impl std::fmt::Debug for TableMmapBacking { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -48,7 +38,7 @@ impl std::fmt::Debug for TableMmapBacking { /// the STARK protocol implementation, such as the `TraceTable` and the `EvaluationFrame`. /// Since this struct is a representation of a two-dimensional table, all rows should have the same /// length. 
-#[derive(Clone, Default, Debug, serde::Serialize, serde::Deserialize)] +#[derive(Default, Debug, serde::Serialize, serde::Deserialize)] #[serde(bound = "")] pub struct Table { pub data: Vec>, @@ -59,6 +49,37 @@ pub struct Table { pub(crate) mmap_backing: Option, } +/// Cloning a spilled table reads its mmap bytes into a fresh heap `Vec` +/// and returns an unspilled clone. This is cold — callers pay the full +/// materialization cost — but avoids the runtime panic a derived impl +/// would produce on `TableMmapBacking`. +impl Clone for Table { + fn clone(&self) -> Self { + #[cfg(feature = "disk-spill")] + if self.mmap_backing.is_some() { + let mut data = Vec::with_capacity(self.width * self.height); + for row in 0..self.height { + for col in 0..self.width { + data.push(self.get(row, col).clone()); + } + } + return Self { + data, + width: self.width, + height: self.height, + mmap_backing: None, + }; + } + Self { + data: self.data.clone(), + width: self.width, + height: self.height, + #[cfg(feature = "disk-spill")] + mmap_backing: None, + } + } +} + /// Element-wise comparison via `get()`, so spilled tables compare by field /// equality (canonicalized per `F::eq`) rather than raw mmap bytes. impl PartialEq for Table { @@ -260,6 +281,13 @@ impl Table { /// No-op if the table is empty or already spilled. #[cfg(feature = "disk-spill")] pub fn spill_to_disk(&mut self) -> std::io::Result<()> { + const { + assert!( + std::mem::size_of::>() + .is_multiple_of(std::mem::align_of::>()), + "FieldElement size must be a multiple of its alignment for mmap interior reads to be aligned" + ) + } use std::io::Write; if self.data.is_empty() || self.mmap_backing.is_some() { @@ -317,7 +345,11 @@ impl Table { /// Advise the kernel to drop mmap pages from the page cache. /// Call after reading spilled data into pool buffers so the same /// data doesn't occupy RAM in both places. 
- #[cfg(feature = "disk-spill")] + /// + /// Unix-only: `madvise(MADV_DONTNEED)` has no direct Windows equivalent, + /// so this is a no-op on non-Unix targets (callers rely on natural + /// page-cache reclaim there). + #[cfg(all(feature = "disk-spill", unix))] pub fn advise_drop_cache(&self) { if let Some(ref backing) = self.mmap_backing { // SAFETY: pointer and length are from a valid mmap. @@ -332,6 +364,9 @@ impl Table { } } + #[cfg(all(feature = "disk-spill", not(unix)))] + pub fn advise_drop_cache(&self) {} + /// Given a step size, converts the given table into a `Frame`. /// Clones row data into owned Vecs (only used by verifier on small OOD tables). pub fn into_frame(&self, main_trace_columns: usize, step_size: usize) -> Frame { @@ -466,4 +501,25 @@ mod disk_spill_tests { assert_eq!(table.get(0, 0), &FieldElement::::from(0u64)); assert_eq!(table.get(3, 3), &FieldElement::::from(15u64)); } + + /// Cloning a spilled table materializes bytes into a fresh heap Vec, + /// yielding an unspilled clone with the same element values. 
+ #[test] + fn test_clone_spilled_table_materializes_to_heap() { + let width = 4; + let height = 8; + let data: Vec> = (0..width * height) + .map(|i| FieldElement::::from(i as u64)) + .collect(); + + let mut table = Table::new(data, width); + table.spill_to_disk().expect("spill_to_disk failed"); + assert!(table.is_spilled()); + + let cloned = table.clone(); + assert!(!cloned.is_spilled(), "clone should not be spilled"); + assert_eq!(cloned.width, width); + assert_eq!(cloned.height, height); + assert_eq!(cloned, table, "clone must equal source element-wise"); + } } From d61ed608e72043fa422bfb84cb4092c3b7cb7d83 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 21 Apr 2026 13:13:16 -0300 Subject: [PATCH 096/231] Add DiskSpill proving error variant --- crypto/stark/src/prover.rs | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 73308b07a..46f39048c 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -72,6 +72,10 @@ where pub enum ProvingError { WrongParameter(String), EmptyCommitment, + /// I/O failure while spilling prover state (traces, LDE, Merkle trees) to disk: + /// typically out of disk space, fd exhaustion, or mmap failure. 
+ #[cfg(feature = "disk-spill")] + DiskSpill(String), } /// A container for the intermediate results of the commitments to a trace table, main or auxiliary in case of RAP, @@ -548,9 +552,8 @@ pub trait IsStarkProver< crate::instruments::accum_r1_main(main_lde_dur, t_sub.elapsed()); #[cfg(feature = "disk-spill")] - tree.spill_nodes_to_disk().map_err(|e| { - ProvingError::WrongParameter(format!("disk-spill main Merkle tree: {e}")) - })?; + tree.spill_nodes_to_disk() + .map_err(|e| ProvingError::DiskSpill(format!("main Merkle tree: {e}")))?; Ok((tree, root, None, None, 0, columns)) } @@ -609,12 +612,12 @@ pub trait IsStarkProver< #[cfg(feature = "disk-spill")] { - precomputed_tree.spill_nodes_to_disk().map_err(|e| { - ProvingError::WrongParameter(format!("disk-spill precomputed Merkle tree: {e}")) - })?; - mult_tree.spill_nodes_to_disk().map_err(|e| { - ProvingError::WrongParameter(format!("disk-spill main Merkle tree: {e}")) - })?; + precomputed_tree + .spill_nodes_to_disk() + .map_err(|e| ProvingError::DiskSpill(format!("precomputed Merkle tree: {e}")))?; + mult_tree + .spill_nodes_to_disk() + .map_err(|e| ProvingError::DiskSpill(format!("mult Merkle tree: {e}")))?; } Ok(( @@ -1568,9 +1571,10 @@ pub trait IsStarkProver< #[cfg(not(feature = "parallel"))] let mut spill_iter = air_trace_pairs.iter_mut(); spill_iter.try_for_each(|(_, trace, _)| { - trace.main_table.spill_to_disk().map_err(|e| { - ProvingError::WrongParameter(format!("disk-spill early main: {e}")) - }) + trace + .main_table + .spill_to_disk() + .map_err(|e| ProvingError::DiskSpill(format!("early main: {e}"))) })?; } @@ -1691,9 +1695,9 @@ pub trait IsStarkProver< let mut spill_iter = air_trace_pairs.iter_mut(); spill_iter.try_for_each(|(air, trace, _)| { if air.has_aux_trace() { - trace.spill_aux_to_disk().map_err(|e| { - ProvingError::WrongParameter(format!("disk-spill aux trace: {e}")) - })?; + trace + .spill_aux_to_disk() + .map_err(|e| ProvingError::DiskSpill(format!("aux trace: {e}")))?; } Ok(()) 
})?; @@ -1765,7 +1769,7 @@ pub trait IsStarkProver< #[cfg(feature = "disk-spill")] tree.spill_nodes_to_disk().map_err(|e| { - ProvingError::WrongParameter(format!("disk-spill aux Merkle tree: {e}")) + ProvingError::DiskSpill(format!("aux Merkle tree: {e}")) })?; Ok((Some(Arc::new(tree)), Some(root), columns)) From fb5c370b41920d8d87e2af5141edfd33b0f52cc1 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 21 Apr 2026 13:13:21 -0300 Subject: [PATCH 097/231] Match instruments label to function name --- crypto/stark/src/instruments.rs | 4 ++-- prover/src/instruments.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crypto/stark/src/instruments.rs b/crypto/stark/src/instruments.rs index dd566b1ae..3c9f432d7 100644 --- a/crypto/stark/src/instruments.rs +++ b/crypto/stark/src/instruments.rs @@ -26,11 +26,11 @@ pub struct TableSubOps { /// Sub-operation breakdown for Round 1 aux commit pass. #[derive(Clone, Debug, Default)] pub struct Round1SubOps { - /// Main trace: expand_pool_to_lde (LDE/FFT) + /// Main trace: expand_columns_to_lde (LDE/FFT) pub main_lde: Duration, /// Main trace: commit_columns_bit_reversed (Merkle) pub main_merkle: Duration, - /// Aux trace: expand_pool_to_lde (LDE/FFT) + /// Aux trace: expand_columns_to_lde (LDE/FFT) pub aux_lde: Duration, /// Aux trace: commit_columns_bit_reversed (Merkle) pub aux_merkle: Duration, diff --git a/prover/src/instruments.rs b/prover/src/instruments.rs index ac517e85b..e087cf55a 100644 --- a/prover/src/instruments.rs +++ b/prover/src/instruments.rs @@ -75,11 +75,11 @@ pub fn print_report( row_top("Pre-pass (domains/twiddles)", mp.prepass, total); row_top("Round 1", round1, total); row_sub(" Main trace commits", mp.main_commits, total); - row_sub(" expand_pool_to_lde", mp.round1_sub.main_lde, total); + row_sub(" expand_columns_to_lde", mp.round1_sub.main_lde, total); row_sub(" commit (Merkle)", mp.round1_sub.main_merkle, total); row_sub(" Aux trace build (parallel)", mp.aux_build, total); 
row_sub(" Aux trace commit", mp.aux_commit, total); - row_sub(" expand_pool_to_lde", mp.round1_sub.aux_lde, total); + row_sub(" expand_columns_to_lde", mp.round1_sub.aux_lde, total); row_sub(" commit (Merkle)", mp.round1_sub.aux_merkle, total); row_top("Rounds 2\u{2013}4", mp.rounds_2_4, total); From f90c5cbbff45627be049aa5155864d18706a54c8 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 21 Apr 2026 14:22:14 -0300 Subject: [PATCH 098/231] Drop heading from FieldElement doc --- crypto/math/src/field/element.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crypto/math/src/field/element.rs b/crypto/math/src/field/element.rs index 919ccfbad..60ee96318 100644 --- a/crypto/math/src/field/element.rs +++ b/crypto/math/src/field/element.rs @@ -40,8 +40,6 @@ use super::traits::{IsPrimeField, IsSubFieldOf, LegendreSymbol}; /// A field element with operations algorithms defined in `F` /// -/// # Layout invariant -/// /// `#[repr(transparent)]` is load-bearing: the `disk-spill` feature in /// `crypto/stark` casts raw mmap bytes to `*const FieldElement` /// (see `table.rs::get`, `trace.rs`). 
Changing the `repr`, adding From 494e4e9570779d827297ac1944af49abc4a0e698 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 21 Apr 2026 14:42:13 -0300 Subject: [PATCH 099/231] Reword FieldElement doc without metaphor --- crypto/math/src/field/element.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/math/src/field/element.rs b/crypto/math/src/field/element.rs index 60ee96318..ddae0ca9d 100644 --- a/crypto/math/src/field/element.rs +++ b/crypto/math/src/field/element.rs @@ -40,8 +40,8 @@ use super::traits::{IsPrimeField, IsSubFieldOf, LegendreSymbol}; /// A field element with operations algorithms defined in `F` /// -/// `#[repr(transparent)]` is load-bearing: the `disk-spill` feature in -/// `crypto/stark` casts raw mmap bytes to `*const FieldElement` +/// `#[repr(transparent)]` is required for soundness: the `disk-spill` +/// feature in `crypto/stark` casts raw mmap bytes to `*const FieldElement` /// (see `table.rs::get`, `trace.rs`). Changing the `repr`, adding /// fields, or introducing padding silently makes those casts UB. #[allow(clippy::derived_hash_with_manual_eq)] From dc5a63a6652c14b772e8ecaa17ddfc81e00a6de9 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 21 Apr 2026 14:45:28 -0300 Subject: [PATCH 100/231] Drop 'silently' from FieldElement doc --- crypto/math/src/field/element.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/math/src/field/element.rs b/crypto/math/src/field/element.rs index ddae0ca9d..52db430bb 100644 --- a/crypto/math/src/field/element.rs +++ b/crypto/math/src/field/element.rs @@ -43,7 +43,7 @@ use super::traits::{IsPrimeField, IsSubFieldOf, LegendreSymbol}; /// `#[repr(transparent)]` is required for soundness: the `disk-spill` /// feature in `crypto/stark` casts raw mmap bytes to `*const FieldElement` /// (see `table.rs::get`, `trace.rs`). Changing the `repr`, adding -/// fields, or introducing padding silently makes those casts UB. 
+/// fields, or introducing padding makes those casts UB. #[allow(clippy::derived_hash_with_manual_eq)] #[repr(transparent)] #[derive(Debug, Clone, Hash, Copy)] From d4b548b9e55a96788b33dddb186eea4de6a12807 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 21 Apr 2026 14:48:36 -0300 Subject: [PATCH 101/231] Make disk-spill opt-in for CLI --- bin/cli/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/cli/Cargo.toml b/bin/cli/Cargo.toml index ccac011f8..922106bbc 100644 --- a/bin/cli/Cargo.toml +++ b/bin/cli/Cargo.toml @@ -13,7 +13,6 @@ tikv-jemallocator = "0.6" tikv-jemalloc-ctl = { version = "0.6", features = ["stats"], optional = true } [features] -default = ["disk-spill"] jemalloc-stats = ["dep:tikv-jemalloc-ctl"] disk-spill = ["prover/disk-spill"] instruments = ["prover/instruments"] From 93bb2ad295496cee46abd5678197ee9c607b9a3d Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 21 Apr 2026 14:50:37 -0300 Subject: [PATCH 102/231] Add clippy pass for disk-spill feature --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index c02bffc49..7553bf192 100644 --- a/Makefile +++ b/Makefile @@ -181,6 +181,7 @@ check: clippy: cargo clippy --workspace --all-targets -- -D warnings -A clippy::op_ref cargo clippy --workspace --all-targets --no-default-features --features lambda-vm-prover/debug-checks -- -D warnings -A clippy::op_ref + cargo clippy --workspace --all-targets --features lambda-vm-prover/disk-spill -- -D warnings -A clippy::op_ref fmt: cargo fmt --all @@ -190,6 +191,7 @@ lint: cargo fmt --check --all cargo clippy --workspace --all-targets -- -D warnings -A clippy::op_ref cargo clippy --workspace --all-targets --no-default-features --features lambda-vm-prover/debug-checks -- -D warnings -A clippy::op_ref + cargo clippy --workspace --all-targets --features lambda-vm-prover/disk-spill -- -D warnings -A clippy::op_ref flamegraph-prover: cd crypto/stark && samply record cargo bench --bench profile_prover 
--features parallel From 2a58932108334db61ee7e9e97c5e1ccf1405fa84 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 21 Apr 2026 19:21:05 -0300 Subject: [PATCH 103/231] Fsync spill files before mmap --- crypto/crypto/src/merkle_tree/merkle.rs | 5 +++++ crypto/stark/src/table.rs | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index d16210339..ba9056ac5 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -301,6 +301,11 @@ where writer.write_all(bytes)?; writer.flush()?; } + // Flush dirty pages to disk so the subsequent mmap reads the data + // that was written, not the zero-filled holes left by `set_len`. + // Under memory pressure, unsynced pages can be evicted from the + // page cache before readback, producing partially-zeroed reads. + file.sync_all()?; // SAFETY: tempfile() creates an anonymous file with no filesystem path, // so no other process can open or modify it. diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 1290c4a2d..976f58564 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -321,6 +321,11 @@ impl Table { writer.write_all(bytes)?; writer.flush()?; } + // Flush dirty pages to disk so the subsequent mmap reads the data + // that was written, not the zero-filled holes left by `set_len`. + // Under memory pressure, unsynced pages can be evicted from the + // page cache before readback, producing partially-zeroed reads. + file.sync_all()?; // SAFETY: tempfile() creates an anonymous file with no filesystem path, // so no other process can open or modify it. 
From 99b62d184069583a24249f58ea5660ba38553b93 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 22 Apr 2026 15:19:30 -0300 Subject: [PATCH 104/231] Spill via MmapMut instead of write --- crypto/crypto/src/merkle_tree/merkle.rs | 39 +++++++++------------ crypto/stark/src/table.rs | 45 ++++++++++--------------- 2 files changed, 33 insertions(+), 51 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index ba9056ac5..27897ecad 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -279,7 +279,6 @@ where "B::Node must have alignment 1 for mmap safety" ) } - use std::io::Write; if self.nodes.is_empty() { return Ok(()); @@ -291,28 +290,22 @@ where let file = tempfile::tempfile()?; file.set_len(total_bytes as u64)?; - { - let mut writer = std::io::BufWriter::new(&file); - // SAFETY: B::Node is a plain byte array ([u8; N]), so casting - // the contiguous Vec to a byte slice is valid. - let bytes = unsafe { - core::slice::from_raw_parts(self.nodes.as_ptr() as *const u8, total_bytes) - }; - writer.write_all(bytes)?; - writer.flush()?; - } - // Flush dirty pages to disk so the subsequent mmap reads the data - // that was written, not the zero-filled holes left by `set_len`. - // Under memory pressure, unsynced pages can be evicted from the - // page cache before readback, producing partially-zeroed reads. - file.sync_all()?; - - // SAFETY: tempfile() creates an anonymous file with no filesystem path, - // so no other process can open or modify it. - // The mapping keeps its own reference to the underlying object - // (Unix: kernel VMA; Windows: duplicated handle in memmap2), so the - // `file` local can drop at end of scope without invalidating it. - let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; + + // Write directly through a writable mmap, then downgrade to read-only. 
+ // Avoids the write(2) → page-cache → mmap hand-off, which on Linux + // under memory pressure could produce partially-zeroed reads from the + // read-only mmap. + // + // SAFETY: tempfile() creates an anonymous file with no filesystem + // path, so no other process can open or modify it. + let mut mmap_mut = unsafe { memmap2::MmapOptions::new().map_mut(&file)? }; + // SAFETY: B::Node is a plain byte array ([u8; N]), so casting + // the contiguous Vec to a byte slice is valid. + let bytes = + unsafe { core::slice::from_raw_parts(self.nodes.as_ptr() as *const u8, total_bytes) }; + mmap_mut.copy_from_slice(bytes); + mmap_mut.flush()?; + let mmap = mmap_mut.make_read_only()?; // Free the heap allocation self.nodes = Vec::new(); diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 976f58564..a875491aa 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -288,7 +288,6 @@ impl Table { "FieldElement size must be a multiple of its alignment for mmap interior reads to be aligned" ) } - use std::io::Write; if self.data.is_empty() || self.mmap_backing.is_some() { return Ok(()); @@ -306,33 +305,23 @@ impl Table { let file = tempfile::tempfile()?; file.set_len(total_bytes)?; - { - let mut writer = std::io::BufWriter::new(&file); - // SAFETY: FieldElement is #[repr(transparent)] over F::BaseType. - // The Vec has the same byte layout as a contiguous array. - // `self.data.len() * elem_size` fits in usize because Vec allocations - // are bounded by isize::MAX bytes. - let bytes: &[u8] = unsafe { - std::slice::from_raw_parts( - self.data.as_ptr() as *const u8, - self.data.len() * elem_size, - ) - }; - writer.write_all(bytes)?; - writer.flush()?; - } - // Flush dirty pages to disk so the subsequent mmap reads the data - // that was written, not the zero-filled holes left by `set_len`. - // Under memory pressure, unsynced pages can be evicted from the - // page cache before readback, producing partially-zeroed reads. 
- file.sync_all()?; - - // SAFETY: tempfile() creates an anonymous file with no filesystem path, - // so no other process can open or modify it. - // The mapping keeps its own reference to the underlying object - // (Unix: kernel VMA; Windows: duplicated handle in memmap2), so the - // `file` local can drop at end of scope without invalidating it. - let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? }; + + // Write directly through a writable mmap, then downgrade to read-only. + // Avoids the write(2) → page-cache → mmap hand-off, which on Linux + // under memory pressure could produce partially-zeroed reads from the + // read-only mmap (the previous implementation relied on that handoff). + // + // SAFETY: tempfile() creates an anonymous file with no filesystem + // path, so no other process can open or modify it. + let mut mmap_mut = unsafe { memmap2::MmapOptions::new().map_mut(&file)? }; + // SAFETY: FieldElement is #[repr(transparent)] over F::BaseType. + // The Vec has the same byte layout as a contiguous array. 
+ let bytes: &[u8] = unsafe { + std::slice::from_raw_parts(self.data.as_ptr() as *const u8, self.data.len() * elem_size) + }; + mmap_mut.copy_from_slice(bytes); + mmap_mut.flush()?; + let mmap = mmap_mut.make_read_only()?; self.mmap_backing = Some(TableMmapBacking { mmap, From e462b29a2670bdfbe1ff40b40b0305987802711a Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 22 Apr 2026 16:34:33 -0300 Subject: [PATCH 105/231] Make disk-spill default for CLI --- bin/cli/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/cli/Cargo.toml b/bin/cli/Cargo.toml index 0d8e5d1d4..59df83441 100644 --- a/bin/cli/Cargo.toml +++ b/bin/cli/Cargo.toml @@ -13,6 +13,7 @@ tikv-jemallocator = "0.6" tikv-jemalloc-ctl = { version = "0.6", features = ["stats"], optional = true } [features] +default = ["disk-spill"] jemalloc-stats = ["dep:tikv-jemalloc-ctl"] disk-spill = ["prover/disk-spill"] instruments = ["prover/instruments", "stark/instruments"] From bbf812c27c949b716c75b1a2fb0c1ca700571eb5 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 24 Apr 2026 17:18:15 -0300 Subject: [PATCH 106/231] Spill memw_aligned and memw_register tables too --- prover/src/tables/trace_builder.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index c1fa828c7..3e427b0b0 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2213,6 +2213,12 @@ impl Traces { for t in &mut self.memws { spill(t)?; } + for t in &mut self.memw_aligneds { + spill(t)?; + } + for t in &mut self.memw_registers { + spill(t)?; + } for t in &mut self.loads { spill(t)?; } From de76ce7645e891f20c0d737a300b89380f5cdbd4 Mon Sep 17 00:00:00 2001 From: Gabriel Bosio <38794644+gabrielbosio@users.noreply.github.com> Date: Thu, 30 Apr 2026 16:37:32 -0300 Subject: [PATCH 107/231] Disk spilling based on heap usage (#557) * Compile disk-spill types unconditionally * Dispatch disk-spill via runtime StorageMode * 
Remove disk-spill feature flag * Auto-pick StorageMode from available RAM * Add ProofOptions.max_ram_bytes cap * Add tests for auto-storage selection * Drop disk-spill feature from CI and Makefile * Address bot review comments * Honor max_ram_bytes during trace build * Saturate peak estimate and tighten cap application * Document estimator and trace-build-mode limits * Estimate main_elements before trace-table build * Warn when OS memory readout unavailable * Include PAGE tables in main_elements estimate * Replace silent wasm break with compile_error * Guard wasm32 via target_arch not feature * Log effective budget on auto disk-spill * Guard crypto/crypto std on wasm32 * Simplify auto-disk-spill helpers * Add streaming counter pass alongside prep * Switch prove flow to streaming counter pass and analytical peak * Remove PreparedTraceInputs and prepare_from_elf_and_logs * Build peak_bytes from per-table allocations * Print predicted peak bytes for threshold calibration * Make table_parallelism pub * Rework peak_bytes for chunk parallelism * Add peak_bytes calibration test * Add auto-spill threshold calibration script * Bump auto-spill threshold to 90% * Derive auto-spill log percent from constants * Make disk-spill opt-in via feature flag * Log predicted peak via log::info * Warn on Ram fallback in select_storage_mode * Assert aux upper-bound in count tests * Default to Disk on unknown memory state * Trim adverbs and em-dashes from docs * Use saturating arithmetic in peak_bytes * Gate mmap deps behind disk-spill feature * Pass disk-spill feature to stark CI test * Restore disk-spill position in CLI features * Compile spill methods only with disk-spill * Restore disk-spill position in crypto features * Gate DiskSpill variant on disk-spill feature * Move unused-warning suppressions to bindings * Remove redundant multi_prove doc line * Hide disk-spill plumbing when feature is off * Gate StorageMode behind disk-spill feature * Drop redundant cfg on Disk variant * Drop 
unused TableLengths element accessors * Gate count_table_lengths on disk-spill * Inline derive_ops back into build_traces * Drop redundant peak_bytes formulas from doc * Trim auto_storage module doc to one line * Drop effective_budget log; tighten visibility * Trim verbose doc and inline comments * Drop disk-spill feature comment in prover * Return Err on commit overflow in count_table_lengths * Note container limits in available_ram_bytes * Add max_ram_bytes to ProofOptions in test --- Cargo.lock | 150 +++++- bin/cli/Cargo.toml | 2 +- bin/cli/src/main.rs | 5 +- crypto/crypto/src/lib.rs | 11 + crypto/math/src/field/element.rs | 4 +- crypto/stark/Cargo.toml | 4 +- crypto/stark/benches/profile_prover.rs | 1 + crypto/stark/benches/prover_benchmark.rs | 1 + crypto/stark/src/lib.rs | 8 + crypto/stark/src/proof/options.rs | 8 + crypto/stark/src/prover.rs | 95 +++- crypto/stark/src/storage_mode.rs | 8 + crypto/stark/src/table.rs | 18 +- crypto/stark/src/tests/prover_tests.rs | 7 + crypto/stark/src/trace.rs | 3 - prover/Cargo.toml | 4 +- prover/src/auto_storage.rs | 443 ++++++++++++++++++ prover/src/lib.rs | 64 ++- prover/src/tables/trace_builder.rs | 396 ++++++++++++++-- prover/src/tests/disk_spill_tests.rs | 111 ++--- prover/src/tests/mod.rs | 2 + .../src/tests/peak_bytes_calibration_tests.rs | 89 ++++ scripts/calibrate_threshold.sh | 54 +++ 23 files changed, 1302 insertions(+), 186 deletions(-) create mode 100644 crypto/stark/src/storage_mode.rs create mode 100644 prover/src/auto_storage.rs create mode 100644 prover/src/tests/peak_bytes_calibration_tests.rs create mode 100755 scripts/calibrate_threshold.sh diff --git a/Cargo.lock b/Cargo.lock index 67319b1a7..b9e0350d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -567,6 +567,7 @@ version = "0.1.0" dependencies = [ "bincode", "clap 4.5.53", + "env_logger", "executor", "lambda-vm-prover", "stark", @@ -1616,7 +1617,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core", + "windows-core 0.62.2", ] 
[[package]] @@ -1945,10 +1946,12 @@ dependencies = [ "crypto", "env_logger", "executor", + "log", "math", "rayon", "serde", "stark", + "sysinfo", ] [[package]] @@ -2143,6 +2146,15 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "ntapi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" +dependencies = [ + "winapi", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -3270,6 +3282,19 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "sysinfo" +version = "0.31.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "355dbe4f8799b304b05e1b0f05fc59b2a18d36645cf169607da45bde2f69a1be" +dependencies = [ + "core-foundation-sys", + "libc", + "memchr", + "ntapi", + "windows", +] + [[package]] name = "tap" version = "1.0.1" @@ -3806,19 +3831,52 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143" +dependencies = [ + "windows-core 0.57.0", + "windows-targets", +] + +[[package]] +name = "windows-core" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d" +dependencies = [ + "windows-implement 0.57.0", + "windows-interface 0.57.0", + "windows-result 0.1.2", + "windows-targets", +] + [[package]] name = "windows-core" version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ - "windows-implement", - "windows-interface", + "windows-implement 0.60.2", + "windows-interface 0.59.3", 
"windows-link", - "windows-result", + "windows-result 0.4.1", "windows-strings", ] +[[package]] +name = "windows-implement" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "windows-implement" version = "0.60.2" @@ -3830,6 +3888,17 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "windows-interface" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "windows-interface" version = "0.59.3" @@ -3847,6 +3916,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-result" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-result" version = "0.4.1" @@ -3874,6 +3952,70 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + 
+[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + [[package]] name = "winnow" version = "0.7.14" diff --git a/bin/cli/Cargo.toml b/bin/cli/Cargo.toml index 59df83441..e6f16582f 100644 --- a/bin/cli/Cargo.toml +++ b/bin/cli/Cargo.toml @@ -11,9 +11,9 @@ clap = { version = "4.3.10", features = ["derive"] } bincode = "1" tikv-jemallocator = "0.6" tikv-jemalloc-ctl = { version = "0.6", features = ["stats"], optional = true } +env_logger = "0.11" [features] -default = ["disk-spill"] jemalloc-stats = ["dep:tikv-jemalloc-ctl"] disk-spill = ["prover/disk-spill"] instruments = 
["prover/instruments", "stark/instruments"] diff --git a/bin/cli/src/main.rs b/bin/cli/src/main.rs index ff7dca780..07dbff57c 100644 --- a/bin/cli/src/main.rs +++ b/bin/cli/src/main.rs @@ -174,6 +174,7 @@ enum Commands { } fn main() -> ExitCode { + env_logger::init(); let cli = Cli::parse(); match cli.command { @@ -378,10 +379,6 @@ fn cmd_prove( #[cfg(feature = "jemalloc-stats")] let tracker = heap_tracker::HeapTracker::start(); - if cfg!(feature = "disk-spill") { - eprintln!("Disk-spill: enabled"); - } - #[cfg(all(feature = "jemalloc-stats", feature = "instruments"))] stark::instruments::set_heap_reader(|| { tikv_jemalloc_ctl::epoch::advance().ok(); diff --git a/crypto/crypto/src/lib.rs b/crypto/crypto/src/lib.rs index 20462a407..1287a3d66 100644 --- a/crypto/crypto/src/lib.rs +++ b/crypto/crypto/src/lib.rs @@ -1,5 +1,16 @@ #![allow(clippy::op_ref)] #![cfg_attr(not(feature = "std"), no_std)] + +// `std` pulls in `memmap2` (used by `crypto/stark`'s disk-backed Merkle node +// storage), which doesn't compile on wasm32. Fail loudly here so downstream +// crates that depend on `crypto/crypto` directly with `std` get a clear +// message instead of a transitive memmap2 build error. 
+#[cfg(all(target_arch = "wasm32", feature = "std"))] +compile_error!( + "wasm32 targets are not supported with feature \"std\": StorageMode::Disk \ + requires memmap2, which does not compile on wasm32" +); + #[macro_use] extern crate alloc; diff --git a/crypto/math/src/field/element.rs b/crypto/math/src/field/element.rs index bedf30baa..d25470323 100644 --- a/crypto/math/src/field/element.rs +++ b/crypto/math/src/field/element.rs @@ -40,8 +40,8 @@ use super::traits::{IsPrimeField, IsSubFieldOf, LegendreSymbol}; /// A field element with operations algorithms defined in `F` /// -/// `#[repr(transparent)]` is required for soundness: the `disk-spill` -/// feature in `crypto/stark` casts raw mmap bytes to `*const FieldElement` +/// `#[repr(transparent)]` is required for soundness: `StorageMode::Disk` +/// in `crypto/stark` casts raw mmap bytes to `*const FieldElement` /// (see `table.rs::get`, `trace.rs`). Changing the `repr`, adding /// fields, or introducing padding makes those casts UB. #[allow(clippy::derived_hash_with_manual_eq)] diff --git a/crypto/stark/Cargo.toml b/crypto/stark/Cargo.toml index 34edb0f72..d847cac32 100644 --- a/crypto/stark/Cargo.toml +++ b/crypto/stark/Cargo.toml @@ -22,7 +22,9 @@ itertools = "0.11.0" # Parallelization crates rayon = { version = "1.8.0", optional = true } -# Disk-spill: mmap LDE data to reduce heap memory during proving +# Memory-mapped backing for trace tables and Merkle tree nodes, used when the +# runtime picks `StorageMode::Disk` to keep peak RAM bounded. Activated by +# the `disk-spill` feature. 
memmap2 = { version = "0.9", optional = true } tempfile = { version = "3", optional = true } libc = { version = "0.2", optional = true } diff --git a/crypto/stark/benches/profile_prover.rs b/crypto/stark/benches/profile_prover.rs index dbff24440..52be8c49a 100644 --- a/crypto/stark/benches/profile_prover.rs +++ b/crypto/stark/benches/profile_prover.rs @@ -21,6 +21,7 @@ fn main() { fri_number_of_queries: 100, coset_offset: 3, grinding_factor: 0, + max_ram_bytes: None, }; let num_columns = 16; diff --git a/crypto/stark/benches/prover_benchmark.rs b/crypto/stark/benches/prover_benchmark.rs index 2729fff29..4dfd02634 100644 --- a/crypto/stark/benches/prover_benchmark.rs +++ b/crypto/stark/benches/prover_benchmark.rs @@ -61,6 +61,7 @@ fn benchmark_proof_options() -> ProofOptions { fri_number_of_queries: 30, coset_offset: 3, grinding_factor: 0, + max_ram_bytes: None, } } diff --git a/crypto/stark/src/lib.rs b/crypto/stark/src/lib.rs index 09ca16ed4..d6bca2e5b 100644 --- a/crypto/stark/src/lib.rs +++ b/crypto/stark/src/lib.rs @@ -1,3 +1,9 @@ +// `StorageMode::Disk` is implemented via `memmap2`, which doesn't compile on +// wasm32. Fail loudly at the top of the crate rather than via a confusing +// transitive memmap2 error deeper in the dep graph. 
+#[cfg(all(target_arch = "wasm32", feature = "disk-spill"))] +compile_error!("the `disk-spill` feature requires memmap2, which does not compile on wasm32"); + #[cfg(feature = "debug-checks")] pub mod bus_debug; pub mod constraints; @@ -14,6 +20,8 @@ pub mod instruments; pub mod lookup; pub mod proof; pub mod prover; +#[cfg(feature = "disk-spill")] +pub mod storage_mode; pub mod table; pub mod trace; pub mod traits; diff --git a/crypto/stark/src/proof/options.rs b/crypto/stark/src/proof/options.rs index 70976b993..ad2dfbf30 100644 --- a/crypto/stark/src/proof/options.rs +++ b/crypto/stark/src/proof/options.rs @@ -38,6 +38,10 @@ impl fmt::Display for ProofOptionsError { /// - `fri_number_of_queries`: the number of queries for the FRI layer /// - `coset_offset`: the offset for the coset /// - `grinding_factor`: the number of leading zeros that we want for the Hash(hash || nonce) +/// - `max_ram_bytes`: optional ceiling on prover RAM usage. The prover +/// spills trace tables and Merkle-tree nodes to mmap if the estimated +/// peak exceeds 90% of this cap or of system-available RAM, whichever +/// is smaller. LDE column vectors remain in RAM regardless.
#[cfg_attr(feature = "wasm", wasm_bindgen)] #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] pub struct ProofOptions { @@ -45,6 +49,8 @@ pub struct ProofOptions { pub fri_number_of_queries: usize, pub coset_offset: u64, pub grinding_factor: u8, + #[serde(default)] + pub max_ram_bytes: Option, } impl ProofOptions { @@ -56,6 +62,7 @@ impl ProofOptions { fri_number_of_queries: 3, coset_offset: 3, grinding_factor: 1, + max_ram_bytes: None, } } } @@ -112,6 +119,7 @@ impl GoldilocksCubicProofOptions { fri_number_of_queries, coset_offset: 3, grinding_factor, + max_ram_bytes: None, }) } } diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 561166f53..92ae5bfd7 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -28,6 +28,8 @@ use crate::domain::new_domain; use crate::fri; use crate::lookup::LOGUP_NUM_CHALLENGES; use crate::proof::stark::{DeepPolynomialOpenings, PolynomialOpenings}; +#[cfg(feature = "disk-spill")] +use crate::storage_mode::StorageMode; use crate::table::Table; use crate::trace::LDETraceTable; @@ -300,7 +302,7 @@ impl LdeTwiddles { /// Number of tables to process concurrently in `multi_prove`. /// Default: num_cores / 3 (benchmarked optimal on both M3 Pro and EPYC 9454P). /// Override with `TABLE_PARALLELISM` env var. -fn table_parallelism() -> usize { +pub fn table_parallelism() -> usize { #[cfg(feature = "parallel")] { std::env::var("TABLE_PARALLELISM") @@ -544,6 +546,7 @@ pub trait IsStarkProver< trace: &TraceTable, domain: &Domain, twiddles: &LdeTwiddles, + #[cfg(feature = "disk-spill")] storage_mode: StorageMode, ) -> Result< ( BatchedMerkleTree, @@ -561,10 +564,11 @@ pub trait IsStarkProver< { let lde_size = domain.interpolation_domain_size * domain.blowup_factor; let mut columns = trace.extract_columns_main(lde_size); - // Data is now in `columns`. Evict the mmap pages from the OS page - // cache so the same data doesn't occupy RAM in both places. 
#[cfg(feature = "disk-spill")] - trace.main_table.advise_drop_cache(); + if storage_mode == StorageMode::Disk { + // Evict mmap pages so spilled data doesn't occupy heap + cache. + trace.main_table.advise_drop_cache(); + } #[cfg(feature = "instruments")] let t_sub = Instant::now(); Self::expand_columns_to_lde::(&mut columns, domain, twiddles); @@ -580,8 +584,10 @@ pub trait IsStarkProver< crate::instruments::accum_r1_main(main_lde_dur, t_sub.elapsed()); #[cfg(feature = "disk-spill")] - tree.spill_nodes_to_disk() - .map_err(|e| ProvingError::DiskSpill(format!("main Merkle tree: {e}")))?; + if storage_mode == StorageMode::Disk { + tree.spill_nodes_to_disk() + .map_err(|e| ProvingError::DiskSpill(format!("main Merkle tree: {e}")))?; + } Ok((tree, root, None, None, 0, columns)) } @@ -594,6 +600,7 @@ pub trait IsStarkProver< precomputed_commitment: Commitment, num_precomputed_cols: usize, twiddles: &LdeTwiddles, + #[cfg(feature = "disk-spill")] storage_mode: StorageMode, ) -> Result< ( BatchedMerkleTree, @@ -612,7 +619,9 @@ pub trait IsStarkProver< let lde_size = domain.interpolation_domain_size * domain.blowup_factor; let mut columns = trace.extract_columns_main(lde_size); #[cfg(feature = "disk-spill")] - trace.main_table.advise_drop_cache(); + if storage_mode == StorageMode::Disk { + trace.main_table.advise_drop_cache(); + } #[cfg(feature = "instruments")] let t_sub = Instant::now(); Self::expand_columns_to_lde::(&mut columns, domain, twiddles); @@ -639,7 +648,7 @@ pub trait IsStarkProver< ); #[cfg(feature = "disk-spill")] - { + if storage_mode == StorageMode::Disk { precomputed_tree .spill_nodes_to_disk() .map_err(|e| ProvingError::DiskSpill(format!("precomputed Merkle tree: {e}")))?; @@ -1585,8 +1594,42 @@ pub trait IsStarkProver< /// /// The transcript must be safely initialized before passing it to this method. 
fn multi_prove( + air_trace_pairs: Vec>, + transcript: &mut (impl IsStarkTranscript + Clone + Send), + ) -> Result, ProvingError> + where + FieldElement: AsBytes, + FieldElement: AsBytes, + PI: Send + Sync + Clone, + { + Self::multi_prove_inner( + air_trace_pairs, + transcript, + #[cfg(feature = "disk-spill")] + StorageMode::Ram, + ) + } + + /// Same as `multi_prove` but lets callers back intermediate state with mmap + /// files to cap peak RAM usage. + #[cfg(feature = "disk-spill")] + fn multi_prove_with_mode( + air_trace_pairs: Vec>, + transcript: &mut (impl IsStarkTranscript + Clone + Send), + storage_mode: StorageMode, + ) -> Result, ProvingError> + where + FieldElement: AsBytes, + FieldElement: AsBytes, + PI: Send + Sync + Clone, + { + Self::multi_prove_inner(air_trace_pairs, transcript, storage_mode) + } + + fn multi_prove_inner( mut air_trace_pairs: Vec>, transcript: &mut (impl IsStarkTranscript + Clone + Send), + #[cfg(feature = "disk-spill")] storage_mode: StorageMode, ) -> Result, ProvingError> where FieldElement: AsBytes, @@ -1614,9 +1657,7 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let phase_start = Instant::now(); - // Deduplicate Domain + LdeTwiddles by (trace_length, blowup_factor, coset_offset). - // Many tables share the same domain size (e.g., 7+ tables at 2^20). - // Without dedup, each creates its own Domain (~24 MB) and LdeTwiddles (~32 MB). + // Deduplicate Domain/LdeTwiddles by (trace_length, blowup, coset). type DomainEntry = (Arc>, Arc>); let mut domain_cache: std::collections::HashMap<(usize, usize, u64), DomainEntry> = std::collections::HashMap::new(); @@ -1648,17 +1689,13 @@ pub trait IsStarkProver< domains.push(domain); twiddle_caches.push(twiddles); } - // Free the HashMap (which holds extra strong Arc references) before the - // long proving rounds begin. `domains` and `twiddle_caches` already hold - // the only surviving Arcs we care about. 
drop(domain_cache); let k = table_parallelism().min(num_airs).max(1); - // Spill all main trace tables to mmap before any Round 1 LDE work. - // Freeing heap makes room for LDE columns built below. + // Spill main traces to mmap before Round 1 LDE. #[cfg(feature = "disk-spill")] - { + if storage_mode == StorageMode::Disk { #[cfg(feature = "parallel")] let spill_iter = air_trace_pairs.par_iter_mut(); #[cfg(not(feature = "parallel"))] @@ -1712,9 +1749,17 @@ pub trait IsStarkProver< air.precomputed_commitment(), air.num_precomputed_columns(), twiddles, + #[cfg(feature = "disk-spill")] + storage_mode, ) } else { - Self::commit_main_trace(*trace, domain, twiddles) + Self::commit_main_trace( + *trace, + domain, + twiddles, + #[cfg(feature = "disk-spill")] + storage_mode, + ) } }) .collect(); @@ -1789,7 +1834,7 @@ pub trait IsStarkProver< // Spill all aux trace tables to mmap before any Round 1 aux LDE work. #[cfg(feature = "disk-spill")] - { + if storage_mode == StorageMode::Disk { #[cfg(feature = "parallel")] let spill_iter = air_trace_pairs.par_iter_mut(); #[cfg(not(feature = "parallel"))] @@ -1854,7 +1899,9 @@ pub trait IsStarkProver< let lde_size = domain.interpolation_domain_size * domain.blowup_factor; let mut columns = trace.extract_columns_aux(lde_size); #[cfg(feature = "disk-spill")] - trace.aux_table.advise_drop_cache(); + if storage_mode == StorageMode::Disk { + trace.aux_table.advise_drop_cache(); + } #[cfg(feature = "instruments")] let t_sub = Instant::now(); Self::expand_columns_to_lde::( @@ -1873,9 +1920,11 @@ pub trait IsStarkProver< crate::instruments::accum_r1_aux(aux_lde_dur, t_sub.elapsed()); #[cfg(feature = "disk-spill")] - tree.spill_nodes_to_disk().map_err(|e| { - ProvingError::DiskSpill(format!("aux Merkle tree: {e}")) - })?; + if storage_mode == StorageMode::Disk { + tree.spill_nodes_to_disk().map_err(|e| { + ProvingError::DiskSpill(format!("aux Merkle tree: {e}")) + })?; + } Ok((Some(Arc::new(tree)), Some(root), columns)) } else { diff --git 
a/crypto/stark/src/storage_mode.rs b/crypto/stark/src/storage_mode.rs new file mode 100644 index 000000000..0ba4b32b5 --- /dev/null +++ b/crypto/stark/src/storage_mode.rs @@ -0,0 +1,8 @@ +/// Storage backend for intermediate prover state: `Ram` (heap) or `Disk` (mmap). +/// Disk trades wall time for peak RAM. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum StorageMode { + #[default] + Ram, + Disk, +} diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index a875491aa..e1f6628e9 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -39,6 +39,7 @@ impl std::fmt::Debug for TableMmapBacking { /// Since this struct is a representation of a two-dimensional table, all rows should have the same /// length. #[derive(Default, Debug, serde::Serialize, serde::Deserialize)] +#[cfg_attr(not(feature = "disk-spill"), derive(Clone))] #[serde(bound = "")] pub struct Table { pub data: Vec>, @@ -53,9 +54,9 @@ pub struct Table { /// and returns an unspilled clone. This is cold — callers pay the full /// materialization cost — but avoids the runtime panic a derived impl /// would produce on `TableMmapBacking`. +#[cfg(feature = "disk-spill")] impl Clone for Table { fn clone(&self) -> Self { - #[cfg(feature = "disk-spill")] if self.mmap_backing.is_some() { let mut data = Vec::with_capacity(self.width * self.height); for row in 0..self.height { @@ -74,7 +75,6 @@ impl Clone for Table { data: self.data.clone(), width: self.width, height: self.height, - #[cfg(feature = "disk-spill")] mmap_backing: None, } } @@ -263,15 +263,9 @@ impl Table { } /// Returns true if this table's data has been spilled to disk via mmap. + #[cfg(feature = "disk-spill")] pub fn is_spilled(&self) -> bool { - #[cfg(feature = "disk-spill")] - { - self.mmap_backing.is_some() - } - #[cfg(not(feature = "disk-spill"))] - { - false - } + self.mmap_backing.is_some() } /// Spill the table's row-major data to a temp file and mmap it back. 
@@ -340,9 +334,7 @@ impl Table { /// Call after reading spilled data into pool buffers so the same /// data doesn't occupy RAM in both places. /// - /// Unix-only: `madvise(MADV_DONTNEED)` has no direct Windows equivalent, - /// so this is a no-op on non-Unix targets (callers rely on natural - /// page-cache reclaim there). + /// Unix-only: no-op on non-Unix targets. #[cfg(all(feature = "disk-spill", unix))] pub fn advise_drop_cache(&self) { if let Some(ref backing) = self.mmap_backing { diff --git a/crypto/stark/src/tests/prover_tests.rs b/crypto/stark/src/tests/prover_tests.rs index c4d0a22fd..fd8c400bb 100644 --- a/crypto/stark/src/tests/prover_tests.rs +++ b/crypto/stark/src/tests/prover_tests.rs @@ -32,6 +32,7 @@ fn test_domain_constructor() { fri_number_of_queries: 1, coset_offset, grinding_factor, + max_ram_bytes: None, }; let domain = Domain::new( @@ -123,6 +124,7 @@ fn barycentric_trace_eval_matches_horner_trace_eval() { fri_number_of_queries: 1, coset_offset, grinding_factor: 0, + max_ram_bytes: None, }; let air = simple_fibonacci::FibonacciAIR::::new(&proof_options); @@ -211,6 +213,7 @@ fn test_decompose_and_extend_d2_matches_original() { fri_number_of_queries: 1, coset_offset: 3, grinding_factor: 0, + max_ram_bytes: None, }; // We need an AIR with composition_poly_degree_bound = 2 * trace_length. @@ -271,12 +274,14 @@ fn test_multi_prove_mixed_coset_offsets() { fri_number_of_queries: 3, coset_offset: 3, grinding_factor: 1, + max_ram_bytes: None, }; let proof_options_7 = ProofOptions { blowup_factor: 2, fri_number_of_queries: 3, coset_offset: 7, grinding_factor: 1, + max_ram_bytes: None, }; // Both AIRs have the same trace length and blowup, but different coset offsets. 
@@ -341,6 +346,7 @@ fn test_multi_prove_dedups_shared_domain_params() { fri_number_of_queries: 3, coset_offset: 3, grinding_factor: 1, + max_ram_bytes: None, }; let mut trace_1 = simple_fibonacci::fibonacci_trace([Felt::from(1), Felt::from(1)], 8); @@ -431,6 +437,7 @@ fn test_deep_poly_direct_2n_matches_interpolate_fft_extend() { fri_number_of_queries: 1, coset_offset: 3, grinding_factor: 0, + max_ram_bytes: None, }; let air = QuadraticAIR::::new(&proof_options); diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 80bb98e8a..65723e195 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -1,6 +1,3 @@ -#[cfg(all(feature = "disk-spill", feature = "wasm"))] -compile_error!("disk-spill and wasm features are mutually exclusive"); - use crate::domain::Domain; use crate::table::Table; use itertools::Itertools; diff --git a/prover/Cargo.toml b/prover/Cargo.toml index ed8e7e77f..76a247039 100644 --- a/prover/Cargo.toml +++ b/prover/Cargo.toml @@ -7,8 +7,8 @@ edition = "2024" default = ["parallel"] parallel = ["stark/parallel", "math/parallel", "crypto/parallel", "dep:rayon"] debug-checks = ["stark/debug-checks"] -disk-spill = ["stark/disk-spill"] instruments = ["stark/instruments"] +disk-spill = ["stark/disk-spill"] [dependencies] stark = { path = "../crypto/stark" } @@ -17,6 +17,8 @@ math = { path = "../crypto/math" } executor = { path = "../executor" } serde = { version = "1.0", features = ["derive"] } rayon = { version = "1.8.0", optional = true } +sysinfo = { version = "0.31", default-features = false, features = ["system"] } +log = "0.4" [dev-dependencies] env_logger = "*" diff --git a/prover/src/auto_storage.rs b/prover/src/auto_storage.rs new file mode 100644 index 000000000..02f7e40cb --- /dev/null +++ b/prover/src/auto_storage.rs @@ -0,0 +1,443 @@ +//! Automatic `StorageMode` selection from an analytical peak-RAM estimate. 
+ +use crate::tables::bitwise::{ + NUM_ROWS as BITWISE_ROWS, bus_interactions as bitwise_buses, cols::NUM_COLUMNS as BITWISE_COLS, +}; +use crate::tables::branch::{bus_interactions as branch_buses, cols::NUM_COLUMNS as BRANCH_COLS}; +use crate::tables::commit::{bus_interactions as commit_buses, cols::NUM_COLUMNS as COMMIT_COLS}; +use crate::tables::cpu::{bus_interactions as cpu_buses, cols::NUM_COLUMNS as CPU_COLS}; +use crate::tables::decode::{bus_interactions as decode_buses, cols::NUM_COLUMNS as DECODE_COLS}; +use crate::tables::dvrm::{bus_interactions as dvrm_buses, cols::NUM_COLUMNS as DVRM_COLS}; +use crate::tables::halt::{bus_interactions as halt_buses, cols::NUM_COLUMNS as HALT_COLS}; +use crate::tables::load::{bus_interactions as load_buses, cols::NUM_COLUMNS as LOAD_COLS}; +use crate::tables::lt::{bus_interactions as lt_buses, cols::NUM_COLUMNS as LT_COLS}; +use crate::tables::memw::{bus_interactions as memw_buses, cols::NUM_COLUMNS as MEMW_COLS}; +use crate::tables::memw_aligned::{ + bus_interactions as memw_a_buses, cols::NUM_COLUMNS as MEMW_A_COLS, +}; +use crate::tables::memw_register::{ + bus_interactions as memw_r_buses, cols::NUM_COLUMNS as MEMW_R_COLS, +}; +use crate::tables::mul::{bus_interactions as mul_buses, cols::NUM_COLUMNS as MUL_COLS}; +use crate::tables::page::{ + DEFAULT_PAGE_SIZE as PAGE_SIZE, bus_interactions as page_buses, cols::NUM_COLUMNS as PAGE_COLS, +}; +use crate::tables::register::{ + NUM_REGISTER_ADDRESSES, bus_interactions as register_buses, cols::NUM_COLUMNS as REGISTER_COLS, +}; +use crate::tables::shift::{bus_interactions as shift_buses, cols::NUM_COLUMNS as SHIFT_COLS}; +use crate::tables::trace_builder::TableLengths; +use stark::storage_mode::StorageMode; +use sysinfo::System; + +const GOLDILOCKS_BYTES: u64 = 8; +const CUBIC_EXT_BYTES: u64 = 24; +const KECCAK_NODE_BYTES: u64 = 32; +const LOG_STRUCT_BYTES: u64 = 40; +const MEMORY_CELL_BYTES: u64 = 32; +const INSTRUCTION_MAP_BYTES_PER_ROW: u64 = 32; + +/// 9/10 budget 
headroom for OS, other processes, and allocator slack. +const SAFETY_FRACTION_NUM: u64 = 9; +const SAFETY_FRACTION_DEN: u64 = 10; + +/// `(rows, main_cols, aux_cols, num_main_merkle_trees)` for a single table. +type TableSpec = (u64, u64, u64, u64); + +/// Bytes alive for the duration of phase D (LDE columns + main/aux Merkle). +fn persistent_per_table(spec: TableSpec, blowup: u64) -> u64 { + let (rows, main_cols, aux_cols, main_trees) = spec; + let main_lde = rows + .saturating_mul(main_cols) + .saturating_mul(GOLDILOCKS_BYTES) + .saturating_mul(1 + blowup); + let aux_lde = rows + .saturating_mul(aux_cols) + .saturating_mul(CUBIC_EXT_BYTES) + .saturating_mul(1 + blowup); + let main_merkle = main_trees + .saturating_mul(2) + .saturating_mul(rows) + .saturating_mul(blowup) + .saturating_mul(KECCAK_NODE_BYTES); + let aux_merkle = 2u64 + .saturating_mul(rows) + .saturating_mul(blowup) + .saturating_mul(KECCAK_NODE_BYTES); + main_lde + .saturating_add(aux_lde) + .saturating_add(main_merkle) + .saturating_add(aux_merkle) +} + +/// Bytes (constraint evals, composition, FRI) alive during rounds 2-4 for one chunk. +fn transient_per_table(spec: TableSpec, blowup: u64) -> u64 { + let (rows, _, _, _) = spec; + let lde_size = rows.saturating_mul(blowup); + let constraint_evals = lde_size.saturating_mul(CUBIC_EXT_BYTES); + let composition_lde = lde_size.saturating_mul(2).saturating_mul(CUBIC_EXT_BYTES); + let composition_merkle = lde_size.saturating_mul(KECCAK_NODE_BYTES); + let fri_evals = lde_size.saturating_mul(CUBIC_EXT_BYTES); + let fri_merkle = lde_size.saturating_mul(KECCAK_NODE_BYTES); + constraint_evals + .saturating_add(composition_lde) + .saturating_add(composition_merkle) + .saturating_add(fri_evals) + .saturating_add(fri_merkle) +} + +/// Bytes for one Domain/LdeTwiddles cache entry. 
+fn domain_cache_bytes(rows: u64, blowup: u64) -> u64 { + rows.saturating_mul(3 + 2 * blowup) + .saturating_mul(GOLDILOCKS_BYTES) +} + +fn aux_cols(bus_count: usize) -> u64 { + bus_count.div_ceil(2) as u64 +} + +/// Per-table specs in the same order as `air_trace_pairs` in `prove`. +fn table_specs(lengths: &TableLengths) -> Vec { + let bitwise_rows = BITWISE_ROWS as u64; + let register_rows = NUM_REGISTER_ADDRESSES.next_power_of_two() as u64; + let halt_rows = 1u64; + let page_rows = PAGE_SIZE as u64; + + let mut specs = vec![ + ( + lengths.cpu_padded_rows, + CPU_COLS as u64, + aux_cols(cpu_buses().len()), + 1, + ), + ( + lengths.memw_padded_rows, + MEMW_COLS as u64, + aux_cols(memw_buses().len()), + 1, + ), + ( + lengths.memw_aligned_padded_rows, + MEMW_A_COLS as u64, + aux_cols(memw_a_buses().len()), + 1, + ), + ( + lengths.memw_register_padded_rows, + MEMW_R_COLS as u64, + aux_cols(memw_r_buses().len()), + 1, + ), + ( + lengths.load_padded_rows, + LOAD_COLS as u64, + aux_cols(load_buses().len()), + 1, + ), + ( + lengths.lt_padded_rows, + LT_COLS as u64, + aux_cols(lt_buses().len()), + 1, + ), + ( + lengths.shift_padded_rows, + SHIFT_COLS as u64, + aux_cols(shift_buses().len()), + 1, + ), + ( + lengths.mul_padded_rows, + MUL_COLS as u64, + aux_cols(mul_buses().len()), + 1, + ), + ( + lengths.dvrm_padded_rows, + DVRM_COLS as u64, + aux_cols(dvrm_buses().len()), + 1, + ), + ( + lengths.branch_padded_rows, + BRANCH_COLS as u64, + aux_cols(branch_buses().len()), + 1, + ), + ( + lengths.commit_padded_rows, + COMMIT_COLS as u64, + aux_cols(commit_buses().len()), + 1, + ), + // BITWISE / DECODE / PAGE / REGISTER take the preprocessed-trace commit + // path: it extracts ALL columns into the LDE and builds two Merkle trees + // (precomputed_tree + mult_tree), so main_cols = full NUM_COLUMNS and + // main_trees = 2. 
+ ( + bitwise_rows, + BITWISE_COLS as u64, + aux_cols(bitwise_buses().len()), + 2, + ), + ( + lengths.decode_rows, + DECODE_COLS as u64, + aux_cols(decode_buses().len()), + 2, + ), + (halt_rows, HALT_COLS as u64, aux_cols(halt_buses().len()), 1), + ( + register_rows, + REGISTER_COLS as u64, + aux_cols(register_buses().len()), + 2, + ), + ]; + // Each unique 256 KB page → its own PAGE table at PAGE_SIZE rows. + for _ in 0..lengths.unique_page_count { + specs.push(( + page_rows, + PAGE_COLS as u64, + aux_cols(page_buses(0).len()), + 2, + )); + } + specs +} + +/// Peak RAM estimate in bytes for a proof whose trace shape matches `lengths`. +/// +/// `blowup_factor` is `ProofOptions::blowup_factor`. `table_parallelism` is the +/// `k` used by `multi_prove_with_mode` to chunk rounds 2-4; pass +/// `stark::prover::table_parallelism()` so the worst-case-chunk transient term +/// matches the runtime. +pub fn peak_bytes(lengths: &TableLengths, blowup_factor: u8, table_parallelism: usize) -> u64 { + let blowup = blowup_factor as u64; + let k = table_parallelism.max(1); + let specs = table_specs(lengths); + + // Persistent: every table's LDE + main/aux Merkle is alive across phase D. + let persistent_total: u64 = specs + .iter() + .map(|s| persistent_per_table(*s, blowup)) + .fold(0u64, u64::saturating_add); + + // Transient: only k tables run round 2-4 in parallel. Conservative bound is + // the top-k tables by transient bytes (worst possible chunk assignment). + let mut transient_per: Vec = specs + .iter() + .map(|s| transient_per_table(*s, blowup)) + .collect(); + transient_per.sort_unstable_by(|a, b| b.cmp(a)); + let transient_total: u64 = transient_per + .iter() + .take(k) + .copied() + .fold(0u64, u64::saturating_add); + + // Domain + LdeTwiddles cache: one entry per unique padded-row count + // (blowup_factor and coset_offset are constant across tables in this + // codebase, so the unique key collapses to `rows`). 
+ let mut unique_rows: Vec = specs.iter().map(|s| s.0).collect(); + unique_rows.sort_unstable(); + unique_rows.dedup(); + let domain_total: u64 = unique_rows + .iter() + .map(|&r| domain_cache_bytes(r, blowup)) + .fold(0u64, u64::saturating_add); + + // State alive across the prove call (memory cells, log Vec, instruction + // map). Independent of trace shape. + let state_total = lengths + .unique_byte_count + .saturating_mul(MEMORY_CELL_BYTES) + .saturating_add(lengths.cycle_count.saturating_mul(LOG_STRUCT_BYTES)) + .saturating_add( + lengths + .decode_rows + .saturating_mul(INSTRUCTION_MAP_BYTES_PER_ROW), + ); + + persistent_total + .saturating_add(transient_total) + .saturating_add(domain_total) + .saturating_add(state_total) +} + +/// User cap ∩ OS available, or None if both are unknown. +fn effective_budget(available: Option, cap: Option) -> Option { + match (cap, available) { + (Some(c), Some(a)) => Some(c.min(a)), + (Some(c), None) => Some(c), + (None, a) => a, + } +} + +/// Disk if `estimated` exceeds 90% of the effective budget, else Ram. +/// Defaults to Disk when budget is unknown (sysinfo failure + no cap). +pub fn select_storage_mode( + estimated: u64, + available: Option, + cap: Option, +) -> StorageMode { + let Some(budget) = effective_budget(available, cap) else { + log::warn!( + "Auto disk-spill: sysinfo could not read system memory and no cap set, \ + defaulting to Disk. Pass max_ram_bytes if the machine has enough RAM." + ); + return StorageMode::Disk; + }; + let threshold = budget.saturating_mul(SAFETY_FRACTION_NUM) / SAFETY_FRACTION_DEN; + + if estimated > threshold { + StorageMode::Disk + } else { + StorageMode::Ram + } +} + +/// OS-available RAM, or None if sysinfo can't read it (e.g. stripped containers). +/// Returns `Some(0)` on near-OOM so callers force Disk rather than fall back to Ram. 
+/// +/// Reads host `/proc/meminfo`, not cgroup limits — set `max_ram_bytes` in +/// containerized environments to bound the budget to the container's limit. +pub fn available_ram_bytes() -> Option { + let mut sys = System::new(); + sys.refresh_memory(); + // total_memory == 0 means sysinfo can't read; otherwise available is real. + if sys.total_memory() == 0 { + None + } else { + Some(sys.available_memory()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const GB: u64 = 1_000_000_000; + /// Larger than the table count, so every table lands in the top-k and the + /// per-table delta in `peak_bytes_per_table_increment_is_exact` is purely + /// additive. + const ALL_TABLES: usize = 1_000; + + fn empty_lengths() -> TableLengths { + TableLengths::default() + } + + /// Adding rows to a single chunked table must increase `peak_bytes` by + /// exactly the per-row contribution from the formula in the module doc. + /// Verifies the per-table breakdown is exact rather than averaged. + #[test] + fn peak_bytes_per_table_increment_is_exact() { + let blowup = 2u8; + let b = blowup as u64; + + let baseline = peak_bytes(&empty_lengths(), blowup, ALL_TABLES); + + let mut lengths = empty_lengths(); + lengths.cpu_padded_rows = 4; + let bumped = peak_bytes(&lengths, blowup, ALL_TABLES); + + let cpu_main = CPU_COLS as u64; + let cpu_aux = cpu_buses().len().div_ceil(2) as u64; + let per_row_persistent = cpu_main * GOLDILOCKS_BYTES * (1 + b) + + cpu_aux * CUBIC_EXT_BYTES * (1 + b) + + 2 * b * KECCAK_NODE_BYTES // main Merkle (1 tree) + + 2 * b * KECCAK_NODE_BYTES; // aux Merkle + let per_row_transient = b * CUBIC_EXT_BYTES // constraint_evaluations + + 2 * b * CUBIC_EXT_BYTES // composition LDE (2 parts, d=2) + + b * KECCAK_NODE_BYTES // composition Merkle (PairKeccak) + + b * CUBIC_EXT_BYTES // FRI evals (geometric ≈ 1) + + b * KECCAK_NODE_BYTES; // FRI Merkle (geometric ≈ 1) + let per_row_domain = (3 + 2 * b) * GOLDILOCKS_BYTES; + + // CPU adds 4 rows of persistent + 
transient (top-k by ALL_TABLES) + + // its 4-row Domain entry (a fresh unique key not previously present). + assert_eq!( + bumped - baseline, + 4 * (per_row_persistent + per_row_transient + per_row_domain) + ); + } + + /// Higher blowup_factor should produce a strictly larger estimate. + #[test] + fn peak_bytes_scales_with_blowup() { + let lengths = empty_lengths(); + let two = peak_bytes(&lengths, 2, ALL_TABLES); + let four = peak_bytes(&lengths, 4, ALL_TABLES); + let eight = peak_bytes(&lengths, 8, ALL_TABLES); + assert!(two < four); + assert!(four < eight); + } + + /// Lower table_parallelism caps the transient sum to fewer tables, so the + /// estimate must be monotone in `k`. + #[test] + fn peak_bytes_monotone_in_table_parallelism() { + let lengths = empty_lengths(); + let k1 = peak_bytes(&lengths, 2, 1); + let k4 = peak_bytes(&lengths, 2, 4); + let k_all = peak_bytes(&lengths, 2, ALL_TABLES); + assert!(k1 < k4); + assert!(k4 <= k_all); + } + + #[test] + fn select_ram_when_estimate_below_threshold() { + // 10 GB estimated, 32 GB available → threshold 28.8 GB → Ram. + let mode = select_storage_mode(10 * GB, Some(32 * GB), None); + assert_eq!(mode, StorageMode::Ram); + } + + #[test] + fn select_disk_when_estimate_exceeds_threshold() { + // 30 GB estimated, 32 GB available → threshold 28.8 GB → Disk. + let mode = select_storage_mode(30 * GB, Some(32 * GB), None); + assert_eq!(mode, StorageMode::Disk); + } + + #[test] + fn cap_forces_disk_when_smaller_than_available() { + // 10 GB estimated, 64 GB available (would be Ram), but cap=4 GB + // → threshold = 4 × 0.9 = 3.6 GB → Disk. + let mode = select_storage_mode(10 * GB, Some(64 * GB), Some(4 * GB)); + assert_eq!(mode, StorageMode::Disk); + } + + #[test] + fn cap_ignored_when_larger_than_available() { + // available=8 GB dominates a cap of 64 GB. + // threshold = 8 × 0.9 = 7.2 GB, estimate 10 GB → Disk. 
+ let mode = select_storage_mode(10 * GB, Some(8 * GB), Some(64 * GB)); + assert_eq!(mode, StorageMode::Disk); + } + + #[test] + fn tiny_cap_always_forces_disk() { + let mode = select_storage_mode( + peak_bytes(&empty_lengths(), 2, ALL_TABLES), + Some(64 * GB), + Some(1_000_000), + ); + assert_eq!(mode, StorageMode::Disk); + } + + #[test] + fn unknown_available_with_no_cap_defaults_to_disk() { + // sysinfo failed and no cap was set. Default to Disk: sysinfo fails + // in stripped-down containers where Ram would OOM. Pass max_ram_bytes + // to opt out on a known-sized machine. + let mode = select_storage_mode(peak_bytes(&empty_lengths(), 2, ALL_TABLES), None, None); + assert_eq!(mode, StorageMode::Disk); + } + + #[test] + fn unknown_available_with_cap_uses_cap_as_budget() { + // OS can't report; cap is the whole budget. + let mode = select_storage_mode(10 * GB, None, Some(4 * GB)); + assert_eq!(mode, StorageMode::Disk); + } +} diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 7d60aeb30..7239a109e 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -10,6 +10,8 @@ //! assert!(lambda_vm_prover::verify(&vm_proof, &elf_bytes).unwrap()); //! ``` +#[cfg(feature = "disk-spill")] +pub mod auto_storage; pub mod constraints; #[cfg(feature = "debug-checks")] mod debug_report; @@ -574,16 +576,51 @@ pub fn prove_with_options_and_inputs( #[cfg(feature = "instruments")] let phase_start = std::time::Instant::now(); - // Generate all traces from ELF and execution logs. - // Page tables are derived from the prover's MemoryState (all accessed pages). - let mut traces = Traces::from_elf_and_logs(&program, &result.logs, max_rows, private_inputs)?; + // Pick where trace buffers and Merkle tree nodes live for this proof. + // With the `disk-spill` feature enabled, the analytical estimate decides + // between Ram and Disk; without it, we never spill. 
+ #[cfg(feature = "disk-spill")] + let storage_mode = { + // Stream over logs once to compute exact per-table row counts without + // allocating any op vectors. Use the resulting `TableLengths` to + // estimate peak heap analytically and pick a storage mode. + let lengths = crate::tables::trace_builder::count_table_lengths( + &program, + &result.logs, + max_rows, + private_inputs, + )?; + + let available = auto_storage::available_ram_bytes(); + let estimated_peak = auto_storage::peak_bytes( + &lengths, + proof_options.blowup_factor, + stark::prover::table_parallelism(), + ); + let mode = auto_storage::select_storage_mode( + estimated_peak, + available, + proof_options.max_ram_bytes, + ); - drop(result); + log::info!("predicted_peak_bytes: {estimated_peak}, storage_mode: {mode:?}"); + + mode + }; + // Phase 5: build the full traces with the chosen mode. `Disk` spills each + // chunk as it's built, so the trace never fully materializes in RAM. #[cfg(feature = "disk-spill")] - traces - .spill_all_main_to_disk() - .map_err(|e| Error::Prover(format!("disk-spill traces: {e}")))?; + let mut traces = Traces::from_elf_and_logs_with_mode( + &program, + &result.logs, + max_rows, + private_inputs, + storage_mode, + )?; + #[cfg(not(feature = "disk-spill"))] + let mut traces = Traces::from_elf_and_logs(&program, &result.logs, max_rows, private_inputs)?; + drop(result); #[cfg(feature = "instruments")] let trace_build_elapsed = phase_start.elapsed(); @@ -611,11 +648,14 @@ pub fn prove_with_options_and_inputs( let runtime_page_ranges = traces.runtime_page_ranges(); // Phase 4: Prove (multi_prove) - let proof = Prover::multi_prove( - airs.air_trace_pairs(&mut traces), - &mut DefaultTranscript::::new(&[]), - ) - .map_err(|e| Error::Prover(format!("{e:?}")))?; + let air_pairs = airs.air_trace_pairs(&mut traces); + let transcript = &mut DefaultTranscript::::new(&[]); + #[cfg(feature = "disk-spill")] + let proof = Prover::multi_prove_with_mode(air_pairs, transcript, storage_mode) + 
.map_err(|e| Error::Prover(format!("{e:?}")))?; + #[cfg(not(feature = "disk-spill"))] + let proof = + Prover::multi_prove(air_pairs, transcript).map_err(|e| Error::Prover(format!("{e:?}")))?; #[cfg(feature = "instruments")] { diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index ee1ff3ccd..df962ef72 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -31,6 +31,8 @@ use executor::elf::Elf; use executor::vm::instruction::decoding::Instruction; use executor::vm::logs::Log; use executor::vm::memory::U64HashMap; +#[cfg(feature = "disk-spill")] +use stark::storage_mode::StorageMode; use stark::trace::TraceTable; use super::bitwise::{self, BitwiseOperation, BitwiseOperationType}; @@ -97,6 +99,14 @@ impl MemoryState { Self { cells } } + /// Count unique memory pages touched during execution. + #[cfg(feature = "disk-spill")] + fn unique_page_count(&self, page_size: u64) -> u64 { + let mask = !(page_size - 1); + let pages: std::collections::HashSet = self.cells.keys().map(|&a| a & mask).collect(); + pages.len() as u64 + } + /// Pre-populate the private input memory region at `PRIVATE_INPUT_START_INDEX`. fn add_private_input(&mut self, private_input: &[u8]) { if private_input.is_empty() { @@ -1685,25 +1695,14 @@ struct CollectedOps { commit_ops: Vec, } -/// Chunk raw ops and generate one trace table per chunk. -#[cfg(not(feature = "disk-spill"))] +/// Chunk raw ops and generate one trace table per chunk. When `storage_mode` +/// is `Disk`, each chunk's main table is spilled to mmap before the next chunk +/// is built so peak heap usage stays bounded. 
fn chunk_and_generate( ops: &[T], max_rows: usize, generate: impl Fn(&[T]) -> TraceTable, -) -> Vec> { - if ops.is_empty() { - vec![generate(&[])] - } else { - ops.chunks(max_rows).map(generate).collect() - } -} - -#[cfg(feature = "disk-spill")] -fn chunk_generate_and_spill( - ops: &[T], - max_rows: usize, - generate: impl Fn(&[T]) -> TraceTable, + #[cfg(feature = "disk-spill")] storage_mode: StorageMode, ) -> Result>, Error> { let op_chunks: Vec<&[T]> = if ops.is_empty() { vec![&[][..]] @@ -1712,10 +1711,14 @@ fn chunk_generate_and_spill( }; let mut tables = Vec::with_capacity(op_chunks.len()); for chunk in op_chunks { + #[allow(unused_mut)] let mut t = generate(chunk); - t.main_table - .spill_to_disk() - .map_err(|e| Error::Prover(format!("disk-spill trace: {e}")))?; + #[cfg(feature = "disk-spill")] + if storage_mode == StorageMode::Disk { + t.main_table + .spill_to_disk() + .map_err(|e| Error::Prover(format!("disk-spill trace: {e}")))?; + } tables.push(t); } Ok(tables) @@ -1840,6 +1843,7 @@ fn build_traces( decode_pc_to_row: HashMap, register_state: RegisterState, max_rows: &super::MaxRowsConfig, + #[cfg(feature = "disk-spill")] storage_mode: StorageMode, private_input: &[u8], ) -> Result { let CollectedOps { @@ -1907,39 +1911,76 @@ fn build_traces( .ok_or(Error::MissingHaltEcall)?; let halt_timestamp = halt_op.timestamp; - // Dispatch macro: disk-spill variant spills main_table to mmap as tables are generated, - // freeing the heap buffers before the next chunk is built. - macro_rules! gen_traces { - ($ops:expr, $max:expr, $gen:expr) => {{ - #[cfg(feature = "disk-spill")] - { - chunk_generate_and_spill($ops, $max, $gen)? 
- } - #[cfg(not(feature = "disk-spill"))] - { - chunk_and_generate($ops, $max, $gen) - } - }}; - } - - let cpus = gen_traces!(&cpu_ops, max_rows.cpu, cpu::generate_cpu_trace); - let memws = gen_traces!(&memw_ops, max_rows.memw, memw::generate_memw_trace); - let memw_aligneds = gen_traces!( + let cpus = chunk_and_generate( + &cpu_ops, + max_rows.cpu, + cpu::generate_cpu_trace, + #[cfg(feature = "disk-spill")] + storage_mode, + )?; + let memws = chunk_and_generate( + &memw_ops, + max_rows.memw, + memw::generate_memw_trace, + #[cfg(feature = "disk-spill")] + storage_mode, + )?; + let memw_aligneds = chunk_and_generate( &memw_aligned_ops, max_rows.memw_aligned, - memw_aligned::generate_memw_aligned_trace - ); - let memw_registers = gen_traces!( + memw_aligned::generate_memw_aligned_trace, + #[cfg(feature = "disk-spill")] + storage_mode, + )?; + let memw_registers = chunk_and_generate( &memw_register_ops, max_rows.memw_register, - memw_register::generate_memw_register_trace - ); - let loads = gen_traces!(&load_ops, max_rows.load, load::generate_load_trace); - let lts = gen_traces!(&lt_ops, max_rows.lt, lt::generate_lt_trace); - let shifts = gen_traces!(&shift_ops, max_rows.shift, shift::generate_shift_trace); - let muls = gen_traces!(&mul_ops, max_rows.mul, mul::generate_mul_trace); - let dvrms = gen_traces!(&dvrm_ops, max_rows.dvrm, dvrm::generate_dvrm_trace); - let branches = gen_traces!(&branch_ops, max_rows.branch, branch::generate_branch_trace); + memw_register::generate_memw_register_trace, + #[cfg(feature = "disk-spill")] + storage_mode, + )?; + let loads = chunk_and_generate( + &load_ops, + max_rows.load, + load::generate_load_trace, + #[cfg(feature = "disk-spill")] + storage_mode, + )?; + let lts = chunk_and_generate( + &lt_ops, + max_rows.lt, + lt::generate_lt_trace, + #[cfg(feature = "disk-spill")] + storage_mode, + )?; + let shifts = chunk_and_generate( + &shift_ops, + max_rows.shift, + shift::generate_shift_trace, + #[cfg(feature = "disk-spill")] + 
storage_mode, + )?; + let muls = chunk_and_generate( + &mul_ops, + max_rows.mul, + mul::generate_mul_trace, + #[cfg(feature = "disk-spill")] + storage_mode, + )?; + let dvrms = chunk_and_generate( + &dvrm_ops, + max_rows.dvrm, + dvrm::generate_dvrm_trace, + #[cfg(feature = "disk-spill")] + storage_mode, + )?; + let branches = chunk_and_generate( + &branch_ops, + max_rows.branch, + branch::generate_branch_trace, + #[cfg(feature = "disk-spill")] + storage_mode, + )?; let mut bitwise = bitwise::generate_bitwise_trace(); bitwise::update_multiplicities(&mut bitwise, &bitwise_ops); @@ -2019,6 +2060,234 @@ fn build_traces( }) } +/// Padded row count after chunking: each chunk rounds up to `next_power_of_two().max(4)`. +#[cfg(feature = "disk-spill")] +fn padded_chunked_rows(ops_count: usize, max_rows: usize) -> u64 { + if ops_count == 0 { + return 4; // empty-chunk tables still allocate one 4-row padded chunk + } + let mut total: u64 = 0; + let mut remaining = ops_count; + while remaining > 0 { + let chunk_size = remaining.min(max_rows); + total += chunk_size.next_power_of_two().max(4) as u64; + remaining -= chunk_size; + } + total +} + +/// Per-table padded row counts plus auxiliary metrics for peak-heap estimation. +#[cfg(feature = "disk-spill")] +#[derive(Debug, Default, Clone)] +pub struct TableLengths { + pub cpu_padded_rows: u64, + pub memw_padded_rows: u64, + pub memw_aligned_padded_rows: u64, + pub memw_register_padded_rows: u64, + pub load_padded_rows: u64, + pub lt_padded_rows: u64, + pub shift_padded_rows: u64, + pub mul_padded_rows: u64, + pub dvrm_padded_rows: u64, + pub branch_padded_rows: u64, + pub commit_padded_rows: u64, + pub decode_rows: u64, + pub unique_page_count: u64, + /// Executor cycle count. + pub cycle_count: u64, + /// Unique byte addresses touched (dominant non-trace heap term). + pub unique_byte_count: u64, +} + +/// Compute upper-bound per-table row counts without allocating op vectors. 
+/// Returns bounds (not exact) for tables that dedup ops: LT, MUL, DVRM, BRANCH. +/// Must stay in sync with `Traces::from_elf_and_logs`. +#[cfg(feature = "disk-spill")] +pub fn count_table_lengths( + elf: &Elf, + logs: &[Log], + max_rows: &super::MaxRowsConfig, + private_input: &[u8], +) -> Result { + // Phase 0: ELF → instructions + DECODE row count. + let instructions = decode::instructions_from_elf(elf) + .map_err(|e| Error::Execution(format!("Failed to parse instructions: {e}")))?; + let (decode_trace, _decode_pc_to_row) = decode::generate_decode_trace(&instructions); + let decode_rows = decode_trace.num_rows() as u64; + + // Memory + register state for partition predicates that need timestamps. + let mut memory_state = MemoryState::from_elf(elf); + memory_state.add_private_input(private_input); + let mut register_state = RegisterState::new(elf.entry_point); + + // Raw counts (pre-chunking + pre-padding). + let mut cpu_count = 0usize; + // memw_by_width[i] for i in 0..4 maps width 1/2/4/8 → wide-MEMW counts. + // Used by the LT-from-MEMW derivation: each wide-MEMW op contributes + // 1, 2, 4, or 8 LT ops based on its width. 
+ let mut memw_by_width: [usize; 4] = [0; 4]; + let mut memw_aligned_count = 0usize; + let mut memw_register_count = 0usize; + let mut load_count = 0usize; + let mut lt_count = 0usize; + let mut shift_count = 0usize; + let mut mul_count = 0usize; + let mut dvrm_count = 0usize; + let mut branch_count = 0usize; + let mut commit_count = 0usize; + let mut current_commit_index = 0u32; + + let partition_memw = |op: &MemwOperation, + by_width: &mut [usize; 4], + aligned: &mut usize, + register: &mut usize| { + if is_register_op(op) { + *register += 1; + } else if is_aligned_op(op) { + *aligned += 1; + } else { + let idx = match op.width { + 1 => 0, + 2 => 1, + 4 => 2, + 8 => 3, + _ => return, + }; + by_width[idx] += 1; + } + }; + + for (i, log) in logs.iter().enumerate() { + let timestamp = (i as u64) * 4 + 4; + let instruction = instructions + .get(&log.current_pc) + .copied() + .ok_or(Error::MissingInstruction(log.current_pc))?; + let cpu_op = CpuOperation::from_log_and_instruction(log, timestamp, instruction); + cpu_count += 1; + + // Memory ops from load/store + if cpu_op.decode.op_load { + let (memw_op, _load_op, _bitwise) = + collect_load_op_from_cpu(&cpu_op, &mut memory_state); + partition_memw( + &memw_op, + &mut memw_by_width, + &mut memw_aligned_count, + &mut memw_register_count, + ); + load_count += 1; + } else if cpu_op.decode.op_store { + let memw_op = collect_store_op_from_cpu(&cpu_op, &mut memory_state); + partition_memw( + &memw_op, + &mut memw_by_width, + &mut memw_aligned_count, + &mut memw_register_count, + ); + } + + // Register accesses (M1 read rs1, M3 read rs2, M5 write rd). 
+ let reg_memw_ops = collect_register_ops_from_cpu(&cpu_op, &mut register_state); + for memw_op in &reg_memw_ops { + partition_memw( + memw_op, + &mut memw_by_width, + &mut memw_aligned_count, + &mut memw_register_count, + ); + } + + // ECALL Commit + if cpu_op.ecall_commit { + let commit_ops = expand_commit_operations_for_ecall( + &cpu_op, + &memory_state, + current_commit_index as u64, + ); + commit_count += commit_ops.len(); + let reg_commit_ops = + collect_commit_memw_ops(&cpu_op, &mut register_state, &mut memory_state); + for memw_op in &reg_commit_ops { + partition_memw( + memw_op, + &mut memw_by_width, + &mut memw_aligned_count, + &mut memw_register_count, + ); + } + let count = u32::try_from(cpu_op.commit_count) + .map_err(|_| Error::Execution("commit_count exceeds u32 range".into()))?; + current_commit_index = current_commit_index + .checked_add(count) + .ok_or_else(|| Error::Execution("commit index exceeds u32 range".into()))?; + } + + // CPU-side per-instruction-kind counters + if cpu_op.decode.op_slt || cpu_op.decode.op_blt { + lt_count += 1; + } + if cpu_op.decode.op_shift { + shift_count += 1; + } + if cpu_op.decode.op_mul { + mul_count += 1; + } + if cpu_op.decode.op_divrem { + dvrm_count += 1; + } + if cpu_op.branch_cond { + branch_count += 1; + } + } + + // HALT finalization: 32 register MEMW ops at ts=u64::MAX. Their timestamp + // delta vs old_timestamp is enormous, so they fail `is_register_op`'s + // `<= 0x10000` check and fall through to wide MEMW. + let halt_memw_ops = collect_halt_ops(&mut register_state); + for memw_op in &halt_memw_ops { + partition_memw( + memw_op, + &mut memw_by_width, + &mut memw_aligned_count, + &mut memw_register_count, + ); + } + + // LT-from-MEMW: per wide-MEMW op, 1/2/4/8 LT ops by width. + // LT-from-MEMW_A: 1 LT op per memw_aligned op. 
+ let memw_count = memw_by_width.iter().sum::(); + let lt_from_memw = + memw_by_width[0] + 2 * memw_by_width[1] + 4 * memw_by_width[2] + 8 * memw_by_width[3]; + lt_count += lt_from_memw + memw_aligned_count; + + // DVRM-derived: 2 mul ops (lo + hi) and 1 lt op (|r| < |d|) per dvrm. + mul_count += 2 * dvrm_count; + lt_count += dvrm_count; + + let unique_page_count = memory_state.unique_page_count(page::DEFAULT_PAGE_SIZE as u64); + let unique_byte_count = memory_state.cells.len() as u64; + let cycle_count = logs.len() as u64; + + Ok(TableLengths { + cpu_padded_rows: padded_chunked_rows(cpu_count, max_rows.cpu), + memw_padded_rows: padded_chunked_rows(memw_count, max_rows.memw), + memw_aligned_padded_rows: padded_chunked_rows(memw_aligned_count, max_rows.memw_aligned), + memw_register_padded_rows: padded_chunked_rows(memw_register_count, max_rows.memw_register), + load_padded_rows: padded_chunked_rows(load_count, max_rows.load), + lt_padded_rows: padded_chunked_rows(lt_count, max_rows.lt), + shift_padded_rows: padded_chunked_rows(shift_count, max_rows.shift), + mul_padded_rows: padded_chunked_rows(mul_count, max_rows.mul), + dvrm_padded_rows: padded_chunked_rows(dvrm_count, max_rows.dvrm), + branch_padded_rows: padded_chunked_rows(branch_count, max_rows.branch), + commit_padded_rows: commit_count.next_power_of_two().max(4) as u64, + decode_rows, + unique_page_count, + cycle_count, + unique_byte_count, + }) +} + impl Traces { /// Returns the total number of main-trace field elements across all tables. /// @@ -2399,6 +2668,35 @@ impl Traces { logs: &[Log], max_rows: &super::MaxRowsConfig, private_input: &[u8], + ) -> Result { + Self::from_elf_and_logs_inner( + elf, + logs, + max_rows, + private_input, + #[cfg(feature = "disk-spill")] + StorageMode::Ram, + ) + } + + /// Same as `from_elf_and_logs` but lets the caller pick a storage mode. 
+ #[cfg(feature = "disk-spill")] + pub fn from_elf_and_logs_with_mode( + elf: &Elf, + logs: &[Log], + max_rows: &super::MaxRowsConfig, + private_input: &[u8], + storage_mode: StorageMode, + ) -> Result { + Self::from_elf_and_logs_inner(elf, logs, max_rows, private_input, storage_mode) + } + + fn from_elf_and_logs_inner( + elf: &Elf, + logs: &[Log], + max_rows: &super::MaxRowsConfig, + private_input: &[u8], + #[cfg(feature = "disk-spill")] storage_mode: StorageMode, ) -> Result { // Phase 0: ELF → DECODE + instructions // IMPORTANT: Use generate_decode_trace (same as compute_precomputed_commitment) @@ -2438,6 +2736,8 @@ impl Traces { decode_pc_to_row, register_state, max_rows, + #[cfg(feature = "disk-spill")] + storage_mode, private_input, ) } @@ -2487,6 +2787,8 @@ impl Traces { decode_pc_to_row, register_state, max_rows, + #[cfg(feature = "disk-spill")] + StorageMode::Ram, &[], ) } diff --git a/prover/src/tests/disk_spill_tests.rs b/prover/src/tests/disk_spill_tests.rs index 3b8d14cbe..c8f7f6f9c 100644 --- a/prover/src/tests/disk_spill_tests.rs +++ b/prover/src/tests/disk_spill_tests.rs @@ -1,115 +1,76 @@ -//! Tests for the `disk-spill` feature. -//! -//! Verifies that proving and verification produce correct results when main -//! traces, LDE columns, and Merkle tree nodes are spilled to disk via mmap. +//! End-to-end tests forcing Disk storage via a 1 MB `max_ram_bytes` cap, so +//! even the smallest ELF deterministically crosses the threshold. use crate::VmProof; use crate::tables::MaxRowsConfig; use crate::test_utils::asm_elf_bytes; +use stark::proof::options::GoldilocksCubicProofOptions; -/// Prove + verify a small program end-to-end with disk-spill enabled. -/// This exercises the full pipeline: trace generation, main-trace spill, -/// LDE spill, Merkle-tree spill, and verification. 
+const FORCE_DISK_CAP: u64 = 1_000_000; + +fn options_forcing_disk() -> stark::proof::options::ProofOptions { + let mut opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); + opts.max_ram_bytes = Some(FORCE_DISK_CAP); + opts +} + +/// Prove + verify a small program with Disk storage forced. #[test] fn test_disk_spill_prove_and_verify_small() { let elf_bytes = asm_elf_bytes("sub"); - let result = crate::prove_and_verify(&elf_bytes); - assert!( - result.is_ok(), - "prove_and_verify failed: {:?}", - result.err() - ); - assert!(result.unwrap(), "verification returned false"); + let opts = options_forcing_disk(); + let vm_proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::default()) + .expect("prove failed"); + let ok = crate::verify_with_options(&vm_proof, &elf_bytes, &opts).expect("verify failed"); + assert!(ok, "verification returned false"); } -/// Prove + verify with `MaxRowsConfig::small()` (2^5 = 32 rows per chunk) -/// to force many chunks. This ensures disk-spill works across chunk boundaries -/// where pool buffers are reused and main traces are spilled per-chunk. +/// Prove + verify with small chunks to exercise spill across chunk boundaries. 
#[test] fn test_disk_spill_prove_and_verify_with_chunks() { let elf_bytes = asm_elf_bytes("sub"); - let proof_options = stark::proof::options::GoldilocksCubicProofOptions::with_blowup(2) - .expect("blowup=2 is always valid"); - let vm_proof = crate::prove_with_options(&elf_bytes, &proof_options, &MaxRowsConfig::small()); - assert!( - vm_proof.is_ok(), - "prove_with_options failed: {:?}", - vm_proof.err() - ); - let vm_proof = vm_proof.unwrap(); - - let ok = crate::verify_with_options(&vm_proof, &elf_bytes, &proof_options); - assert!(ok.is_ok(), "verify_with_options failed: {:?}", ok.err()); - assert!(ok.unwrap(), "verification returned false"); + let opts = options_forcing_disk(); + let vm_proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::small()) + .expect("prove failed"); + let ok = crate::verify_with_options(&vm_proof, &elf_bytes, &opts).expect("verify failed"); + assert!(ok, "verification returned false"); } -/// Prove, serialize with bincode, deserialize, then verify. -/// This reproduces the exact CLI path: prove → write → read → verify. +/// Prove, serialize, deserialize, verify (CLI roundtrip). 
#[test] fn test_disk_spill_serialization_roundtrip() { let elf_bytes = asm_elf_bytes("sub"); - let proof = crate::prove(&elf_bytes).expect("prove failed"); + let opts = options_forcing_disk(); + let proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::default()) + .expect("prove failed"); let bytes = bincode::serialize(&proof).expect("serialize failed"); - eprintln!("Proof serialized: {} bytes", bytes.len()); - let proof2: VmProof = bincode::deserialize(&bytes).expect("deserialize failed"); - let valid = crate::verify(&proof2, &elf_bytes).expect("verify failed"); + let valid = crate::verify_with_options(&proof2, &elf_bytes, &opts).expect("verify failed"); assert!(valid, "verification failed after serialization roundtrip"); } -/// Print struct sizes to verify memory analysis -#[test] -fn test_print_struct_sizes() { - use std::mem::size_of; - eprintln!( - "CpuOperation: {} bytes", - size_of::() - ); - eprintln!( - "MemwOperation: {} bytes", - size_of::() - ); - eprintln!( - "LtOperation: {} bytes", - size_of::() - ); - eprintln!( - "BranchOperation: {} bytes", - size_of::() - ); - eprintln!( - "BitwiseOperation: {} bytes", - size_of::() - ); - eprintln!( - "ShiftOperation: {} bytes", - size_of::() - ); -} - -/// Test prove+verify with a larger program (2M instructions). -/// This catches bugs that only manifest at scale (multiple chunks, larger tables). +/// Prove + verify a 2M-instruction program to catch scale-only bugs. 
#[test] fn test_disk_spill_prove_and_verify_2m() { let _ = env_logger::builder().is_test(true).try_init(); let elf_bytes = asm_elf_bytes("fib_iterative_2M"); - let result = crate::prove_and_verify(&elf_bytes).expect("prove_and_verify failed"); - assert!(result, "verification returned false for fib_iterative_2M"); + let opts = options_forcing_disk(); + let vm_proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::default()) + .expect("prove failed"); + let ok = crate::verify_with_options(&vm_proof, &elf_bytes, &opts).expect("verify failed"); + assert!(ok, "verification returned false for fib_iterative_2M"); } -/// Same as above but with small chunks (MaxRowsConfig::small()). +/// Same as roundtrip test but with small chunks. #[test] fn test_disk_spill_serialization_roundtrip_chunked() { let elf_bytes = asm_elf_bytes("sub"); - let opts = stark::proof::options::GoldilocksCubicProofOptions::with_blowup(2) - .expect("blowup=2 is always valid"); + let opts = options_forcing_disk(); let proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::small()) .expect("prove failed"); let bytes = bincode::serialize(&proof).expect("serialize failed"); - eprintln!("Chunked proof serialized: {} bytes", bytes.len()); - let proof2: VmProof = bincode::deserialize(&bytes).expect("deserialize failed"); let valid = crate::verify_with_options(&proof2, &elf_bytes, &opts).expect("verify failed"); assert!( diff --git a/prover/src/tests/mod.rs b/prover/src/tests/mod.rs index 338ba8c87..303c24401 100644 --- a/prover/src/tests/mod.rs +++ b/prover/src/tests/mod.rs @@ -24,6 +24,8 @@ pub mod lt_bus_tests; pub mod lt_tests; #[cfg(test)] pub mod mul_tests; +#[cfg(all(test, feature = "disk-spill"))] +pub mod peak_bytes_calibration_tests; #[cfg(test)] pub mod prove_elfs_tests; #[cfg(test)] diff --git a/prover/src/tests/peak_bytes_calibration_tests.rs b/prover/src/tests/peak_bytes_calibration_tests.rs new file mode 100644 index 000000000..6dc7c562f --- /dev/null +++ 
b/prover/src/tests/peak_bytes_calibration_tests.rs @@ -0,0 +1,89 @@ +//! Calibration test: predicted [`peak_bytes`] vs measured RSS during a real proof. +//! +//! Runs a small fib_iterative proof, samples the process's RSS while the proof +//! is running, and asserts the prediction is within 2× of the measured peak +//! (after subtracting the pre-proof baseline). RSS includes mmap'd files, the +//! code segment, and allocator slack on top of the heap-only quantity that +//! [`peak_bytes`] models, so the bound is intentionally loose; the test is a +//! regression guard against silent drift, not a tightness measure. +//! +//! [`peak_bytes`]: crate::auto_storage::peak_bytes + +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::thread; +use std::time::Duration; + +use stark::proof::options::GoldilocksCubicProofOptions; + +use crate::auto_storage; +use crate::tables::MaxRowsConfig; +use crate::tables::trace_builder::count_table_lengths; +use crate::test_utils::{asm_elf_bytes, run_asm_elf}; + +fn current_rss_bytes() -> Option<usize> { + let pid = sysinfo::get_current_pid().ok()?; + let mut sys = sysinfo::System::new(); + sys.refresh_processes(sysinfo::ProcessesToUpdate::Some(&[pid])); + sys.process(pid).map(|p| p.memory() as usize) +} + +#[test] +fn peak_bytes_within_2x_of_measured_rss() { + let (elf, logs, _) = run_asm_elf("fib_iterative_372k"); + let elf_bytes = asm_elf_bytes("fib_iterative_372k"); + + let max_rows = MaxRowsConfig::default(); + let lengths = + count_table_lengths(&elf, &logs, &max_rows, &[]).expect("count_table_lengths succeeds"); + + let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is valid"); + let predicted = auto_storage::peak_bytes( + &lengths, + opts.blowup_factor, + stark::prover::table_parallelism(), + ) as usize; + + // Drop logs etc. before sampling baseline so they don't inflate it. 
+ drop(logs); + + let baseline = current_rss_bytes().expect("RSS reader works on this platform"); + let peak = Arc::new(AtomicUsize::new(baseline)); + let stop = Arc::new(AtomicBool::new(false)); + + let sampler = { + let peak = Arc::clone(&peak); + let stop = Arc::clone(&stop); + thread::spawn(move || { + while !stop.load(Ordering::Relaxed) { + if let Some(rss) = current_rss_bytes() { + peak.fetch_max(rss, Ordering::Relaxed); + } + thread::sleep(Duration::from_millis(50)); + } + }) + }; + + let _proof = crate::prove_with_options_and_inputs(&elf_bytes, &[], &opts, &max_rows) + .expect("proof succeeds"); + + stop.store(true, Ordering::Relaxed); + sampler.join().expect("sampler joins"); + + let measured = peak.load(Ordering::Relaxed).saturating_sub(baseline); + + eprintln!( + "peak_bytes calibration: predicted={predicted} bytes, measured_above_baseline={measured} bytes" + ); + + assert!( + predicted.saturating_mul(2) >= measured, + "peak_bytes underestimates measured RSS by more than 2×: \ + predicted={predicted}, measured={measured}" + ); + assert!( + predicted <= measured.saturating_mul(2), + "peak_bytes overestimates measured RSS by more than 2×: \ + predicted={predicted}, measured={measured}" + ); +} diff --git a/scripts/calibrate_threshold.sh b/scripts/calibrate_threshold.sh new file mode 100755 index 000000000..9ecb06cec --- /dev/null +++ b/scripts/calibrate_threshold.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Calibrate the auto-disk-spill threshold: actual RSS / predicted_peak_bytes. +# +# Usage: calibrate_threshold.sh elf1.elf [elf2.elf ...] +# +# Builds CLI with jemalloc-stats, runs each ELF under `/usr/bin/time -v`, +# and prints predicted vs measured peak. The max of rss/pred is r_max; +# set the threshold in select_storage_mode to ~1/r_max minus a small margin. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +OUT="/tmp/calibrate_threshold" + +mkdir -p "$OUT" +rm -f "$OUT"/*.txt + +echo "Building CLI with jemalloc-stats..." +cargo build --release -p cli --features jemalloc-stats --manifest-path "$ROOT_DIR/Cargo.toml" 2>&1 | tail -1 + +CLI="$ROOT_DIR/target/release/cli" + +printf "\n%-55s %10s %10s %10s %10s %10s\n" \ + "ELF" "pred(MB)" "heap(MB)" "rss(MB)" "rss/pred" "heap/pred" +printf '%.0s-' {1..110} +printf '\n' + +for elf in "$@"; do + name=$(basename "$elf") + RUST_LOG=info /usr/bin/time -v "$CLI" prove "$elf" -o "$OUT/proof.bin" \ + > "$OUT/out.txt" 2> "$OUT/err.txt" || { + echo "FAIL: $name" + tail -5 "$OUT/err.txt" + continue + } + + pred=$(grep -o 'predicted_peak_bytes: [0-9]*' "$OUT/err.txt" | awk '{print $2}') + heap_mb=$(grep -o 'Peak heap: [0-9]*' "$OUT/out.txt" | awk '{print $3}') + rss_kb=$(grep "Maximum resident set size" "$OUT/err.txt" | awk '{print $NF}') + + awk -v name="$name" -v p="$pred" -v h="$heap_mb" -v r="$rss_kb" 'BEGIN { + pred_mb = p / 1024 / 1024 + rss_mb = r / 1024 + printf "%-55s %10.0f %10.0f %10.0f %10.2f %10.2f\n", + name, pred_mb, h, rss_mb, rss_mb/pred_mb, h/pred_mb + }' + + rm -f "$OUT/proof.bin" +done + +echo "" +echo "Take the max rss/pred across runs as r_max." +echo "Set threshold in select_storage_mode to ~1/r_max minus margin (e.g. 0.05)." From 3dce783bb1d8c8016076b407dfbcd9a165799ff5 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 16:44:45 -0300 Subject: [PATCH 108/231] Remove dead extract_columns_into --- crypto/stark/src/table.rs | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index e1f6628e9..3c8ac4a18 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -208,31 +208,6 @@ impl Table { .collect() } - /// Extract columns directly into pre-allocated output buffers. - /// - /// Each `output[col_idx]` is cleared and filled with the column data. 
- /// When `output[col_idx].capacity() >= height`, no heap allocation occurs. - /// This eliminates the T1 transpose allocation that `columns()` performs. - pub fn extract_columns_into(&self, output: &mut [Vec>]) { - debug_assert!( - output.len() >= self.width, - "output has {} buffers but table has {} columns", - output.len(), - self.width - ); - #[cfg(feature = "parallel")] - let iter = output[..self.width].par_iter_mut().enumerate(); - #[cfg(not(feature = "parallel"))] - let iter = output[..self.width].iter_mut().enumerate(); - iter.for_each(|(col_idx, buf)| { - buf.clear(); - buf.reserve(self.height.saturating_sub(buf.capacity())); - for row_idx in 0..self.height { - buf.push(self.get(row_idx, col_idx).clone()); - } - }); - } - /// Given row and column indexes, returns the stored field element in that position of the table. #[inline] pub fn get(&self, row: usize, col: usize) -> &FieldElement { @@ -270,7 +245,7 @@ impl Table { /// Spill the table's row-major data to a temp file and mmap it back. /// Frees the heap `data` Vec while preserving access through `get()`, - /// `get_row()`, `columns()`, and `extract_columns_into()`. + /// `get_row()`, and `columns()`. /// /// No-op if the table is empty or already spilled. 
#[cfg(feature = "disk-spill")] From a626c319eea3710d4cd5e49e565447466b5d6d0a Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 16:45:16 -0300 Subject: [PATCH 109/231] Use checked_mul in spill_nodes_to_disk --- crypto/crypto/src/merkle_tree/merkle.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 27897ecad..2471ca3fe 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -286,10 +286,17 @@ where let node_size = core::mem::size_of::(); let node_count = self.nodes.len(); - let total_bytes = node_count * node_size; + let total_bytes = (node_count as u64) + .checked_mul(node_size as u64) + .ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "spill_nodes_to_disk: byte count overflows u64", + ) + })?; let file = tempfile::tempfile()?; - file.set_len(total_bytes as u64)?; + file.set_len(total_bytes)?; // Write directly through a writable mmap, then downgrade to read-only. // Avoids the write(2) → page-cache → mmap hand-off, which on Linux @@ -301,8 +308,9 @@ where let mut mmap_mut = unsafe { memmap2::MmapOptions::new().map_mut(&file)? }; // SAFETY: B::Node is a plain byte array ([u8; N]), so casting // the contiguous Vec to a byte slice is valid. 
- let bytes = - unsafe { core::slice::from_raw_parts(self.nodes.as_ptr() as *const u8, total_bytes) }; + let bytes = unsafe { + core::slice::from_raw_parts(self.nodes.as_ptr() as *const u8, node_count * node_size) + }; mmap_mut.copy_from_slice(bytes); mmap_mut.flush()?; let mmap = mmap_mut.make_read_only()?; From 9a4b1fc3e56f3949439abb2e610eef5e161b1636 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 17:09:07 -0300 Subject: [PATCH 110/231] Require Copy on BaseType for spill_to_disk --- crypto/stark/src/prover.rs | 8 ++++++++ crypto/stark/src/table.rs | 12 +++++++++--- crypto/stark/src/trace.rs | 10 ++++++++-- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 92ae5bfd7..1c9e631a7 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1601,6 +1601,8 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, PI: Send + Sync + Clone, + ::BaseType: Copy, + ::BaseType: Copy, { Self::multi_prove_inner( air_trace_pairs, @@ -1622,6 +1624,8 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, PI: Send + Sync + Clone, + ::BaseType: Copy, + ::BaseType: Copy, { Self::multi_prove_inner(air_trace_pairs, transcript, storage_mode) } @@ -1635,6 +1639,8 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, PI: Send + Sync + Clone, + ::BaseType: Copy, + ::BaseType: Copy, { info!("Started proof generation..."); @@ -2114,6 +2120,8 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, PI: Send + Sync + Clone, + ::BaseType: Copy, + ::BaseType: Copy, { let air_trace_pairs = vec![(air, trace, pub_inputs)]; Self::multi_prove(air_trace_pairs, transcript) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 3c8ac4a18..586312d15 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -164,7 +164,8 @@ impl Table { let offset = row_idx * backing.width * 
backing.elem_size; // SAFETY: spill_to_disk writes the table in row-major layout, so // width elements at this offset are contiguous. FieldElement - // is #[repr(transparent)]. + // is #[repr(transparent)] and spill_to_disk requires + // F::BaseType: Copy, ruling out indirection or non-trivial drop. return unsafe { std::slice::from_raw_parts( backing.mmap.as_ptr().add(offset) as *const FieldElement, @@ -225,7 +226,9 @@ impl Table { let offset = (row * backing.width + col) * backing.elem_size; // SAFETY: FieldElement is #[repr(transparent)] over F::BaseType. // The mmap is page-aligned and elements are contiguously packed. - // The data was written from identical types on the same machine. + // The data was written from identical types on the same machine, + // and spill_to_disk requires F::BaseType: Copy so the byte + // representation has no indirection or non-trivial drop. return unsafe { &*(backing.mmap.as_ptr().add(offset) as *const FieldElement) }; } let idx = row * self.width + col; @@ -249,7 +252,10 @@ impl Table { /// /// No-op if the table is empty or already spilled. #[cfg(feature = "disk-spill")] - pub fn spill_to_disk(&mut self) -> std::io::Result<()> { + pub fn spill_to_disk(&mut self) -> std::io::Result<()> + where + F::BaseType: Copy, + { const { assert!( std::mem::size_of::>() diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 65723e195..334dedd65 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -150,12 +150,18 @@ where /// Write main trace data to a temp file and free the in-memory vector. /// Accessors read from the mmap after this call. 
#[cfg(feature = "disk-spill")] - pub fn spill_main_to_disk(&mut self) -> std::io::Result<()> { + pub fn spill_main_to_disk(&mut self) -> std::io::Result<()> + where + F::BaseType: Copy, + { self.main_table.spill_to_disk() } #[cfg(feature = "disk-spill")] - pub fn spill_aux_to_disk(&mut self) -> std::io::Result<()> { + pub fn spill_aux_to_disk(&mut self) -> std::io::Result<()> + where + E::BaseType: Copy, + { self.aux_table.spill_to_disk() } From 70634b96e493bc0d18dc7592ac1bc08a70cbc3f4 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 17:13:13 -0300 Subject: [PATCH 111/231] Materialize spilled data when serializing --- Cargo.lock | 2 + crypto/crypto/Cargo.toml | 1 + crypto/crypto/src/merkle_tree/merkle.rs | 67 ++++++++++++++++++++++++- crypto/stark/Cargo.toml | 1 + crypto/stark/src/table.rs | 57 ++++++++++++++++++++- 5 files changed, 125 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b9e0350d1..7778cb8d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -772,6 +772,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" name = "crypto" version = "0.1.0" dependencies = [ + "bincode", "digest", "math", "memmap2", @@ -3190,6 +3191,7 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" name = "stark" version = "0.1.0" dependencies = [ + "bincode", "criterion 0.4.0", "crypto", "env_logger", diff --git a/crypto/crypto/Cargo.toml b/crypto/crypto/Cargo.toml index d0dffb64c..ae1c01ea5 100644 --- a/crypto/crypto/Cargo.toml +++ b/crypto/crypto/Cargo.toml @@ -26,6 +26,7 @@ math = { path = "../math", features = ["test-utils"] } rand = "0.8.5" rand_chacha = "0.3.1" sha2 = { version = "0.10", default-features = false } +bincode = "1" [features] default = ["asm", "std"] diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 2471ca3fe..b8606c79d 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ 
-43,7 +43,14 @@ pub(crate) struct MmapNodeBacking { /// The bottom leafs correspond to the hashes of the elements, while each upper /// layer contains the hash of the concatenation of the daughter nodes. #[cfg_attr(not(feature = "disk-spill"), derive(Clone))] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr( + all(feature = "serde", not(feature = "disk-spill")), + derive(serde::Serialize, serde::Deserialize) +)] +#[cfg_attr( + all(feature = "serde", feature = "disk-spill"), + derive(serde::Deserialize) +)] pub struct MerkleTree { pub root: B::Node, nodes: Vec, @@ -52,6 +59,28 @@ pub struct MerkleTree { mmap_backing: Option, } +#[cfg(all(feature = "serde", feature = "disk-spill"))] +impl serde::Serialize for MerkleTree +where + B::Node: serde::Serialize + Copy, +{ + fn serialize(&self, serializer: S) -> Result { + use serde::ser::SerializeStruct; + let mut s = serializer.serialize_struct("MerkleTree", 2)?; + s.serialize_field("root", &self.root)?; + if let Some(ref backing) = self.mmap_backing { + let mut materialized = Vec::with_capacity(backing.node_count); + for i in 0..backing.node_count { + materialized.push(*self.node_get(i).expect("index in bounds")); + } + s.serialize_field("nodes", &materialized)?; + } else { + s.serialize_field("nodes", &self.nodes)?; + } + s.end() + } +} + const ROOT: usize = 0; impl MerkleTree @@ -327,3 +356,39 @@ where Ok(()) } } + +#[cfg(all(test, feature = "serde", feature = "disk-spill"))] +mod disk_spill_serde_tests { + use super::*; + use crate::merkle_tree::backends::field_element::FieldElementBackend; + use math::field::{element::FieldElement, goldilocks::GoldilocksField}; + use sha3::Keccak256; + + type F = GoldilocksField; + type FE = FieldElement; + type Backend = FieldElementBackend; + + /// Serializing a spilled MerkleTree must produce identical bytes to + /// serializing the same tree before spilling, and round-trip back to an + /// equal tree. 
+ #[test] + fn test_serialize_spilled_merkle_tree_matches_unspilled() { + let values: Vec = (1..17).map(FE::from).collect(); + let unspilled = MerkleTree::::build(&values).expect("build merkle tree"); + let unspilled_bytes = bincode::serialize(&unspilled).expect("serialize unspilled"); + + let mut spilled = MerkleTree::::build(&values).expect("build merkle tree"); + spilled.spill_nodes_to_disk().expect("spill_nodes_to_disk"); + let spilled_bytes = bincode::serialize(&spilled).expect("serialize spilled"); + + assert_eq!( + spilled_bytes, unspilled_bytes, + "spilled and unspilled trees must serialize to identical bytes" + ); + + let restored: MerkleTree = + bincode::deserialize(&spilled_bytes).expect("deserialize spilled bytes"); + assert!(restored.mmap_backing.is_none()); + assert_eq!(restored.root, unspilled.root); + } +} diff --git a/crypto/stark/Cargo.toml b/crypto/stark/Cargo.toml index d847cac32..bdd3598a0 100644 --- a/crypto/stark/Cargo.toml +++ b/crypto/stark/Cargo.toml @@ -39,6 +39,7 @@ serde_cbor = { version = "0.11.1" } criterion = { version = "0.4", default-features = false } env_logger = "*" test-log = { version = "0.2.11", features = ["log"] } +bincode = "1" [features] test-utils = [] diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 586312d15..851350542 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -38,8 +38,8 @@ impl std::fmt::Debug for TableMmapBacking { /// the STARK protocol implementation, such as the `TraceTable` and the `EvaluationFrame`. /// Since this struct is a representation of a two-dimensional table, all rows should have the same /// length. 
-#[derive(Default, Debug, serde::Serialize, serde::Deserialize)] -#[cfg_attr(not(feature = "disk-spill"), derive(Clone))] +#[derive(Default, Debug, serde::Deserialize)] +#[cfg_attr(not(feature = "disk-spill"), derive(serde::Serialize, Clone))] #[serde(bound = "")] pub struct Table { pub data: Vec>, @@ -50,6 +50,31 @@ pub struct Table { pub(crate) mmap_backing: Option, } +#[cfg(feature = "disk-spill")] +impl serde::Serialize for Table +where + FieldElement: serde::Serialize, +{ + fn serialize(&self, serializer: S) -> Result { + use serde::ser::SerializeStruct; + let mut s = serializer.serialize_struct("Table", 3)?; + if self.mmap_backing.is_some() { + let mut materialized = Vec::with_capacity(self.width * self.height); + for r in 0..self.height { + for elem in self.get_row(r) { + materialized.push(elem.clone()); + } + } + s.serialize_field("data", &materialized)?; + } else { + s.serialize_field("data", &self.data)?; + } + s.serialize_field("width", &self.width)?; + s.serialize_field("height", &self.height)?; + s.end() + } +} + /// Cloning a spilled table reads its mmap bytes into a fresh heap `Vec` /// and returns an unspilled clone. This is cold — callers pay the full /// materialization cost — but avoids the runtime panic a derived impl @@ -489,4 +514,32 @@ mod disk_spill_tests { assert_eq!(cloned.height, height); assert_eq!(cloned, table, "clone must equal source element-wise"); } + + /// Serializing a spilled table must produce identical bytes to serializing + /// the same table before spilling, and round-trip back to an equal table. 
+ #[test] + fn test_serialize_spilled_table_matches_unspilled() { + let width = 4; + let height = 8; + let data: Vec> = (0..width * height) + .map(|i| FieldElement::::from(i as u64)) + .collect(); + + let unspilled = Table::new(data.clone(), width); + let unspilled_bytes = bincode::serialize(&unspilled).expect("serialize unspilled"); + + let mut spilled = Table::new(data, width); + spilled.spill_to_disk().expect("spill_to_disk failed"); + let spilled_bytes = bincode::serialize(&spilled).expect("serialize spilled"); + + assert_eq!( + spilled_bytes, unspilled_bytes, + "spilled and unspilled tables must serialize to identical bytes" + ); + + let restored: Table = + bincode::deserialize(&spilled_bytes).expect("deserialize spilled bytes"); + assert!(!restored.is_spilled()); + assert_eq!(restored, unspilled); + } } From 2881cff8ccb71c60585b9530456bcd33cb722912 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 18:08:15 -0300 Subject: [PATCH 112/231] Gate wasm32 compile_error on disk-spill --- crypto/crypto/src/lib.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/crypto/crypto/src/lib.rs b/crypto/crypto/src/lib.rs index 1287a3d66..3587c0b28 100644 --- a/crypto/crypto/src/lib.rs +++ b/crypto/crypto/src/lib.rs @@ -1,15 +1,8 @@ #![allow(clippy::op_ref)] #![cfg_attr(not(feature = "std"), no_std)] -// `std` pulls in `memmap2` (used by `crypto/stark`'s disk-backed Merkle node -// storage), which doesn't compile on wasm32. Fail loudly here so downstream -// crates that depend on `crypto/crypto` directly with `std` get a clear -// message instead of a transitive memmap2 build error. 
-#[cfg(all(target_arch = "wasm32", feature = "std"))] -compile_error!( - "wasm32 targets are not supported with feature \"std\": StorageMode::Disk \ - requires memmap2, which does not compile on wasm32" -); +#[cfg(all(target_arch = "wasm32", feature = "disk-spill"))] +compile_error!("the `disk-spill` feature requires memmap2, which does not compile on wasm32"); #[macro_use] extern crate alloc; From 2e22b5b38c850de250251f13e41a1ea09478370e Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 18:08:48 -0300 Subject: [PATCH 113/231] Assert mmap-fitting alignment instead of layout tautology --- crypto/stark/src/table.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 851350542..911ff8d4a 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -281,11 +281,13 @@ impl Table { where F::BaseType: Copy, { + // mmap base is page-aligned (typically 4096); any element with smaller + // alignment is therefore aligned at every offset, since size_of is + // always a multiple of align_of by Rust layout rules. const { assert!( - std::mem::size_of::>() - .is_multiple_of(std::mem::align_of::>()), - "FieldElement size must be a multiple of its alignment for mmap interior reads to be aligned" + std::mem::align_of::>() <= 4096, + "FieldElement alignment must fit within mmap page alignment" ) } From 9c009f1fd8b38bf3398d0f54f01f281b6d0925fa Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 18:09:01 -0300 Subject: [PATCH 114/231] Assert page_size is power of two --- prover/src/tables/trace_builder.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index df962ef72..4ff7f09a1 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -102,6 +102,10 @@ impl MemoryState { /// Count unique memory pages touched during execution. 
#[cfg(feature = "disk-spill")] fn unique_page_count(&self, page_size: u64) -> u64 { + debug_assert!( + page_size.is_power_of_two(), + "page_size must be a power of two for the bitmask to work" + ); let mask = !(page_size - 1); let pages: std::collections::HashSet = self.cells.keys().map(|&a| a & mask).collect(); pages.len() as u64 From 210a8950cef29858031b1df5cd31660f81244a41 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 18:31:10 -0300 Subject: [PATCH 115/231] Drop stale percentage from max_ram_bytes doc --- crypto/stark/src/proof/options.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/stark/src/proof/options.rs b/crypto/stark/src/proof/options.rs index ad2dfbf30..d41fef510 100644 --- a/crypto/stark/src/proof/options.rs +++ b/crypto/stark/src/proof/options.rs @@ -40,8 +40,9 @@ impl fmt::Display for ProofOptionsError { /// - `grinding_factor`: the number of leading zeros that we want for the Hash(hash || nonce) /// - `max_ram_bytes`: optional ceiling on prover RAM usage. When set, the /// prover spills trace tables and Merkle-tree nodes to mmap if the -/// estimated peak exceeds this cap (or 80% of system-available RAM, -/// whichever is smaller). LDE column vectors remain in RAM regardless. +/// estimated peak exceeds this cap or system-available RAM (less a safety +/// margin), whichever is smaller. LDE column vectors remain in RAM +/// regardless. 
#[cfg_attr(feature = "wasm", wasm_bindgen)] #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] pub struct ProofOptions { From 6507fa0d00b901df518c7255519d89d11b22f003 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 18:31:12 -0300 Subject: [PATCH 116/231] Tighten count_table_lengths comment --- prover/src/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 7239a109e..f7e10209f 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -582,8 +582,9 @@ pub fn prove_with_options_and_inputs( #[cfg(feature = "disk-spill")] let storage_mode = { // Stream over logs once to compute exact per-table row counts without - // allocating any op vectors. Use the resulting `TableLengths` to - // estimate peak heap analytically and pick a storage mode. + // building per-instruction op vectors (the decode trace is still built + // for the row count). Use the resulting `TableLengths` to estimate + // peak heap analytically and pick a storage mode. let lengths = crate::tables::trace_builder::count_table_lengths( &program, &result.logs, From cfa6eb2664776c097f01277972316239e417313c Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 18:31:27 -0300 Subject: [PATCH 117/231] Warn on Ram pick that may exceed cgroup limit --- prover/src/auto_storage.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/prover/src/auto_storage.rs b/prover/src/auto_storage.rs index 02f7e40cb..9ddeeb4ec 100644 --- a/prover/src/auto_storage.rs +++ b/prover/src/auto_storage.rs @@ -293,6 +293,13 @@ pub fn select_storage_mode( if estimated > threshold { StorageMode::Disk } else { + if cap.is_none() && estimated.saturating_mul(2) >= available.unwrap_or(0) { + log::warn!( + "Auto disk-spill picked Ram with estimated_peak={estimated} bytes near \ + available={available:?}. Set max_ram_bytes to bound the budget to a \ + cgroup limit if running in a container." 
+ ); + } StorageMode::Ram } } From a785986e794bca0337f3f7d25ccac19cf665ddec Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 18:31:28 -0300 Subject: [PATCH 118/231] Reserve spill file blocks via posix_fallocate --- Cargo.lock | 1 + crypto/crypto/Cargo.toml | 3 ++- crypto/crypto/src/merkle_tree/merkle.rs | 19 ++++++++++++++++++- crypto/stark/src/table.rs | 19 ++++++++++++++++++- 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7778cb8d8..c991ea3f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -774,6 +774,7 @@ version = "0.1.0" dependencies = [ "bincode", "digest", + "libc", "math", "memmap2", "rand 0.8.5", diff --git a/crypto/crypto/Cargo.toml b/crypto/crypto/Cargo.toml index ae1c01ea5..89e314c25 100644 --- a/crypto/crypto/Cargo.toml +++ b/crypto/crypto/Cargo.toml @@ -20,6 +20,7 @@ rand = { version = "0.8.5", default-features = false } rand_chacha = { version = "0.3.1", default-features = false } memmap2 = { version = "0.9", optional = true } tempfile = { version = "3", optional = true } +libc = { version = "0.2", optional = true } [dev-dependencies] math = { path = "../math", features = ["test-utils"] } @@ -34,5 +35,5 @@ asm = ["sha3/asm"] std = ["math/std", "sha3/std", "serde?/std"] serde = ["dep:serde"] parallel = ["dep:rayon"] -disk-spill = ["std", "dep:memmap2", "dep:tempfile"] +disk-spill = ["std", "dep:memmap2", "dep:tempfile", "dep:libc"] alloc = [] \ No newline at end of file diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index b8606c79d..36a956363 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -33,6 +33,23 @@ pub(crate) struct MmapNodeBacking { node_size: usize, } +/// Resize `file` to `total_bytes` and reserve disk blocks where supported, so +/// later mmap writes fault with `ENOSPC` from this call instead of `SIGBUS` +/// after the temp filesystem fills up. 
+#[cfg(feature = "disk-spill")] +fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::Result<()> { + file.set_len(total_bytes)?; + #[cfg(target_os = "linux")] + { + use std::os::unix::io::AsRawFd; + let ret = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, total_bytes as i64) }; + if ret != 0 { + return Err(std::io::Error::from_raw_os_error(ret)); + } + } + Ok(()) +} + /// The struct for the Merkle tree, consisting of the root and the nodes. /// A typical tree would look like this /// root @@ -325,7 +342,7 @@ where })?; let file = tempfile::tempfile()?; - file.set_len(total_bytes)?; + reserve_file_blocks(&file, total_bytes)?; // Write directly through a writable mmap, then downgrade to read-only. // Avoids the write(2) → page-cache → mmap hand-off, which on Linux diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 911ff8d4a..983ff9a07 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -33,6 +33,23 @@ impl std::fmt::Debug for TableMmapBacking { } } +/// Resize `file` to `total_bytes` and reserve disk blocks where supported, so +/// later mmap writes fault with `ENOSPC` from this call instead of `SIGBUS` +/// after the temp filesystem fills up. +#[cfg(feature = "disk-spill")] +fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::Result<()> { + file.set_len(total_bytes)?; + #[cfg(target_os = "linux")] + { + use std::os::unix::io::AsRawFd; + let ret = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, total_bytes as i64) }; + if ret != 0 { + return Err(std::io::Error::from_raw_os_error(ret)); + } + } + Ok(()) +} + /// A two-dimensional Table holding field elements, arranged in a row-major order. /// This is the basic underlying data structure used for any two-dimensional component in the /// the STARK protocol implementation, such as the `TraceTable` and the `EvaluationFrame`. 
@@ -306,7 +323,7 @@ impl Table { })?; let file = tempfile::tempfile()?; - file.set_len(total_bytes)?; + reserve_file_blocks(&file, total_bytes)?; // Write directly through a writable mmap, then downgrade to read-only. // Avoids the write(2) → page-cache → mmap hand-off, which on Linux From b6abf7e7047574a542612c13d2810453223c1899 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 18:45:28 -0300 Subject: [PATCH 119/231] Share reserve_file_blocks between spill paths --- crypto/crypto/src/lib.rs | 2 ++ crypto/crypto/src/merkle_tree/merkle.rs | 19 +------------------ crypto/crypto/src/mmap_util.rs | 15 +++++++++++++++ crypto/stark/src/table.rs | 19 +------------------ 4 files changed, 19 insertions(+), 36 deletions(-) create mode 100644 crypto/crypto/src/mmap_util.rs diff --git a/crypto/crypto/src/lib.rs b/crypto/crypto/src/lib.rs index 3587c0b28..d7a273d62 100644 --- a/crypto/crypto/src/lib.rs +++ b/crypto/crypto/src/lib.rs @@ -10,6 +10,8 @@ extern crate alloc; pub mod fiat_shamir; pub mod hash; pub mod merkle_tree; +#[cfg(feature = "disk-spill")] +pub mod mmap_util; #[cfg(test)] pub mod tests; diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 36a956363..b4fee6bde 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -33,23 +33,6 @@ pub(crate) struct MmapNodeBacking { node_size: usize, } -/// Resize `file` to `total_bytes` and reserve disk blocks where supported, so -/// later mmap writes fault with `ENOSPC` from this call instead of `SIGBUS` -/// after the temp filesystem fills up. 
-#[cfg(feature = "disk-spill")] -fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::Result<()> { - file.set_len(total_bytes)?; - #[cfg(target_os = "linux")] - { - use std::os::unix::io::AsRawFd; - let ret = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, total_bytes as i64) }; - if ret != 0 { - return Err(std::io::Error::from_raw_os_error(ret)); - } - } - Ok(()) -} - /// The struct for the Merkle tree, consisting of the root and the nodes. /// A typical tree would look like this /// root @@ -342,7 +325,7 @@ where })?; let file = tempfile::tempfile()?; - reserve_file_blocks(&file, total_bytes)?; + crate::mmap_util::reserve_file_blocks(&file, total_bytes)?; // Write directly through a writable mmap, then downgrade to read-only. // Avoids the write(2) → page-cache → mmap hand-off, which on Linux diff --git a/crypto/crypto/src/mmap_util.rs b/crypto/crypto/src/mmap_util.rs new file mode 100644 index 000000000..9e18e689a --- /dev/null +++ b/crypto/crypto/src/mmap_util.rs @@ -0,0 +1,15 @@ +/// Resize `file` to `total_bytes` and reserve disk blocks where supported, so +/// later mmap writes fault with `ENOSPC` from this call instead of `SIGBUS` +/// after the temp filesystem fills up. 
+pub fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::Result<()> { + file.set_len(total_bytes)?; + #[cfg(target_os = "linux")] + { + use std::os::unix::io::AsRawFd; + let ret = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, total_bytes as i64) }; + if ret != 0 { + return Err(std::io::Error::from_raw_os_error(ret)); + } + } + Ok(()) +} diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 983ff9a07..f2619b8da 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -33,23 +33,6 @@ impl std::fmt::Debug for TableMmapBacking { } } -/// Resize `file` to `total_bytes` and reserve disk blocks where supported, so -/// later mmap writes fault with `ENOSPC` from this call instead of `SIGBUS` -/// after the temp filesystem fills up. -#[cfg(feature = "disk-spill")] -fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::Result<()> { - file.set_len(total_bytes)?; - #[cfg(target_os = "linux")] - { - use std::os::unix::io::AsRawFd; - let ret = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, total_bytes as i64) }; - if ret != 0 { - return Err(std::io::Error::from_raw_os_error(ret)); - } - } - Ok(()) -} - /// A two-dimensional Table holding field elements, arranged in a row-major order. /// This is the basic underlying data structure used for any two-dimensional component in the /// the STARK protocol implementation, such as the `TraceTable` and the `EvaluationFrame`. @@ -323,7 +306,7 @@ impl Table { })?; let file = tempfile::tempfile()?; - reserve_file_blocks(&file, total_bytes)?; + crypto::mmap_util::reserve_file_blocks(&file, total_bytes)?; // Write directly through a writable mmap, then downgrade to read-only. 
// Avoids the write(2) → page-cache → mmap hand-off, which on Linux From 1fa7652f40bc8c299058f103833a58be306c9bf9 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 21:05:30 -0300 Subject: [PATCH 120/231] Harden posix_fallocate against overflow and EOPNOTSUPP --- crypto/crypto/src/mmap_util.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/crypto/crypto/src/mmap_util.rs b/crypto/crypto/src/mmap_util.rs index 9e18e689a..a889e198a 100644 --- a/crypto/crypto/src/mmap_util.rs +++ b/crypto/crypto/src/mmap_util.rs @@ -6,8 +6,16 @@ pub fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::R #[cfg(target_os = "linux")] { use std::os::unix::io::AsRawFd; - let ret = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, total_bytes as i64) }; - if ret != 0 { + let len = i64::try_from(total_bytes).map_err(|_| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "spill file too large for posix_fallocate", + ) + })?; + let ret = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, len) }; + // EOPNOTSUPP / EINVAL on overlay or network filesystems: file is sized + // by `set_len`, just no early-ENOSPC guarantee. + if ret != 0 && ret != libc::EOPNOTSUPP && ret != libc::EINVAL { return Err(std::io::Error::from_raw_os_error(ret)); } } From d1ab8c21a35d26d6a1194ca4bc98cbd261a7b6e8 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 21:05:35 -0300 Subject: [PATCH 121/231] Compute decode rows analytically in count_table_lengths --- prover/src/tables/trace_builder.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 4ff7f09a1..758fbeb99 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2116,8 +2116,9 @@ pub fn count_table_lengths( // Phase 0: ELF → instructions + DECODE row count. 
let instructions = decode::instructions_from_elf(elf) .map_err(|e| Error::Execution(format!("Failed to parse instructions: {e}")))?; - let (decode_trace, _decode_pc_to_row) = decode::generate_decode_trace(&instructions); - let decode_rows = decode_trace.num_rows() as u64; + // Mirrors the padding inside `generate_decode_trace`: +1 for the CPU + // padding entry, then round up to the next power of two with floor 2. + let decode_rows = (instructions.len() as u64 + 1).next_power_of_two().max(2); // Memory + register state for partition predicates that need timestamps. let mut memory_state = MemoryState::from_elf(elf); From dec616475754da8f65cd5d9e98618e5862cf8e1b Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 30 Apr 2026 21:05:36 -0300 Subject: [PATCH 122/231] Build calibration CLI with disk-spill feature --- scripts/calibrate_threshold.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/calibrate_threshold.sh b/scripts/calibrate_threshold.sh index 9ecb06cec..fb81dc7a9 100755 --- a/scripts/calibrate_threshold.sh +++ b/scripts/calibrate_threshold.sh @@ -16,8 +16,8 @@ OUT="/tmp/calibrate_threshold" mkdir -p "$OUT" rm -f "$OUT"/*.txt -echo "Building CLI with jemalloc-stats..." -cargo build --release -p cli --features jemalloc-stats --manifest-path "$ROOT_DIR/Cargo.toml" 2>&1 | tail -1 +echo "Building CLI with jemalloc-stats and disk-spill..." 
+cargo build --release -p cli --features jemalloc-stats,disk-spill --manifest-path "$ROOT_DIR/Cargo.toml" 2>&1 | tail -1 CLI="$ROOT_DIR/target/release/cli" From 382dd9e54f1f20cf4b4d6194493c1390acf674f7 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Fri, 1 May 2026 13:05:23 -0300 Subject: [PATCH 123/231] Replace Copy bound with SpillSafe marker on spill paths --- crypto/crypto/src/merkle_tree/merkle.rs | 4 ++- crypto/math/src/lib.rs | 1 + crypto/math/src/spill_safe.rs | 35 +++++++++++++++++++++++++ crypto/stark/src/prover.rs | 17 ++++++------ crypto/stark/src/table.rs | 13 +++++---- crypto/stark/src/trace.rs | 6 +++-- 6 files changed, 60 insertions(+), 16 deletions(-) create mode 100644 crypto/math/src/spill_safe.rs diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index b4fee6bde..9e03c90be 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -4,6 +4,8 @@ use crate::merkle_tree::proof::BatchProof; use super::{proof::Proof, traits::IsMerkleTreeBackend, utils::*}; use alloc::{collections::BTreeSet, vec::Vec}; +#[cfg(feature = "disk-spill")] +use math::spill_safe::SpillSafe; #[derive(Debug)] pub enum Error { @@ -300,7 +302,7 @@ where #[cfg(feature = "disk-spill")] pub fn spill_nodes_to_disk(&mut self) -> std::io::Result<()> where - B::Node: Copy, + B::Node: SpillSafe, { const { assert!( diff --git a/crypto/math/src/lib.rs b/crypto/math/src/lib.rs index 2f2f1fccb..9d5e6dd97 100644 --- a/crypto/math/src/lib.rs +++ b/crypto/math/src/lib.rs @@ -6,6 +6,7 @@ extern crate alloc; pub mod errors; pub mod field; pub mod helpers; +pub mod spill_safe; pub mod traits; pub mod unsigned_integer; diff --git a/crypto/math/src/spill_safe.rs b/crypto/math/src/spill_safe.rs new file mode 100644 index 000000000..61bfb297c --- /dev/null +++ b/crypto/math/src/spill_safe.rs @@ -0,0 +1,35 @@ +//! Marker trait for types whose in-memory bytes can be reinterpreted as the +//! 
same type without UB: no padding, every bit pattern valid, no indirection. +//! +//! Used by `crypto/stark` and `crypto/crypto` to gate the byte-cast in +//! mmap-backed disk spilling. Stricter than `Copy`, which permits types with +//! restricted bit patterns (e.g. `bool`, `NonZeroU32`). +//! +//! Implementing this trait is a deliberate `unsafe impl` — the implementer +//! vouches that the layout invariants hold, the compiler does not check. + +use crate::field::{element::FieldElement, traits::IsField}; + +/// # Safety +/// Implementer asserts `Self`'s memory representation contains no padding, +/// every bit pattern is a valid value of `Self`, and `Self` carries no +/// indirection (heap pointers, references, etc.). Adding this `unsafe impl` +/// for a type that violates these invariants is UB at any byte cast. +pub unsafe trait SpillSafe: Copy + 'static {} + +unsafe impl SpillSafe for u8 {} +unsafe impl SpillSafe for u16 {} +unsafe impl SpillSafe for u32 {} +unsafe impl SpillSafe for u64 {} +unsafe impl SpillSafe for u128 {} +unsafe impl SpillSafe for i8 {} +unsafe impl SpillSafe for i16 {} +unsafe impl SpillSafe for i32 {} +unsafe impl SpillSafe for i64 {} +unsafe impl SpillSafe for i128 {} + +unsafe impl SpillSafe for [T; N] {} + +// `FieldElement` is `#[repr(transparent)]` over `F::BaseType`, so its +// layout matches the base type's exactly. SpillSafe propagates through. 
+unsafe impl SpillSafe for FieldElement where F::BaseType: SpillSafe {} diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 078f399f3..6d75d08e0 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -10,6 +10,7 @@ use math::fft::errors::FFTError; use log::info; use math::field::traits::{IsField, IsSubFieldOf}; +use math::spill_safe::SpillSafe; use math::traits::AsBytes; use math::{ field::{element::FieldElement, traits::IsFFTField}, @@ -1581,8 +1582,8 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, PI: Send + Sync + Clone, - ::BaseType: Copy, - ::BaseType: Copy, + ::BaseType: SpillSafe, + ::BaseType: SpillSafe, { Self::multi_prove_inner( air_trace_pairs, @@ -1604,8 +1605,8 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, PI: Send + Sync + Clone, - ::BaseType: Copy, - ::BaseType: Copy, + ::BaseType: SpillSafe, + ::BaseType: SpillSafe, { Self::multi_prove_inner(air_trace_pairs, transcript, storage_mode) } @@ -1619,8 +1620,8 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, PI: Send + Sync + Clone, - ::BaseType: Copy, - ::BaseType: Copy, + ::BaseType: SpillSafe, + ::BaseType: SpillSafe, { info!("Started proof generation..."); @@ -2100,8 +2101,8 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, PI: Send + Sync + Clone, - ::BaseType: Copy, - ::BaseType: Copy, + ::BaseType: SpillSafe, + ::BaseType: SpillSafe, { let air_trace_pairs = vec![(air, trace, pub_inputs)]; Self::multi_prove(air_trace_pairs, transcript) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index f2619b8da..a6b8b6d1d 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -3,6 +3,8 @@ use math::field::{ element::FieldElement, traits::{IsField, IsSubFieldOf}, }; +#[cfg(feature = "disk-spill")] +use math::spill_safe::SpillSafe; #[cfg(feature = "parallel")] use rayon::prelude::*; @@ -189,8 +191,9 @@ impl Table { let offset = 
row_idx * backing.width * backing.elem_size; // SAFETY: spill_to_disk writes the table in row-major layout, so // width elements at this offset are contiguous. FieldElement - // is #[repr(transparent)] and spill_to_disk requires - // F::BaseType: Copy, ruling out indirection or non-trivial drop. + // is #[repr(transparent)] over F::BaseType, and spill_to_disk + // requires F::BaseType: SpillSafe (no padding, all bit patterns + // valid). return unsafe { std::slice::from_raw_parts( backing.mmap.as_ptr().add(offset) as *const FieldElement, @@ -252,8 +255,8 @@ impl Table { // SAFETY: FieldElement is #[repr(transparent)] over F::BaseType. // The mmap is page-aligned and elements are contiguously packed. // The data was written from identical types on the same machine, - // and spill_to_disk requires F::BaseType: Copy so the byte - // representation has no indirection or non-trivial drop. + // and spill_to_disk requires F::BaseType: SpillSafe (no padding, + // all bit patterns valid). return unsafe { &*(backing.mmap.as_ptr().add(offset) as *const FieldElement) }; } let idx = row * self.width + col; @@ -279,7 +282,7 @@ impl Table { #[cfg(feature = "disk-spill")] pub fn spill_to_disk(&mut self) -> std::io::Result<()> where - F::BaseType: Copy, + F::BaseType: SpillSafe, { // mmap base is page-aligned (typically 4096); any element with smaller // alignment is therefore aligned at every offset, since size_of is diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 7a58514ea..ac31240b5 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -4,6 +4,8 @@ use itertools::Itertools; use math::fft::errors::FFTError; use math::field::traits::{IsField, IsSubFieldOf}; use math::polynomial::barycentric_inv_denoms; +#[cfg(feature = "disk-spill")] +use math::spill_safe::SpillSafe; use math::{ field::{element::FieldElement, traits::IsFFTField}, polynomial::Polynomial, @@ -152,7 +154,7 @@ where #[cfg(feature = "disk-spill")] pub fn 
spill_main_to_disk(&mut self) -> std::io::Result<()> where - F::BaseType: Copy, + F::BaseType: SpillSafe, { self.main_table.spill_to_disk() } @@ -160,7 +162,7 @@ where #[cfg(feature = "disk-spill")] pub fn spill_aux_to_disk(&mut self) -> std::io::Result<()> where - E::BaseType: Copy, + E::BaseType: SpillSafe, { self.aux_table.spill_to_disk() } From fb5a8da61f3e19b6babf0ebf06787329269fcf47 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Sat, 2 May 2026 11:02:59 -0300 Subject: [PATCH 124/231] Guard Table::set against spilled tables --- crypto/stark/src/table.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index a6b8b6d1d..31265bf93 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -264,6 +264,11 @@ impl Table { } pub fn set(&mut self, row: usize, col: usize, value: FieldElement) { + #[cfg(feature = "disk-spill")] + debug_assert!( + self.mmap_backing.is_none(), + "Table::set on a spilled table — backing mmap is read-only" + ); let idx = row * self.width + col; self.data[idx] = value; } From 922b5d7d27e1244ec5e6acb3840292bb3a0717d4 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Sat, 2 May 2026 11:03:34 -0300 Subject: [PATCH 125/231] Document SIGBUS risk on non-Linux temp filesystems --- crypto/crypto/src/mmap_util.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crypto/crypto/src/mmap_util.rs b/crypto/crypto/src/mmap_util.rs index a889e198a..4f9ebc6ec 100644 --- a/crypto/crypto/src/mmap_util.rs +++ b/crypto/crypto/src/mmap_util.rs @@ -1,6 +1,11 @@ /// Resize `file` to `total_bytes` and reserve disk blocks where supported, so /// later mmap writes fault with `ENOSPC` from this call instead of `SIGBUS` /// after the temp filesystem fills up. +/// +/// Block reservation only runs on Linux (`posix_fallocate`). 
On other +/// platforms `set_len` extends the inode but does not reserve blocks: if +/// the temp filesystem fills during the subsequent mmap write, the process +/// is killed by `SIGBUS` with no Rust-level error path. pub fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::Result<()> { file.set_len(total_bytes)?; #[cfg(target_os = "linux")] From 2df8244948cce0be9c9b6d8c953c0008b85044da Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Sun, 3 May 2026 14:36:19 -0300 Subject: [PATCH 126/231] Spill fixed and page tables in build_traces --- prover/src/tables/trace_builder.rs | 36 ++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 758fbeb99..c8b2db350 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2003,8 +2003,8 @@ fn build_traces( // Generate remaining traces in parallel (page, register, halt, commit). // chunk_and_generate already handled cpu, lt, memw, load, mul, dvrm, branch above. - let commit_trace = commit::generate_commit_trace(&commit_ops); - let (pages, page_configs, register_trace, halt_trace); + let mut commit_trace = commit::generate_commit_trace(&commit_ops); + let (mut pages, page_configs, mut register_trace, mut halt_trace); #[cfg(feature = "parallel")] { let ((pages_val, register_val), halt_val) = rayon::join( @@ -2042,6 +2042,38 @@ fn build_traces( halt_trace = halt::generate_halt_trace(halt_timestamp); } + // Fixed-size and per-page tables aren't built through `chunk_and_generate`, + // so spill them here before returning. Without this, peak heap holds every + // PAGE table until `multi_prove_inner` spills them later. 
+ #[cfg(feature = "disk-spill")] + if storage_mode == StorageMode::Disk { + bitwise + .main_table + .spill_to_disk() + .map_err(|e| Error::Prover(format!("disk-spill bitwise: {e}")))?; + decode + .main_table + .spill_to_disk() + .map_err(|e| Error::Prover(format!("disk-spill decode: {e}")))?; + commit_trace + .main_table + .spill_to_disk() + .map_err(|e| Error::Prover(format!("disk-spill commit: {e}")))?; + register_trace + .main_table + .spill_to_disk() + .map_err(|e| Error::Prover(format!("disk-spill register: {e}")))?; + halt_trace + .main_table + .spill_to_disk() + .map_err(|e| Error::Prover(format!("disk-spill halt: {e}")))?; + for page in &mut pages { + page.main_table + .spill_to_disk() + .map_err(|e| Error::Prover(format!("disk-spill page: {e}")))?; + } + } + Ok(Traces { cpus, bitwise, From 8be6ba210bdaf744c144cbc74fb87c872ef2d409 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 11:13:45 -0300 Subject: [PATCH 127/231] Suppress unused_mut warning without disk-spill --- prover/src/tables/trace_builder.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index c8b2db350..957497c94 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2003,7 +2003,11 @@ fn build_traces( // Generate remaining traces in parallel (page, register, halt, commit). // chunk_and_generate already handled cpu, lt, memw, load, mul, dvrm, branch above. + // `mut` is only used by the disk-spill block below; #[allow] keeps the + // non-disk-spill build warning-free. 
+ #[allow(unused_mut)] let mut commit_trace = commit::generate_commit_trace(&commit_ops); + #[allow(unused_mut)] let (mut pages, page_configs, mut register_trace, mut halt_trace); #[cfg(feature = "parallel")] { From 90c9033ab501b85958b2047e7d73e043188818f4 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 12:26:53 -0300 Subject: [PATCH 128/231] Skip commit op materialization in count_table_lengths --- prover/src/tables/trace_builder.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 957497c94..ec15a3014 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2241,12 +2241,11 @@ pub fn count_table_lengths( // ECALL Commit if cpu_op.ecall_commit { - let commit_ops = expand_commit_operations_for_ecall( - &cpu_op, - &memory_state, - current_commit_index as u64, - ); - commit_count += commit_ops.len(); + // Match `expand_commit_operations_for_ecall`'s `0..=count` loop + // without materializing the op vector. + commit_count += (cpu_op.commit_count as usize) + .checked_add(1) + .ok_or_else(|| Error::Execution("commit_count overflows usize".into()))?; let reg_commit_ops = collect_commit_memw_ops(&cpu_op, &mut register_state, &mut memory_state); for memw_op in &reg_commit_ops { From 78b6bd63f76703234deffa812912b046715d29d4 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 12:26:55 -0300 Subject: [PATCH 129/231] Remove unused spill_all_main_to_disk --- prover/src/tables/trace_builder.rs | 54 ------------------------------ 1 file changed, 54 deletions(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index ec15a3014..e5aef89eb 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2530,60 +2530,6 @@ impl Traces { } } - /// Spill all trace table main columns to disk.
- /// - /// Frees RAM by memory-mapping the main trace data for every table. - /// This is a no-op for tables that are already spilled or empty. - #[cfg(feature = "disk-spill")] - pub fn spill_all_main_to_disk(&mut self) -> Result<(), Error> { - let spill = |t: &mut TraceTable| { - t.main_table - .spill_to_disk() - .map_err(|e| Error::Prover(format!("disk-spill trace: {e}"))) - }; - - for t in &mut self.cpus { - spill(t)?; - } - spill(&mut self.bitwise)?; - for t in &mut self.lts { - spill(t)?; - } - for t in &mut self.shifts { - spill(t)?; - } - for t in &mut self.memws { - spill(t)?; - } - for t in &mut self.memw_aligneds { - spill(t)?; - } - for t in &mut self.memw_registers { - spill(t)?; - } - for t in &mut self.loads { - spill(t)?; - } - spill(&mut self.decode)?; - for t in &mut self.muls { - spill(t)?; - } - for t in &mut self.dvrms { - spill(t)?; - } - for t in &mut self.pages { - spill(t)?; - } - spill(&mut self.register)?; - for t in &mut self.branches { - spill(t)?; - } - spill(&mut self.halt)?; - spill(&mut self.commit)?; - - Ok(()) - } - /// Extract page configurations from ELF only (deterministic from binary). 
/// /// Returns PageConfigs for pages covered by ELF segments, with their From 8d0088184d7f3637f51cdacd52de0a16585ad46c Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 12:30:04 -0300 Subject: [PATCH 130/231] Make spill_nodes_to_disk idempotency check explicit --- crypto/crypto/src/merkle_tree/merkle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 9e03c90be..f118ff2bc 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -311,7 +311,7 @@ where ) } - if self.nodes.is_empty() { + if self.nodes.is_empty() || self.mmap_backing.is_some() { return Ok(()); } From 277ebda8c0ce74c891850cf163eb33c461adc0fb Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 15:53:50 -0300 Subject: [PATCH 131/231] Trim SpillSafe doc to layout invariants --- crypto/math/src/spill_safe.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/math/src/spill_safe.rs b/crypto/math/src/spill_safe.rs index 61bfb297c..f08a24cec 100644 --- a/crypto/math/src/spill_safe.rs +++ b/crypto/math/src/spill_safe.rs @@ -1,9 +1,8 @@ //! Marker trait for types whose in-memory bytes can be reinterpreted as the //! same type without UB: no padding, every bit pattern valid, no indirection. //! -//! Used by `crypto/stark` and `crypto/crypto` to gate the byte-cast in -//! mmap-backed disk spilling. Stricter than `Copy`, which permits types with -//! restricted bit patterns (e.g. `bool`, `NonZeroU32`). +//! Stricter than `Copy`, which permits types with restricted bit patterns +//! (e.g. `bool`, `NonZeroU32`). //! //! Implementing this trait is a deliberate `unsafe impl` — the implementer //! vouches that the layout invariants hold, the compiler does not check. 
From d5512af226be01bca32c4c409c225dc5baf54fc0 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 16:08:37 -0300 Subject: [PATCH 132/231] Loosen merkle node alignment to mmap page size --- crypto/crypto/src/merkle_tree/merkle.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index f118ff2bc..fee7f95d8 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -147,7 +147,11 @@ where if let Some(ref backing) = self.mmap_backing { if idx < backing.node_count { // SAFETY: spill_nodes_to_disk writes self.nodes as contiguous bytes - // to this mmap and asserts align_of::() == 1 at compile time. + // to this mmap. The mmap is page-aligned (>= 4096) and + // spill_nodes_to_disk asserts align_of::() <= 4096, so + // every offset idx * node_size lands on an aligned address. + // SpillSafe (a super-trait of B::Node here) guarantees no + // padding and any-bit-pattern validity. 
let ptr = unsafe { backing.mmap.as_ptr().add(idx * backing.node_size) }; return Some(unsafe { &*(ptr as *const B::Node) }); } @@ -306,8 +310,8 @@ where { const { assert!( - align_of::() == 1, - "B::Node must have alignment 1 for mmap safety" + align_of::() <= 4096, + "B::Node alignment must fit within mmap page alignment" ) } From 7ade6018d37cbada1c783851db0219c5cf393255 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 16:08:48 -0300 Subject: [PATCH 133/231] Drop unnecessary mmap flush before make_read_only --- crypto/crypto/src/merkle_tree/merkle.rs | 1 - crypto/stark/src/table.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index fee7f95d8..8b3208272 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -347,7 +347,6 @@ where core::slice::from_raw_parts(self.nodes.as_ptr() as *const u8, node_count * node_size) }; mmap_mut.copy_from_slice(bytes); - mmap_mut.flush()?; let mmap = mmap_mut.make_read_only()?; // Free the heap allocation diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 31265bf93..fa18d0915 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -330,7 +330,6 @@ impl Table { std::slice::from_raw_parts(self.data.as_ptr() as *const u8, self.data.len() * elem_size) }; mmap_mut.copy_from_slice(bytes); - mmap_mut.flush()?; let mmap = mmap_mut.make_read_only()?; self.mmap_backing = Some(TableMmapBacking { From ce24fb8a7b8c360972c10faf03a24af9aa1398b5 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 16:08:49 -0300 Subject: [PATCH 134/231] Assert max_rows is positive in padded_chunked_rows --- prover/src/tables/trace_builder.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index e5aef89eb..3e407cb97 100644 --- a/prover/src/tables/trace_builder.rs +++ 
b/prover/src/tables/trace_builder.rs @@ -2103,6 +2103,7 @@ fn build_traces( /// Padded row count after chunking: each chunk rounds up to `next_power_of_two().max(4)`. #[cfg(feature = "disk-spill")] fn padded_chunked_rows(ops_count: usize, max_rows: usize) -> u64 { + debug_assert!(max_rows > 0, "max_rows must be positive"); if ops_count == 0 { return 4; // empty-chunk tables still allocate one 4-row padded chunk } From 7ab6751848388ddb63ae367740cd160789fd3d82 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 16:17:37 -0300 Subject: [PATCH 135/231] Fail fast when posix_fallocate is unsupported --- crypto/crypto/src/mmap_util.rs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/crypto/crypto/src/mmap_util.rs b/crypto/crypto/src/mmap_util.rs index 4f9ebc6ec..3b67b8dc5 100644 --- a/crypto/crypto/src/mmap_util.rs +++ b/crypto/crypto/src/mmap_util.rs @@ -1,11 +1,14 @@ -/// Resize `file` to `total_bytes` and reserve disk blocks where supported, so -/// later mmap writes fault with `ENOSPC` from this call instead of `SIGBUS` -/// after the temp filesystem fills up. +/// Resize `file` to `total_bytes` and reserve disk blocks via `posix_fallocate` +/// on Linux, so later mmap writes fault with `ENOSPC` from this call instead +/// of `SIGBUS` after the temp filesystem fills up. /// -/// Block reservation only runs on Linux (`posix_fallocate`). On other -/// platforms `set_len` extends the inode but does not reserve blocks: if -/// the temp filesystem fills during the subsequent mmap write, the process -/// is killed by `SIGBUS` with no Rust-level error path. +/// Linux returns `EOPNOTSUPP` (or sometimes `EINVAL`) on filesystems that +/// can't pre-allocate (NFS, some overlay/FUSE mounts). We surface those as +/// errors so callers fail fast rather than risk a SIGBUS during the write. 
+/// +/// On non-Linux targets `set_len` extends the inode but does not reserve +/// blocks: if the temp filesystem fills during the subsequent mmap write, +/// the process is killed by `SIGBUS` with no Rust-level error path. pub fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::Result<()> { file.set_len(total_bytes)?; #[cfg(target_os = "linux")] @@ -18,9 +21,7 @@ pub fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::R ) })?; let ret = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, len) }; - // EOPNOTSUPP / EINVAL on overlay or network filesystems: file is sized - // by `set_len`, just no early-ENOSPC guarantee. - if ret != 0 && ret != libc::EOPNOTSUPP && ret != libc::EINVAL { + if ret != 0 { return Err(std::io::Error::from_raw_os_error(ret)); } } From ae6a11f0c49a78036db8230d67677137eaafd351 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 16:20:55 -0300 Subject: [PATCH 136/231] Tighten reserve_file_blocks doc --- crypto/crypto/src/mmap_util.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/crypto/crypto/src/mmap_util.rs b/crypto/crypto/src/mmap_util.rs index 3b67b8dc5..23f542e05 100644 --- a/crypto/crypto/src/mmap_util.rs +++ b/crypto/crypto/src/mmap_util.rs @@ -1,14 +1,9 @@ -/// Resize `file` to `total_bytes` and reserve disk blocks via `posix_fallocate` -/// on Linux, so later mmap writes fault with `ENOSPC` from this call instead -/// of `SIGBUS` after the temp filesystem fills up. +/// Reserve disk blocks up front so this call fails on a full disk. +/// Without reservation, the kernel sends SIGBUS during the later mmap write. /// -/// Linux returns `EOPNOTSUPP` (or sometimes `EINVAL`) on filesystems that -/// can't pre-allocate (NFS, some overlay/FUSE mounts). We surface those as -/// errors so callers fail fast rather than risk a SIGBUS during the write. 
-/// -/// On non-Linux targets `set_len` extends the inode but does not reserve -/// blocks: if the temp filesystem fills during the subsequent mmap write, -/// the process is killed by `SIGBUS` with no Rust-level error path. +/// Linux only, using `posix_fallocate`. On other platforms we only call +/// `set_len` and skip reservation, so the kernel can still send SIGBUS if +/// the disk fills mid-write. pub fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::Result<()> { file.set_len(total_bytes)?; #[cfg(target_os = "linux")] From fe48952bc26eeda7dfb35f63dfa0bad8b52292ee Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 17:10:26 -0300 Subject: [PATCH 137/231] Stream spilled MerkleTree nodes during serialize --- crypto/crypto/src/merkle_tree/merkle.rs | 37 ++++++++++++++++++++----- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 8b3208272..9452ff1be 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -64,18 +64,14 @@ pub struct MerkleTree { #[cfg(all(feature = "serde", feature = "disk-spill"))] impl serde::Serialize for MerkleTree where - B::Node: serde::Serialize + Copy, + B::Node: serde::Serialize, { fn serialize(&self, serializer: S) -> Result { use serde::ser::SerializeStruct; let mut s = serializer.serialize_struct("MerkleTree", 2)?; s.serialize_field("root", &self.root)?; - if let Some(ref backing) = self.mmap_backing { - let mut materialized = Vec::with_capacity(backing.node_count); - for i in 0..backing.node_count { - materialized.push(*self.node_get(i).expect("index in bounds")); - } - s.serialize_field("nodes", &materialized)?; + if self.mmap_backing.is_some() { + s.serialize_field("nodes", &MmapNodesSeq(self))?; } else { s.serialize_field("nodes", &self.nodes)?; } @@ -83,6 +79,33 @@ where } } +/// Serializes the spilled nodes as a length-prefixed sequence by reading 
them +/// one at a time from the mmap, avoiding a transient `Vec` allocation +/// the size of the entire tree. +#[cfg(all(feature = "serde", feature = "disk-spill"))] +struct MmapNodesSeq<'a, B: IsMerkleTreeBackend>(&'a MerkleTree); + +#[cfg(all(feature = "serde", feature = "disk-spill"))] +impl serde::Serialize for MmapNodesSeq<'_, B> +where + B::Node: serde::Serialize, +{ + fn serialize(&self, serializer: S) -> Result { + use serde::ser::SerializeSeq; + let backing = self + .0 + .mmap_backing + .as_ref() + .expect("MmapNodesSeq is only constructed when mmap_backing is Some"); + let n = backing.node_count; + let mut seq = serializer.serialize_seq(Some(n))?; + for i in 0..n { + seq.serialize_element(self.0.node_get(i).expect("index in bounds"))?; + } + seq.end() + } +} + const ROOT: usize = 0; impl MerkleTree From d7de6264b7c09fe4f9d8dc1a82aea84222709f7a Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 17:15:37 -0300 Subject: [PATCH 138/231] Document MerkleTree disk-spill Serialize impl --- crypto/crypto/src/merkle_tree/merkle.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 9452ff1be..f47d0c43d 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -61,6 +61,11 @@ pub struct MerkleTree { mmap_backing: Option, } +// `mmap_backing` is `#[serde(skip)]` and `spill_nodes_to_disk` empties `nodes`, +// so the default derive would emit `{root, nodes: []}` and lose the tree. +// +// Output matches the non-disk-spill derive byte-for-byte, so a proof from either +// storage mode deserializes with the same `Deserialize` impl. 
#[cfg(all(feature = "serde", feature = "disk-spill"))] impl serde::Serialize for MerkleTree where From 54b49686689be842e382bb5bb9fe8b5442857836 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 17:35:28 -0300 Subject: [PATCH 139/231] Drop #[inline] from MerkleTree node accessors --- crypto/crypto/src/merkle_tree/merkle.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index f47d0c43d..1c1d019c4 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -157,7 +157,6 @@ where } /// Total number of nodes in the tree (inner + leaves). - #[inline] fn node_count(&self) -> usize { #[cfg(feature = "disk-spill")] if let Some(ref backing) = self.mmap_backing { @@ -169,7 +168,6 @@ where /// Access a node by index, returning a reference. /// /// Returns `None` if `idx` is out of bounds. - #[inline] fn node_get(&self, idx: usize) -> Option<&B::Node> { #[cfg(feature = "disk-spill")] if let Some(ref backing) = self.mmap_backing { From c5af906d599fd7e4bb7a8f589694dc86d8753136 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 17:37:55 -0300 Subject: [PATCH 140/231] Tighten MmapNodesSeq doc comment --- crypto/crypto/src/merkle_tree/merkle.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 1c1d019c4..169e9bef5 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -84,9 +84,8 @@ where } } -/// Serializes the spilled nodes as a length-prefixed sequence by reading them -/// one at a time from the mmap, avoiding a transient `Vec` allocation -/// the size of the entire tree. +/// Streams the spilled nodes through `serialize_seq` instead of buffering them +/// into a `Vec` the size of the tree. 
#[cfg(all(feature = "serde", feature = "disk-spill"))] struct MmapNodesSeq<'a, B: IsMerkleTreeBackend>(&'a MerkleTree); From dce4537faa1ac5864468b13d698abc4b46a528d5 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 17:54:58 -0300 Subject: [PATCH 141/231] Rephrase node_get SAFETY comment --- crypto/crypto/src/merkle_tree/merkle.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 169e9bef5..e3083665c 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -171,12 +171,15 @@ where #[cfg(feature = "disk-spill")] if let Some(ref backing) = self.mmap_backing { if idx < backing.node_count { - // SAFETY: spill_nodes_to_disk writes self.nodes as contiguous bytes - // to this mmap. The mmap is page-aligned (>= 4096) and - // spill_nodes_to_disk asserts align_of::() <= 4096, so - // every offset idx * node_size lands on an aligned address. - // SpillSafe (a super-trait of B::Node here) guarantees no - // padding and any-bit-pattern validity. + // SAFETY: spill_nodes_to_disk is the only function that populates + // mmap_backing, and its where-clause requires B::Node: SpillSafe. + // Reaching this branch means that bound was checked at construction, + // so B::Node carries no padding and every bit pattern is valid. + // + // Alignment: the mmap base is page-aligned (>= 4096), spill_nodes_to_disk + // asserts align_of::() <= 4096, and Rust guarantees + // size_of:: is a multiple of align_of::, so every + // offset idx * node_size lands on an aligned address. 
let ptr = unsafe { backing.mmap.as_ptr().add(idx * backing.node_size) }; return Some(unsafe { &*(ptr as *const B::Node) }); } From b47427f7874636ae63b60c7a9a71fc7bea817fc9 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 18:26:02 -0300 Subject: [PATCH 142/231] Refresh stale comments in spill_nodes_to_disk and spill_safe --- crypto/crypto/src/merkle_tree/merkle.rs | 8 ++++---- crypto/math/src/spill_safe.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index e3083665c..f7aaadfa4 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -329,8 +329,8 @@ where auth_path_set.into_iter().rev().collect() } - /// Write tree nodes to a temp file, mmap it, and free the in-memory vector. - /// Node access methods read from the mmap after this call. + /// Mmap a temp file, copy the tree nodes into the mapping, and free the + /// in-memory vector. Node access methods read from the mmap after this call. #[cfg(feature = "disk-spill")] pub fn spill_nodes_to_disk(&mut self) -> std::io::Result<()> where @@ -369,8 +369,8 @@ where // SAFETY: tempfile() creates an anonymous file with no filesystem // path, so no other process can open or modify it. let mut mmap_mut = unsafe { memmap2::MmapOptions::new().map_mut(&file)? }; - // SAFETY: B::Node is a plain byte array ([u8; N]), so casting - // the contiguous Vec to a byte slice is valid. + // SAFETY: SpillSafe's safety contract requires no padding on B::Node, so + // the contiguous Vec bytes are initialized and reading them as &[u8] is sound. let bytes = unsafe { core::slice::from_raw_parts(self.nodes.as_ptr() as *const u8, node_count * node_size) }; diff --git a/crypto/math/src/spill_safe.rs b/crypto/math/src/spill_safe.rs index f08a24cec..9b75207b0 100644 --- a/crypto/math/src/spill_safe.rs +++ b/crypto/math/src/spill_safe.rs @@ -5,7 +5,7 @@ //! (e.g. `bool`, `NonZeroU32`). 
//! //! Implementing this trait is a deliberate `unsafe impl` — the implementer -//! vouches that the layout invariants hold, the compiler does not check. +//! asserts the layout invariants hold, the compiler does not check. use crate::field::{element::FieldElement, traits::IsField}; From a31ebedb53e5adf0d7505f58d94897f0c4f85e0e Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 18:54:24 -0300 Subject: [PATCH 143/231] Stream spilled Table data through serialize_seq --- crypto/stark/src/table.rs | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index fa18d0915..632a1408d 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -61,13 +61,7 @@ where use serde::ser::SerializeStruct; let mut s = serializer.serialize_struct("Table", 3)?; if self.mmap_backing.is_some() { - let mut materialized = Vec::with_capacity(self.width * self.height); - for r in 0..self.height { - for elem in self.get_row(r) { - materialized.push(elem.clone()); - } - } - s.serialize_field("data", &materialized)?; + s.serialize_field("data", &MmapDataSeq(self))?; } else { s.serialize_field("data", &self.data)?; } @@ -77,6 +71,29 @@ where } } +/// Streams the spilled table elements through `serialize_seq` instead of +/// buffering them into a `Vec>` the size of the trace. 
+#[cfg(feature = "disk-spill")] +struct MmapDataSeq<'a, F: IsField>(&'a Table); + +#[cfg(feature = "disk-spill")] +impl serde::Serialize for MmapDataSeq<'_, F> +where + FieldElement: serde::Serialize, +{ + fn serialize(&self, serializer: S) -> Result { + use serde::ser::SerializeSeq; + let table = self.0; + let mut seq = serializer.serialize_seq(Some(table.width * table.height))?; + for r in 0..table.height { + for elem in table.get_row(r) { + seq.serialize_element(elem)?; + } + } + seq.end() + } +} + /// Cloning a spilled table reads its mmap bytes into a fresh heap `Vec` /// and returns an unspilled clone. This is cold — callers pay the full /// materialization cost — but avoids the runtime panic a derived impl From d37d3e21005eafb677135e172a4fe8aa55781e44 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 18:54:56 -0300 Subject: [PATCH 144/231] Make Table::set spill guard a hard assert --- crypto/stark/src/table.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 632a1408d..0cf73e34d 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -282,7 +282,7 @@ impl Table { pub fn set(&mut self, row: usize, col: usize, value: FieldElement) { #[cfg(feature = "disk-spill")] - debug_assert!( + assert!( self.mmap_backing.is_none(), "Table::set on a spilled table — backing mmap is read-only" ); From d05affc4979716e72e7b8d33cdb13d88e321e0bb Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 18:54:58 -0300 Subject: [PATCH 145/231] Drop redundant node_size from MmapNodeBacking --- crypto/crypto/src/merkle_tree/merkle.rs | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index f7aaadfa4..7fcedac04 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -32,7 +32,6 @@ impl std::error::Error for 
Error {} pub(crate) struct MmapNodeBacking { mmap: memmap2::Mmap, node_count: usize, - node_size: usize, } /// The struct for the Merkle tree, consisting of the root and the nodes. @@ -180,7 +179,7 @@ where // asserts align_of::() <= 4096, and Rust guarantees // size_of:: is a multiple of align_of::, so every // offset idx * node_size lands on an aligned address. - let ptr = unsafe { backing.mmap.as_ptr().add(idx * backing.node_size) }; + let ptr = unsafe { backing.mmap.as_ptr().add(idx * size_of::()) }; return Some(unsafe { &*(ptr as *const B::Node) }); } return None; @@ -347,10 +346,9 @@ where return Ok(()); } - let node_size = core::mem::size_of::(); let node_count = self.nodes.len(); let total_bytes = (node_count as u64) - .checked_mul(node_size as u64) + .checked_mul(size_of::() as u64) .ok_or_else(|| { std::io::Error::new( std::io::ErrorKind::InvalidInput, @@ -372,7 +370,10 @@ where // SAFETY: SpillSafe's safety contract requires no padding on B::Node, so // the contiguous Vec bytes are initialized and reading them as &[u8] is sound. 
let bytes = unsafe { - core::slice::from_raw_parts(self.nodes.as_ptr() as *const u8, node_count * node_size) + core::slice::from_raw_parts( + self.nodes.as_ptr() as *const u8, + node_count * size_of::(), + ) }; mmap_mut.copy_from_slice(bytes); let mmap = mmap_mut.make_read_only()?; @@ -380,11 +381,7 @@ where // Free the heap allocation self.nodes = Vec::new(); - self.mmap_backing = Some(MmapNodeBacking { - mmap, - node_count, - node_size, - }); + self.mmap_backing = Some(MmapNodeBacking { mmap, node_count }); Ok(()) } From 86992f42eb9344a0a3bea63d7e9933f42284b533 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 18:55:08 -0300 Subject: [PATCH 146/231] Account for halt padding in peak_bytes --- prover/src/auto_storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prover/src/auto_storage.rs b/prover/src/auto_storage.rs index 9ddeeb4ec..00d4935ae 100644 --- a/prover/src/auto_storage.rs +++ b/prover/src/auto_storage.rs @@ -100,7 +100,7 @@ fn aux_cols(bus_count: usize) -> u64 { fn table_specs(lengths: &TableLengths) -> Vec { let bitwise_rows = BITWISE_ROWS as u64; let register_rows = NUM_REGISTER_ADDRESSES.next_power_of_two() as u64; - let halt_rows = 1u64; + let halt_rows = 4u64; let page_rows = PAGE_SIZE as u64; let mut specs = vec![ From 6957e68646ba599304c92e01dee43e081c3c9ab8 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 18:55:18 -0300 Subject: [PATCH 147/231] Drop stale max_ram_bytes advice from default-Disk warning --- prover/src/auto_storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prover/src/auto_storage.rs b/prover/src/auto_storage.rs index 00d4935ae..afc8e6937 100644 --- a/prover/src/auto_storage.rs +++ b/prover/src/auto_storage.rs @@ -284,7 +284,7 @@ pub fn select_storage_mode( let Some(budget) = effective_budget(available, cap) else { log::warn!( "Auto disk-spill: sysinfo could not read system memory and no cap set, \ - defaulting to Disk. 
Pass max_ram_bytes if the machine has enough RAM." + defaulting to Disk." ); return StorageMode::Disk; }; From 020781eb9d15a1fcf39b41ac94ffa3ce974f9d40 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 4 May 2026 18:55:19 -0300 Subject: [PATCH 148/231] Replace dead unwrap_or in cgroup-near-OOM warning --- prover/src/auto_storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prover/src/auto_storage.rs b/prover/src/auto_storage.rs index afc8e6937..061afa752 100644 --- a/prover/src/auto_storage.rs +++ b/prover/src/auto_storage.rs @@ -293,7 +293,7 @@ pub fn select_storage_mode( if estimated > threshold { StorageMode::Disk } else { - if cap.is_none() && estimated.saturating_mul(2) >= available.unwrap_or(0) { + if cap.is_none() && estimated.saturating_mul(2) >= available.unwrap() { log::warn!( "Auto disk-spill picked Ram with estimated_peak={estimated} bytes near \ available={available:?}. Set max_ram_bytes to bound the budget to a \ From 83c8da946544fc6b876d980659db5cf32e2321d5 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 11:02:35 -0300 Subject: [PATCH 149/231] Document why available.unwrap() is safe --- prover/src/auto_storage.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/prover/src/auto_storage.rs b/prover/src/auto_storage.rs index 061afa752..a3340382d 100644 --- a/prover/src/auto_storage.rs +++ b/prover/src/auto_storage.rs @@ -293,6 +293,8 @@ pub fn select_storage_mode( if estimated > threshold { StorageMode::Disk } else { + // `cap.is_none()` plus an `effective_budget` that returned `Some` means + // `available` must be `Some` (see `effective_budget`). 
if cap.is_none() && estimated.saturating_mul(2) >= available.unwrap() { log::warn!( "Auto disk-spill picked Ram with estimated_peak={estimated} bytes near \ From 3af6a2bdebde8330b68b62dfef0356f5cd7646f7 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 11:02:38 -0300 Subject: [PATCH 150/231] Note advise_drop_cache reliability per platform --- crypto/stark/src/table.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 0cf73e34d..15d3f52bb 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -362,11 +362,12 @@ impl Table { Ok(()) } - /// Advise the kernel to drop mmap pages from the page cache. + /// Hint the kernel to drop mmap pages from the page cache. /// Call after reading spilled data into pool buffers so the same /// data doesn't occupy RAM in both places. /// - /// Unix-only: no-op on non-Unix targets. + /// Reliable on Linux for clean file-backed mappings; on other Unix + /// (macOS/BSD) the hint may be a no-op. No-op on non-Unix targets. 
#[cfg(all(feature = "disk-spill", unix))] pub fn advise_drop_cache(&self) { if let Some(ref backing) = self.mmap_backing { From 097361ad399199f44b77f3709c2958131810c608 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 12:04:05 -0300 Subject: [PATCH 151/231] Extract spill_slice_to_mmap helper into mmap_util --- crypto/crypto/src/merkle_tree/merkle.rs | 47 +++---------------------- crypto/crypto/src/mmap_util.rs | 45 +++++++++++++++++++++++ crypto/stark/src/prover.rs | 8 +++++ crypto/stark/src/table.rs | 45 ++--------------------- crypto/stark/src/trace.rs | 2 ++ 5 files changed, 62 insertions(+), 85 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index 7fcedac04..faa657357 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -175,7 +175,7 @@ where // Reaching this branch means that bound was checked at construction, // so B::Node carries no padding and every bit pattern is valid. // - // Alignment: the mmap base is page-aligned (>= 4096), spill_nodes_to_disk + // Alignment: the mmap base is page-aligned (>= 4096), spill_slice_to_mmap // asserts align_of::() <= 4096, and Rust guarantees // size_of:: is a multiple of align_of::, so every // offset idx * node_size lands on an aligned address. @@ -328,59 +328,20 @@ where auth_path_set.into_iter().rev().collect() } - /// Mmap a temp file, copy the tree nodes into the mapping, and free the - /// in-memory vector. Node access methods read from the mmap after this call. + /// Spill the node vector to a temp-file-backed mmap and free the heap + /// allocation. Node access methods read from the mmap after this call. 
#[cfg(feature = "disk-spill")] pub fn spill_nodes_to_disk(&mut self) -> std::io::Result<()> where B::Node: SpillSafe, { - const { - assert!( - align_of::() <= 4096, - "B::Node alignment must fit within mmap page alignment" - ) - } - if self.nodes.is_empty() || self.mmap_backing.is_some() { return Ok(()); } let node_count = self.nodes.len(); - let total_bytes = (node_count as u64) - .checked_mul(size_of::() as u64) - .ok_or_else(|| { - std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "spill_nodes_to_disk: byte count overflows u64", - ) - })?; - - let file = tempfile::tempfile()?; - crate::mmap_util::reserve_file_blocks(&file, total_bytes)?; - - // Write directly through a writable mmap, then downgrade to read-only. - // Avoids the write(2) → page-cache → mmap hand-off, which on Linux - // under memory pressure could produce partially-zeroed reads from the - // read-only mmap. - // - // SAFETY: tempfile() creates an anonymous file with no filesystem - // path, so no other process can open or modify it. - let mut mmap_mut = unsafe { memmap2::MmapOptions::new().map_mut(&file)? }; - // SAFETY: SpillSafe's safety contract requires no padding on B::Node, so - // the contiguous Vec bytes are initialized and reading them as &[u8] is sound. - let bytes = unsafe { - core::slice::from_raw_parts( - self.nodes.as_ptr() as *const u8, - node_count * size_of::(), - ) - }; - mmap_mut.copy_from_slice(bytes); - let mmap = mmap_mut.make_read_only()?; - - // Free the heap allocation + let mmap = crate::mmap_util::spill_slice_to_mmap(&self.nodes)?; self.nodes = Vec::new(); - self.mmap_backing = Some(MmapNodeBacking { mmap, node_count }); Ok(()) diff --git a/crypto/crypto/src/mmap_util.rs b/crypto/crypto/src/mmap_util.rs index 23f542e05..197bba18d 100644 --- a/crypto/crypto/src/mmap_util.rs +++ b/crypto/crypto/src/mmap_util.rs @@ -1,3 +1,5 @@ +use math::spill_safe::SpillSafe; + /// Reserve disk blocks up front so this call fails on a full disk. 
/// Without reservation, the kernel sends SIGBUS during the later mmap write. /// @@ -22,3 +24,46 @@ pub fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::R } Ok(()) } + +/// Mmap a fresh temp file, copy `slice` into the mapping, downgrade to +/// read-only, and return it. +/// +/// Writes through the writable mmap rather than via `write(2)` + remap, +/// which on Linux under memory pressure could otherwise produce +/// partially-zeroed reads from the read-only mmap. +/// +/// Alignment: the mmap base is page-aligned (>= 4096), this function +/// asserts `align_of::() <= 4096`, and Rust guarantees `size_of::()` +/// is a multiple of `align_of::()`, so every element offset is aligned. +pub fn spill_slice_to_mmap(slice: &[T]) -> std::io::Result { + const { + assert!( + std::mem::align_of::() <= 4096, + "T alignment must fit within mmap page alignment" + ) + } + + let elem_size = std::mem::size_of::(); + let total_bytes = (slice.len() as u64) + .checked_mul(elem_size as u64) + .ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "spill_slice_to_mmap: byte count overflows u64", + ) + })?; + + let file = tempfile::tempfile()?; + reserve_file_blocks(&file, total_bytes)?; + + // SAFETY: tempfile() creates an anonymous file with no filesystem path, + // so no other process can open or modify it. + let mut mmap_mut = unsafe { memmap2::MmapOptions::new().map_mut(&file)? }; + // SAFETY: SpillSafe's safety contract requires no padding on T, so + // `slice`'s bytes are initialized and reading them as &[u8] is sound. 
+ let bytes: &[u8] = unsafe { + core::slice::from_raw_parts(slice.as_ptr() as *const u8, size_of_val(slice)) + }; + mmap_mut.copy_from_slice(bytes); + mmap_mut.make_read_only() +} diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 6d75d08e0..47b403f3e 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1582,6 +1582,8 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, PI: Send + Sync + Clone, + Field: Copy + 'static, + FieldExtension: Copy + 'static, ::BaseType: SpillSafe, ::BaseType: SpillSafe, { @@ -1605,6 +1607,8 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, PI: Send + Sync + Clone, + Field: Copy + 'static, + FieldExtension: Copy + 'static, ::BaseType: SpillSafe, ::BaseType: SpillSafe, { @@ -1620,6 +1624,8 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, PI: Send + Sync + Clone, + Field: Copy + 'static, + FieldExtension: Copy + 'static, ::BaseType: SpillSafe, ::BaseType: SpillSafe, { @@ -2101,6 +2107,8 @@ pub trait IsStarkProver< FieldElement: AsBytes, FieldElement: AsBytes, PI: Send + Sync + Clone, + Field: Copy + 'static, + FieldExtension: Copy + 'static, ::BaseType: SpillSafe, ::BaseType: SpillSafe, { diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 15d3f52bb..13f7942e3 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -304,59 +304,20 @@ impl Table { #[cfg(feature = "disk-spill")] pub fn spill_to_disk(&mut self) -> std::io::Result<()> where + F: Copy + 'static, F::BaseType: SpillSafe, { - // mmap base is page-aligned (typically 4096); any element with smaller - // alignment is therefore aligned at every offset, since size_of is - // always a multiple of align_of by Rust layout rules. 
- const { - assert!( - std::mem::align_of::>() <= 4096, - "FieldElement alignment must fit within mmap page alignment" - ) - } - if self.data.is_empty() || self.mmap_backing.is_some() { return Ok(()); } - let elem_size = std::mem::size_of::>(); - let total_bytes = (self.data.len() as u64) - .checked_mul(elem_size as u64) - .ok_or_else(|| { - std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "spill_to_disk: byte count overflows u64", - ) - })?; - - let file = tempfile::tempfile()?; - crypto::mmap_util::reserve_file_blocks(&file, total_bytes)?; - - // Write directly through a writable mmap, then downgrade to read-only. - // Avoids the write(2) → page-cache → mmap hand-off, which on Linux - // under memory pressure could produce partially-zeroed reads from the - // read-only mmap (the previous implementation relied on that handoff). - // - // SAFETY: tempfile() creates an anonymous file with no filesystem - // path, so no other process can open or modify it. - let mut mmap_mut = unsafe { memmap2::MmapOptions::new().map_mut(&file)? }; - // SAFETY: FieldElement is #[repr(transparent)] over F::BaseType. - // The Vec has the same byte layout as a contiguous array. 
- let bytes: &[u8] = unsafe { - std::slice::from_raw_parts(self.data.as_ptr() as *const u8, self.data.len() * elem_size) - }; - mmap_mut.copy_from_slice(bytes); - let mmap = mmap_mut.make_read_only()?; - + let mmap = crypto::mmap_util::spill_slice_to_mmap(&self.data)?; self.mmap_backing = Some(TableMmapBacking { mmap, width: self.width, height: self.height, - elem_size, + elem_size: std::mem::size_of::>(), }); - - // Free heap allocation self.data = Vec::new(); Ok(()) diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index ac31240b5..6f5896eb7 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -154,6 +154,7 @@ where #[cfg(feature = "disk-spill")] pub fn spill_main_to_disk(&mut self) -> std::io::Result<()> where + F: Copy + 'static, F::BaseType: SpillSafe, { self.main_table.spill_to_disk() @@ -162,6 +163,7 @@ where #[cfg(feature = "disk-spill")] pub fn spill_aux_to_disk(&mut self) -> std::io::Result<()> where + E: Copy + 'static, E::BaseType: SpillSafe, { self.aux_table.spill_to_disk() From c7029bd3bcd85313293e9237f0ea321e09527fd8 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 12:31:11 -0300 Subject: [PATCH 152/231] Make padded_chunked_rows zero-row guard a hard assert --- prover/src/tables/trace_builder.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 3e407cb97..bff5c988a 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2103,7 +2103,8 @@ fn build_traces( /// Padded row count after chunking: each chunk rounds up to `next_power_of_two().max(4)`. #[cfg(feature = "disk-spill")] fn padded_chunked_rows(ops_count: usize, max_rows: usize) -> u64 { - debug_assert!(max_rows > 0, "max_rows must be positive"); + // `max_rows <= 0` would loop forever. Called internally with const values > 0. 
+ assert!(max_rows > 0, "max_rows must be positive"); if ops_count == 0 { return 4; // empty-chunk tables still allocate one 4-row padded chunk } From 4e101e190fb216f744fec145278ade9e34701651 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 12:31:13 -0300 Subject: [PATCH 153/231] Note tmpfs caveat for spill files --- crypto/crypto/src/mmap_util.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/crypto/crypto/src/mmap_util.rs b/crypto/crypto/src/mmap_util.rs index 197bba18d..d03fd1d4a 100644 --- a/crypto/crypto/src/mmap_util.rs +++ b/crypto/crypto/src/mmap_util.rs @@ -6,6 +6,9 @@ use math::spill_safe::SpillSafe; /// Linux only, using `posix_fallocate`. On other platforms we only call /// `set_len` and skip reservation, so the kernel can still send SIGBUS if /// the disk fills mid-write. +/// +/// `/tmp` is often tmpfs (RAM-backed) on systemd-default distros; set +/// `TMPDIR` to a disk-backed path so spill files actually live on disk. pub fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::Result<()> { file.set_len(total_bytes)?; #[cfg(target_os = "linux")] @@ -61,9 +64,8 @@ pub fn spill_slice_to_mmap(slice: &[T]) -> std::io::Result Date: Tue, 5 May 2026 16:34:35 -0300 Subject: [PATCH 154/231] Test count_table_lengths against built Traces --- .../tests/count_table_lengths_drift_tests.rs | 93 +++++++++++++++++++ prover/src/tests/mod.rs | 2 + 2 files changed, 95 insertions(+) create mode 100644 prover/src/tests/count_table_lengths_drift_tests.rs diff --git a/prover/src/tests/count_table_lengths_drift_tests.rs b/prover/src/tests/count_table_lengths_drift_tests.rs new file mode 100644 index 000000000..fc8680d02 --- /dev/null +++ b/prover/src/tests/count_table_lengths_drift_tests.rs @@ -0,0 +1,93 @@ +//! Drift guard: `count_table_lengths` must stay aligned with the actual +//! `Traces::from_elf_and_logs` output. Adding a new table or changing a +//! 
row-count rule on either side without updating the other should fail +//! this test. + +use crate::tables::MaxRowsConfig; +use crate::tables::trace_builder::{Traces, count_table_lengths}; +use crate::test_utils::run_asm_elf; + +#[test] +fn count_table_lengths_matches_traces() { + let (elf, logs, _) = run_asm_elf("fib_iterative_372k"); + let max_rows = MaxRowsConfig::default(); + + let predicted = + count_table_lengths(&elf, &logs, &max_rows, &[]).expect("count_table_lengths succeeds"); + let traces = + Traces::from_elf_and_logs(&elf, &logs, &max_rows, &[]).expect("trace build succeeds"); + + let sum_heights = |tables: &[stark::trace::TraceTable<_, _>]| -> u64 { + tables.iter().map(|t| t.main_table.height as u64).sum() + }; + + // Exact-match tables: predicted row count equals built trace. + assert_eq!(predicted.cpu_padded_rows, sum_heights(&traces.cpus), "cpu"); + assert_eq!( + predicted.memw_padded_rows, + sum_heights(&traces.memws), + "memw" + ); + assert_eq!( + predicted.memw_aligned_padded_rows, + sum_heights(&traces.memw_aligneds), + "memw_aligned" + ); + assert_eq!( + predicted.memw_register_padded_rows, + sum_heights(&traces.memw_registers), + "memw_register" + ); + assert_eq!( + predicted.load_padded_rows, + sum_heights(&traces.loads), + "load" + ); + assert_eq!( + predicted.shift_padded_rows, + sum_heights(&traces.shifts), + "shift" + ); + assert_eq!( + predicted.commit_padded_rows, traces.commit.main_table.height as u64, + "commit" + ); + assert_eq!( + predicted.decode_rows, traces.decode.main_table.height as u64, + "decode" + ); + + // Upper-bound tables: predicted is `>=` actual (LT/MUL/DVRM/BRANCH dedup ops). 
+ assert!( + predicted.lt_padded_rows >= sum_heights(&traces.lts), + "lt: predicted={} actual={}", + predicted.lt_padded_rows, + sum_heights(&traces.lts) + ); + assert!( + predicted.mul_padded_rows >= sum_heights(&traces.muls), + "mul: predicted={} actual={}", + predicted.mul_padded_rows, + sum_heights(&traces.muls) + ); + assert!( + predicted.dvrm_padded_rows >= sum_heights(&traces.dvrms), + "dvrm: predicted={} actual={}", + predicted.dvrm_padded_rows, + sum_heights(&traces.dvrms) + ); + assert!( + predicted.branch_padded_rows >= sum_heights(&traces.branches), + "branch: predicted={} actual={}", + predicted.branch_padded_rows, + sum_heights(&traces.branches) + ); + + // Auxiliary scalars. + assert_eq!(predicted.cycle_count, logs.len() as u64, "cycle_count"); + assert_eq!( + predicted.unique_page_count, + traces.pages.len() as u64, + "unique_page_count" + ); +} diff --git a/prover/src/tests/mod.rs b/prover/src/tests/mod.rs index 303c24401..4b262ac18 100644 --- a/prover/src/tests/mod.rs +++ b/prover/src/tests/mod.rs @@ -10,6 +10,8 @@ pub mod branch_constraints_tests; pub mod commit_tests; #[cfg(test)] pub mod constraints_tests; +#[cfg(all(test, feature = "disk-spill"))] +pub mod count_table_lengths_drift_tests; #[cfg(test)] pub mod cpu_tests; #[cfg(test)] From 520d60c0be5805386a23afa428efdfb2d74d876d Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 16:51:21 -0300 Subject: [PATCH 155/231] Trim spill_slice_to_mmap doc comment --- crypto/crypto/src/mmap_util.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crypto/crypto/src/mmap_util.rs b/crypto/crypto/src/mmap_util.rs index d03fd1d4a..43ddf506a 100644 --- a/crypto/crypto/src/mmap_util.rs +++ b/crypto/crypto/src/mmap_util.rs @@ -31,10 +31,6 @@ pub fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::R /// Mmap a fresh temp file, copy `slice` into the mapping, downgrade to /// read-only, and return it. 
/// -/// Writes through the writable mmap rather than via `write(2)` + remap, -/// which on Linux under memory pressure could otherwise produce -/// partially-zeroed reads from the read-only mmap. -/// /// Alignment: the mmap base is page-aligned (>= 4096), this function /// asserts `align_of::() <= 4096`, and Rust guarantees `size_of::()` /// is a multiple of `align_of::()`, so every element offset is aligned. From f0d110e76218f2f46bd7b7645a900966169ae06d Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 17:30:22 -0300 Subject: [PATCH 156/231] Gate aux_merkle on aux_cols > 0 --- prover/src/auto_storage.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/prover/src/auto_storage.rs b/prover/src/auto_storage.rs index a3340382d..f8d77ccf3 100644 --- a/prover/src/auto_storage.rs +++ b/prover/src/auto_storage.rs @@ -60,10 +60,13 @@ fn persistent_per_table(spec: TableSpec, blowup: u64) -> u64 { .saturating_mul(rows) .saturating_mul(blowup) .saturating_mul(KECCAK_NODE_BYTES); - let aux_merkle = 2u64 - .saturating_mul(rows) - .saturating_mul(blowup) - .saturating_mul(KECCAK_NODE_BYTES); + let aux_merkle = if aux_cols > 0 { + 2u64.saturating_mul(rows) + .saturating_mul(blowup) + .saturating_mul(KECCAK_NODE_BYTES) + } else { + 0 + }; main_lde .saturating_add(aux_lde) .saturating_add(main_merkle) From 26adc43ff7e73e8e81c17704ca587257593aa062 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 17:30:35 -0300 Subject: [PATCH 157/231] Revert halt_rows estimate to actual height --- prover/src/auto_storage.rs | 2 +- prover/src/tests/count_table_lengths_drift_tests.rs | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/prover/src/auto_storage.rs b/prover/src/auto_storage.rs index f8d77ccf3..068465707 100644 --- a/prover/src/auto_storage.rs +++ b/prover/src/auto_storage.rs @@ -103,7 +103,7 @@ fn aux_cols(bus_count: usize) -> u64 { fn table_specs(lengths: &TableLengths) -> Vec { let bitwise_rows = 
BITWISE_ROWS as u64; let register_rows = NUM_REGISTER_ADDRESSES.next_power_of_two() as u64; - let halt_rows = 4u64; + let halt_rows = 1u64; let page_rows = PAGE_SIZE as u64; let mut specs = vec![ diff --git a/prover/src/tests/count_table_lengths_drift_tests.rs b/prover/src/tests/count_table_lengths_drift_tests.rs index fc8680d02..33ab650e7 100644 --- a/prover/src/tests/count_table_lengths_drift_tests.rs +++ b/prover/src/tests/count_table_lengths_drift_tests.rs @@ -90,4 +90,7 @@ fn count_table_lengths_matches_traces() { traces.pages.len() as u64, "unique_page_count" ); + + // Mirrors hardcoded `halt_rows = 1` in `auto_storage::table_specs`. + assert_eq!(traces.halt.main_table.height, 1, "halt_rows"); } From a7ccd390ef61ab3deeabb873a72f63b07b115f45 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 17:30:42 -0300 Subject: [PATCH 158/231] Raise auto-Disk warning threshold to 75% --- prover/src/auto_storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prover/src/auto_storage.rs b/prover/src/auto_storage.rs index 068465707..382d38833 100644 --- a/prover/src/auto_storage.rs +++ b/prover/src/auto_storage.rs @@ -298,7 +298,7 @@ pub fn select_storage_mode( } else { // `cap.is_none()` plus an `effective_budget` that returned `Some` means // `available` must be `Some` (see `effective_budget`). - if cap.is_none() && estimated.saturating_mul(2) >= available.unwrap() { + if cap.is_none() && estimated.saturating_mul(4) >= available.unwrap().saturating_mul(3) { log::warn!( "Auto disk-spill picked Ram with estimated_peak={estimated} bytes near \ available={available:?}. 
Set max_ram_bytes to bound the budget to a \ From 404fb7a8d287f3b62425946b303a6985fc1936a6 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 17:37:24 -0300 Subject: [PATCH 159/231] Make reserve_file_blocks private --- crypto/crypto/src/mmap_util.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/crypto/src/mmap_util.rs b/crypto/crypto/src/mmap_util.rs index 43ddf506a..4e493a2dd 100644 --- a/crypto/crypto/src/mmap_util.rs +++ b/crypto/crypto/src/mmap_util.rs @@ -9,7 +9,7 @@ use math::spill_safe::SpillSafe; /// /// `/tmp` is often tmpfs (RAM-backed) on systemd-default distros; set /// `TMPDIR` to a disk-backed path so spill files actually live on disk. -pub fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::Result<()> { +fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::Result<()> { file.set_len(total_bytes)?; #[cfg(target_os = "linux")] { From 330567e4b547d852a81f4195b3c51307c39dade5 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 17:39:16 -0300 Subject: [PATCH 160/231] Reorder mmap_util to lead with public API --- crypto/crypto/src/mmap_util.rs | 56 +++++++++++++++++----------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/crypto/crypto/src/mmap_util.rs b/crypto/crypto/src/mmap_util.rs index 4e493a2dd..faa65717a 100644 --- a/crypto/crypto/src/mmap_util.rs +++ b/crypto/crypto/src/mmap_util.rs @@ -1,33 +1,5 @@ use math::spill_safe::SpillSafe; -/// Reserve disk blocks up front so this call fails on a full disk. -/// Without reservation, the kernel sends SIGBUS during the later mmap write. -/// -/// Linux only, using `posix_fallocate`. On other platforms we only call -/// `set_len` and skip reservation, so the kernel can still send SIGBUS if -/// the disk fills mid-write. -/// -/// `/tmp` is often tmpfs (RAM-backed) on systemd-default distros; set -/// `TMPDIR` to a disk-backed path so spill files actually live on disk. 
-fn reserve_file_blocks(file: &std::fs::File, total_bytes: u64) -> std::io::Result<()> { - file.set_len(total_bytes)?; - #[cfg(target_os = "linux")] - { - use std::os::unix::io::AsRawFd; - let len = i64::try_from(total_bytes).map_err(|_| { - std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "spill file too large for posix_fallocate", - ) - })?; - let ret = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, len) }; - if ret != 0 { - return Err(std::io::Error::from_raw_os_error(ret)); - } - } - Ok(()) -} - /// Mmap a fresh temp file, copy `slice` into the mapping, downgrade to /// read-only, and return it. /// @@ -65,3 +37,31 @@ pub fn spill_slice_to_mmap(slice: &[T]) -> std::io::Result std::io::Result<()> { + file.set_len(total_bytes)?; + #[cfg(target_os = "linux")] + { + use std::os::unix::io::AsRawFd; + let len = i64::try_from(total_bytes).map_err(|_| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "spill file too large for posix_fallocate", + ) + })?; + let ret = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, len) }; + if ret != 0 { + return Err(std::io::Error::from_raw_os_error(ret)); + } + } + Ok(()) +} From a265599aa6664929c165e1c6927b42025a250e04 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 17:44:29 -0300 Subject: [PATCH 161/231] Restore owned buffers wording in coset_lde_full_expand doc --- crypto/math/src/fft/polynomial.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/math/src/fft/polynomial.rs b/crypto/math/src/fft/polynomial.rs index f9b6b651f..ccc1ac391 100644 --- a/crypto/math/src/fft/polynomial.rs +++ b/crypto/math/src/fft/polynomial.rs @@ -242,7 +242,7 @@ impl Polynomial> { /// /// Unlike [`coset_lde_full_into`], this skips the `clear + extend_from_slice` step /// since data is already in the buffer. Used for transpose elimination: columns are - /// extracted directly into pool buffers, then expanded in-place. 
+ /// extracted directly into owned buffers, then expanded in-place. pub fn coset_lde_full_expand + Send + Sync>( buffer: &mut Vec>, blowup_factor: usize, From 8581b1e47f0a51f03779b35744d8de6c389caf0b Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 17:59:34 -0300 Subject: [PATCH 162/231] Update FieldElement repr soundness doc to reference SpillSafe --- crypto/math/src/field/element.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crypto/math/src/field/element.rs b/crypto/math/src/field/element.rs index d25470323..a3816f4a4 100644 --- a/crypto/math/src/field/element.rs +++ b/crypto/math/src/field/element.rs @@ -40,10 +40,11 @@ use super::traits::{IsPrimeField, IsSubFieldOf, LegendreSymbol}; /// A field element with operations algorithms defined in `F` /// -/// `#[repr(transparent)]` is required for soundness: `StorageMode::Disk` -/// in `crypto/stark` casts raw mmap bytes to `*const FieldElement` -/// (see `table.rs::get`, `trace.rs`). Changing the `repr`, adding -/// fields, or introducing padding makes those casts UB. +/// `#[repr(transparent)]` is required for soundness: it makes +/// `FieldElement` byte-identical to `F::BaseType`, satisfying the +/// no-padding requirement of [`SpillSafe`](crate::spill_safe::SpillSafe). +/// Changing the `repr`, adding fields, or introducing padding breaks +/// this contract — UB in any function that requires `T: SpillSafe`. 
#[allow(clippy::derived_hash_with_manual_eq)] #[repr(transparent)] #[derive(Debug, Clone, Hash, Copy)] From 517734f25f35ed835f8a783b8c4f9b9481daea66 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 18:00:19 -0300 Subject: [PATCH 163/231] Restore derived PartialEq for non-spill Table builds --- crypto/math/src/field/element.rs | 9 ++++----- crypto/stark/src/table.rs | 7 ++++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/crypto/math/src/field/element.rs b/crypto/math/src/field/element.rs index a3816f4a4..0eb0aef96 100644 --- a/crypto/math/src/field/element.rs +++ b/crypto/math/src/field/element.rs @@ -40,11 +40,10 @@ use super::traits::{IsPrimeField, IsSubFieldOf, LegendreSymbol}; /// A field element with operations algorithms defined in `F` /// -/// `#[repr(transparent)]` is required for soundness: it makes -/// `FieldElement` byte-identical to `F::BaseType`, satisfying the -/// no-padding requirement of [`SpillSafe`](crate::spill_safe::SpillSafe). -/// Changing the `repr`, adding fields, or introducing padding breaks -/// this contract — UB in any function that requires `T: SpillSafe`. +/// `#[repr(transparent)]` makes `FieldElement` byte-identical to +/// `F::BaseType`, which [`SpillSafe`](crate::spill_safe::SpillSafe) +/// requires. Changing the `repr` or adding fields breaks this and +/// is UB in any function that requires `T: SpillSafe`. #[allow(clippy::derived_hash_with_manual_eq)] #[repr(transparent)] #[derive(Debug, Clone, Hash, Copy)] diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 13f7942e3..331615349 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -41,7 +41,10 @@ impl std::fmt::Debug for TableMmapBacking { /// Since this struct is a representation of a two-dimensional table, all rows should have the same /// length. 
#[derive(Default, Debug, serde::Deserialize)] -#[cfg_attr(not(feature = "disk-spill"), derive(serde::Serialize, Clone))] +#[cfg_attr( + not(feature = "disk-spill"), + derive(serde::Serialize, Clone, PartialEq, Eq) +)] #[serde(bound = "")] pub struct Table { pub data: Vec>, @@ -126,6 +129,7 @@ impl Clone for Table { /// Element-wise comparison via `get()`, so spilled tables compare by field /// equality (canonicalized per `F::eq`) rather than raw mmap bytes. +#[cfg(feature = "disk-spill")] impl PartialEq for Table { fn eq(&self, other: &Self) -> bool { if self.width != other.width || self.height != other.height { @@ -142,6 +146,7 @@ impl PartialEq for Table { } } +#[cfg(feature = "disk-spill")] impl Eq for Table {} impl Table { From 5fd4ffe606d919e9db36ecf03b9f9902b052d9e0 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 20:42:13 -0300 Subject: [PATCH 164/231] Tighten SpillSafe module doc comment --- crypto/math/src/spill_safe.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/math/src/spill_safe.rs b/crypto/math/src/spill_safe.rs index 9b75207b0..7bcbdf103 100644 --- a/crypto/math/src/spill_safe.rs +++ b/crypto/math/src/spill_safe.rs @@ -4,8 +4,8 @@ //! Stricter than `Copy`, which permits types with restricted bit patterns //! (e.g. `bool`, `NonZeroU32`). //! -//! Implementing this trait is a deliberate `unsafe impl` — the implementer -//! asserts the layout invariants hold, the compiler does not check. +//! `unsafe impl` puts the layout invariants on the implementer. The +//! compiler does not check. 
use crate::field::{element::FieldElement, traits::IsField}; From 6a3d8d82dc01c28709592011018e169528b73de2 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 21:03:24 -0300 Subject: [PATCH 165/231] Feature-gate ProofOptions::max_ram_bytes behind disk-spill --- crypto/stark/benches/profile_prover.rs | 1 + crypto/stark/benches/prover_benchmark.rs | 1 + crypto/stark/src/proof/options.rs | 12 +++++++----- crypto/stark/src/tests/prover_tests.rs | 7 +++++++ 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/crypto/stark/benches/profile_prover.rs b/crypto/stark/benches/profile_prover.rs index 52be8c49a..b2ee9aa7e 100644 --- a/crypto/stark/benches/profile_prover.rs +++ b/crypto/stark/benches/profile_prover.rs @@ -21,6 +21,7 @@ fn main() { fri_number_of_queries: 100, coset_offset: 3, grinding_factor: 0, + #[cfg(feature = "disk-spill")] max_ram_bytes: None, }; diff --git a/crypto/stark/benches/prover_benchmark.rs b/crypto/stark/benches/prover_benchmark.rs index 4dfd02634..e80c37801 100644 --- a/crypto/stark/benches/prover_benchmark.rs +++ b/crypto/stark/benches/prover_benchmark.rs @@ -61,6 +61,7 @@ fn benchmark_proof_options() -> ProofOptions { fri_number_of_queries: 30, coset_offset: 3, grinding_factor: 0, + #[cfg(feature = "disk-spill")] max_ram_bytes: None, } } diff --git a/crypto/stark/src/proof/options.rs b/crypto/stark/src/proof/options.rs index d41fef510..7180b7af8 100644 --- a/crypto/stark/src/proof/options.rs +++ b/crypto/stark/src/proof/options.rs @@ -38,11 +38,6 @@ impl fmt::Display for ProofOptionsError { /// - `fri_number_of_queries`: the number of queries for the FRI layer /// - `coset_offset`: the offset for the coset /// - `grinding_factor`: the number of leading zeros that we want for the Hash(hash || nonce) -/// - `max_ram_bytes`: optional ceiling on prover RAM usage. 
When set, the -/// prover spills trace tables and Merkle-tree nodes to mmap if the -/// estimated peak exceeds this cap or system-available RAM (less a safety -/// margin), whichever is smaller. LDE column vectors remain in RAM -/// regardless. #[cfg_attr(feature = "wasm", wasm_bindgen)] #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] pub struct ProofOptions { @@ -50,6 +45,11 @@ pub struct ProofOptions { pub fri_number_of_queries: usize, pub coset_offset: u64, pub grinding_factor: u8, + /// Optional ceiling on prover RAM usage. When set, the prover spills + /// trace tables and Merkle-tree nodes to mmap if the estimated peak + /// exceeds this cap or system-available RAM (less a safety margin), + /// whichever is smaller. LDE column vectors remain in RAM regardless. + #[cfg(feature = "disk-spill")] #[serde(default)] pub max_ram_bytes: Option, } @@ -63,6 +63,7 @@ impl ProofOptions { fri_number_of_queries: 3, coset_offset: 3, grinding_factor: 1, + #[cfg(feature = "disk-spill")] max_ram_bytes: None, } } @@ -120,6 +121,7 @@ impl GoldilocksCubicProofOptions { fri_number_of_queries, coset_offset: 3, grinding_factor, + #[cfg(feature = "disk-spill")] max_ram_bytes: None, }) } diff --git a/crypto/stark/src/tests/prover_tests.rs b/crypto/stark/src/tests/prover_tests.rs index 8029467e6..55914c712 100644 --- a/crypto/stark/src/tests/prover_tests.rs +++ b/crypto/stark/src/tests/prover_tests.rs @@ -32,6 +32,7 @@ fn test_domain_constructor() { fri_number_of_queries: 1, coset_offset, grinding_factor, + #[cfg(feature = "disk-spill")] max_ram_bytes: None, }; @@ -124,6 +125,7 @@ fn barycentric_trace_eval_matches_horner_trace_eval() { fri_number_of_queries: 1, coset_offset, grinding_factor: 0, + #[cfg(feature = "disk-spill")] max_ram_bytes: None, }; @@ -196,6 +198,7 @@ fn test_decompose_and_extend_d2_matches_original() { fri_number_of_queries: 1, coset_offset: 3, grinding_factor: 0, + #[cfg(feature = "disk-spill")] max_ram_bytes: None, }; @@ -257,6 +260,7 @@ fn 
test_multi_prove_mixed_coset_offsets() { fri_number_of_queries: 3, coset_offset: 3, grinding_factor: 1, + #[cfg(feature = "disk-spill")] max_ram_bytes: None, }; let proof_options_7 = ProofOptions { @@ -264,6 +268,7 @@ fn test_multi_prove_mixed_coset_offsets() { fri_number_of_queries: 3, coset_offset: 7, grinding_factor: 1, + #[cfg(feature = "disk-spill")] max_ram_bytes: None, }; @@ -329,6 +334,7 @@ fn test_multi_prove_dedups_shared_domain_params() { fri_number_of_queries: 3, coset_offset: 3, grinding_factor: 1, + #[cfg(feature = "disk-spill")] max_ram_bytes: None, }; @@ -420,6 +426,7 @@ fn test_deep_poly_direct_2n_matches_interpolate_fft_extend() { fri_number_of_queries: 1, coset_offset: 3, grinding_factor: 0, + #[cfg(feature = "disk-spill")] max_ram_bytes: None, }; From c41d972412c31db2e2dc3133e3b6ed89a6ae3858 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 21:05:17 -0300 Subject: [PATCH 166/231] Tighten wasm32 disk-spill compile_error comment --- crypto/stark/src/lib.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/stark/src/lib.rs b/crypto/stark/src/lib.rs index d6bca2e5b..b4767750c 100644 --- a/crypto/stark/src/lib.rs +++ b/crypto/stark/src/lib.rs @@ -1,6 +1,5 @@ -// `StorageMode::Disk` is implemented via `memmap2`, which doesn't compile on -// wasm32. Fail loudly at the top of the crate rather than via a confusing -// transitive memmap2 error deeper in the dep graph. +// `StorageMode::Disk` uses `memmap2`, which does not build on wasm32. +// Fail at the crate root rather than as a transitive memmap2 error. 
#[cfg(all(target_arch = "wasm32", feature = "disk-spill"))] compile_error!("the `disk-spill` feature requires memmap2, which does not compile on wasm32"); From d5963b0b3a9ccb89ba8135d03edbfea30fe6f1a4 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 21:07:51 -0300 Subject: [PATCH 167/231] Drop typically from DiskSpill error doc --- crypto/stark/src/prover.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 47b403f3e..4d69b962d 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -104,7 +104,7 @@ pub enum ProvingError { WrongParameter(String), EmptyCommitment, /// I/O failure while spilling prover state (traces, LDE, Merkle trees) to disk: - /// typically out of disk space, fd exhaustion, or mmap failure. + /// out of disk space, fd exhaustion, or mmap failure. #[cfg(feature = "disk-spill")] DiskSpill(String), } From a70a21a08d61080116fd381db17533f0e73eb5e3 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 21:12:29 -0300 Subject: [PATCH 168/231] Drop redundant advise_drop_cache inline comment --- crypto/stark/src/prover.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 4d69b962d..8894feb88 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -567,7 +567,6 @@ pub trait IsStarkProver< let mut columns = trace.extract_columns_main(lde_size); #[cfg(feature = "disk-spill")] if storage_mode == StorageMode::Disk { - // Evict mmap pages so spilled data doesn't occupy heap + cache. 
trace.main_table.advise_drop_cache(); } #[cfg(feature = "instruments")] From 73a1493323b44a493db90fd6a1bb5a554d743313 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 22:06:16 -0300 Subject: [PATCH 169/231] Merge multi_prove and multi_prove_with_mode into one entry point --- crypto/stark/src/prover.rs | 73 ++++++++----------- crypto/stark/src/tests/air_tests.rs | 6 +- .../src/tests/bus_tests/completeness_tests.rs | 12 +-- .../src/tests/bus_tests/multiplicity_tests.rs | 6 +- .../src/tests/bus_tests/soundness_tests.rs | 44 +++++------ .../src/tests/prove_verify_roundtrip_tests.rs | 2 +- crypto/stark/src/tests/prover_tests.rs | 4 +- prover/src/lib.rs | 13 ++-- prover/src/test_utils.rs | 28 +++++++ prover/src/tests/bitwise_bus_tests.rs | 7 +- prover/src/tests/bitwise_tests.rs | 21 ++++-- prover/src/tests/branch_bus_tests.rs | 7 +- prover/src/tests/decode_tests.rs | 9 +-- prover/src/tests/lt_bus_tests.rs | 7 +- prover/src/tests/prove_elfs_tests.rs | 28 +++---- 15 files changed, 148 insertions(+), 119 deletions(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 8894feb88..bc65cb6fd 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1574,47 +1574,6 @@ pub trait IsStarkProver< /// /// The transcript must be safely initialized before passing it to this method. fn multi_prove( - air_trace_pairs: Vec>, - transcript: &mut (impl IsStarkTranscript + Clone + Send), - ) -> Result, ProvingError> - where - FieldElement: AsBytes, - FieldElement: AsBytes, - PI: Send + Sync + Clone, - Field: Copy + 'static, - FieldExtension: Copy + 'static, - ::BaseType: SpillSafe, - ::BaseType: SpillSafe, - { - Self::multi_prove_inner( - air_trace_pairs, - transcript, - #[cfg(feature = "disk-spill")] - StorageMode::Ram, - ) - } - - /// Same as `multi_prove` but lets callers back intermediate state with mmap - /// files to cap peak RAM usage. 
- #[cfg(feature = "disk-spill")] - fn multi_prove_with_mode( - air_trace_pairs: Vec>, - transcript: &mut (impl IsStarkTranscript + Clone + Send), - storage_mode: StorageMode, - ) -> Result, ProvingError> - where - FieldElement: AsBytes, - FieldElement: AsBytes, - PI: Send + Sync + Clone, - Field: Copy + 'static, - FieldExtension: Copy + 'static, - ::BaseType: SpillSafe, - ::BaseType: SpillSafe, - { - Self::multi_prove_inner(air_trace_pairs, transcript, storage_mode) - } - - fn multi_prove_inner( mut air_trace_pairs: Vec>, transcript: &mut (impl IsStarkTranscript + Clone + Send), #[cfg(feature = "disk-spill")] storage_mode: StorageMode, @@ -2094,6 +2053,29 @@ pub trait IsStarkProver< Ok(MultiProof { proofs }) } + /// Multi-AIR prove with `StorageMode::Ram`. Test convenience. + #[cfg(test)] + fn multi_prove_ram( + air_trace_pairs: Vec>, + transcript: &mut (impl IsStarkTranscript + Clone + Send), + ) -> Result, ProvingError> + where + FieldElement: AsBytes, + FieldElement: AsBytes, + PI: Send + Sync + Clone, + Field: Copy + 'static, + FieldExtension: Copy + 'static, + ::BaseType: SpillSafe, + ::BaseType: SpillSafe, + { + Self::multi_prove( + air_trace_pairs, + transcript, + #[cfg(feature = "disk-spill")] + StorageMode::Ram, + ) + } + /// Generate a STARK proof for a single AIR/trace. /// This is equivalent to calling `multi_prove` with a single-element slice. 
fn prove( @@ -2112,8 +2094,13 @@ pub trait IsStarkProver< ::BaseType: SpillSafe, { let air_trace_pairs = vec![(air, trace, pub_inputs)]; - Self::multi_prove(air_trace_pairs, transcript) - .map(|mut multi_proof| multi_proof.proofs.remove(0)) + Self::multi_prove( + air_trace_pairs, + transcript, + #[cfg(feature = "disk-spill")] + StorageMode::Ram, + ) + .map(|mut multi_proof| multi_proof.proofs.remove(0)) } // TODO: propagate errors instead of unwrap() in open_deep_composition_poly and FRI operations diff --git a/crypto/stark/src/tests/air_tests.rs b/crypto/stark/src/tests/air_tests.rs index 11d356ccf..601652acf 100644 --- a/crypto/stark/src/tests/air_tests.rs +++ b/crypto/stark/src/tests/air_tests.rs @@ -400,7 +400,7 @@ fn test_multi_prove_fib_3_tables() { (&air_3, &mut trace_3, &pub_inputs_3), ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec< &dyn AIR< @@ -500,7 +500,7 @@ fn test_multi_prove_2_tables_small_field() { (&air_2, &mut trace_2, &pub_inputs_2), ]; - let multi_proof = Prover::multi_prove( + let multi_proof = Prover::multi_prove_ram( air_trace_pairs, &mut DefaultTranscript::::new(&[]), ) @@ -538,7 +538,7 @@ fn test_multi_prove_different_airs() { )> = vec![(&air_1, &mut trace_1, &()), (&air_2, &mut trace_2, &())]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec< &dyn AIR, diff --git a/crypto/stark/src/tests/bus_tests/completeness_tests.rs b/crypto/stark/src/tests/bus_tests/completeness_tests.rs index 7ca124fe1..afd7ea2b0 100644 --- a/crypto/stark/src/tests/bus_tests/completeness_tests.rs +++ b/crypto/stark/src/tests/bus_tests/completeness_tests.rs @@ -122,7 +122,7 @@ fn test_multi_table_proof() { ]; let multi_proof = - 
Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -185,7 +185,7 @@ fn test_all_padding() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -248,7 +248,7 @@ fn test_single_operation() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -311,7 +311,7 @@ fn test_duplicate_operations() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -374,7 +374,7 @@ fn test_serialization_roundtrip() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); // Serialize and deserialize let serialized = serde_cbor::to_vec(&multi_proof).expect("serialization failed"); @@ -519,7 +519,7 @@ fn test_bus_value_features() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; diff --git a/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs b/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs index d4ef1aee9..44c7791f8 100644 --- 
a/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs +++ b/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs @@ -113,7 +113,7 @@ fn test_multiplicity_one() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -223,7 +223,7 @@ fn test_multiplicity_sum() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -331,7 +331,7 @@ fn test_multiplicity_negated() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; diff --git a/crypto/stark/src/tests/bus_tests/soundness_tests.rs b/crypto/stark/src/tests/bus_tests/soundness_tests.rs index e1994ef6a..51e728d9e 100644 --- a/crypto/stark/src/tests/bus_tests/soundness_tests.rs +++ b/crypto/stark/src/tests/bus_tests/soundness_tests.rs @@ -79,7 +79,7 @@ fn test_wrong_result_value() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -142,7 +142,7 @@ fn test_off_by_one() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -205,7 +205,7 @@ fn test_swapped_operands() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut 
DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -268,7 +268,7 @@ fn test_single_column_wrong() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -335,7 +335,7 @@ fn test_over_report_multiplicity() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -398,7 +398,7 @@ fn test_under_report_multiplicity() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -461,7 +461,7 @@ fn test_zero_multiplicity_skip() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -528,7 +528,7 @@ fn test_phantom_receive() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -591,7 +591,7 @@ fn test_missing_receiver() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let 
airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -662,7 +662,7 @@ fn test_tampered_table_contribution() { ]; let mut multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); // Corrupt table_contribution in the ADD table's bus public inputs. // This changes the per-row offset L/N used in the circular constraint, @@ -742,7 +742,7 @@ fn test_tampered_acc_ood_evaluation() { ]; let mut multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); // Corrupt the acc column OOD evaluation in the ADD table proof. // With batching + absorption, ADD has 4 main columns and 1 aux column @@ -827,7 +827,7 @@ fn test_missing_bus_public_inputs_rejected() { ]; let mut multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); // Remove bus_public_inputs from the ADD table proof entirely. multi_proof.proofs[1].bus_public_inputs = None; @@ -948,7 +948,7 @@ fn test_zeroed_table_contribution_rejected() { ]; let mut multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); // Zero out table_contribution for the ADD table. 
let add_proof = &mut multi_proof.proofs[1]; @@ -1026,7 +1026,7 @@ fn test_one_of_many_wrong() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -1134,7 +1134,7 @@ fn test_full_scenario_wrong_add() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -1208,7 +1208,7 @@ fn test_wrong_table_consumes_value_rejected() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -1324,7 +1324,7 @@ fn test_packing_mismatch_direct_vs_word2l() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -1429,7 +1429,7 @@ fn test_packing_mismatch_element_count() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -1531,7 +1531,7 @@ fn test_packing_mismatch_shift_constant() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -1634,7 +1634,7 @@ fn 
test_compound_mismatch_dwordhhw_vs_dwordwhh() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -1727,7 +1727,7 @@ fn test_compound_equals_primitive_expansion() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -1843,7 +1843,7 @@ fn test_full_scenario_wrong_mul() { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; diff --git a/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs b/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs index 9e3c60091..9031fd6dd 100644 --- a/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs +++ b/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs @@ -137,7 +137,7 @@ fn test_verify_serialized_multi_table_proofs() { (&mul_air, &mut mul_trace, &()), ]; - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap() + Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap() }; // ========================================================================= diff --git a/crypto/stark/src/tests/prover_tests.rs b/crypto/stark/src/tests/prover_tests.rs index 55914c712..a726aa279 100644 --- a/crypto/stark/src/tests/prover_tests.rs +++ b/crypto/stark/src/tests/prover_tests.rs @@ -297,7 +297,7 @@ fn test_multi_prove_mixed_coset_offsets() { (&air_2, &mut trace_2, &pub_inputs), ]; - let multi_proof = Prover::multi_prove( + let multi_proof = Prover::multi_prove_ram( air_trace_pairs, &mut 
DefaultTranscript::::new(&[]), ) @@ -365,7 +365,7 @@ fn test_multi_prove_dedups_shared_domain_params() { (&air_3, &mut trace_3, &pub_inputs), ]; - let multi_proof = Prover::multi_prove( + let multi_proof = Prover::multi_prove_ram( air_trace_pairs, &mut DefaultTranscript::::new(&[]), ) diff --git a/prover/src/lib.rs b/prover/src/lib.rs index f7e10209f..cc4f01781 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -651,12 +651,13 @@ pub fn prove_with_options_and_inputs( // Phase 4: Prove (multi_prove) let air_pairs = airs.air_trace_pairs(&mut traces); let transcript = &mut DefaultTranscript::::new(&[]); - #[cfg(feature = "disk-spill")] - let proof = Prover::multi_prove_with_mode(air_pairs, transcript, storage_mode) - .map_err(|e| Error::Prover(format!("{e:?}")))?; - #[cfg(not(feature = "disk-spill"))] - let proof = - Prover::multi_prove(air_pairs, transcript).map_err(|e| Error::Prover(format!("{e:?}")))?; + let proof = Prover::multi_prove( + air_pairs, + transcript, + #[cfg(feature = "disk-spill")] + storage_mode, + ) + .map_err(|e| Error::Prover(format!("{e:?}")))?; #[cfg(feature = "instruments")] { diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index b47554857..419c23793 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -12,6 +12,7 @@ use std::path::PathBuf; +use crypto::fiat_shamir::is_transcript::IsStarkTranscript; use executor::elf::Elf; use executor::vm::execution::Executor; use executor::vm::instruction::decoding::Instruction; @@ -21,7 +22,12 @@ use math::field::element::FieldElement; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; use stark::lookup::{AirWithBuses, AuxiliaryTraceBuildData, NullBoundaryConstraintBuilder}; use stark::proof::options::ProofOptions; +use stark::proof::stark::MultiProof; +use stark::prover::{IsStarkProver, Prover, ProvingError}; +#[cfg(feature = "disk-spill")] +use stark::storage_mode::StorageMode; use stark::trace::TraceTable; +use 
stark::traits::AIR; use crate::constraints::cpu::create_all_cpu_constraints; use crate::tables::bitwise::{ @@ -74,6 +80,28 @@ pub type FE = FieldElement; pub type VmAir = AirWithBuses; +type GoldilocksPair<'a, PI> = ( + &'a dyn AIR, + &'a mut TraceTable, + &'a PI, +); + +/// Multi-AIR prove with `StorageMode::Ram`. Test convenience. +pub fn multi_prove_ram( + air_trace_pairs: Vec>, + transcript: &mut (impl IsStarkTranscript + Clone + Send), +) -> Result, ProvingError> +where + PI: Send + Sync + Clone, +{ + Prover::::multi_prove( + air_trace_pairs, + transcript, + #[cfg(feature = "disk-spill")] + StorageMode::Ram, + ) +} + // ============================================================================= // ELF Execution Helpers // ============================================================================= diff --git a/prover/src/tests/bitwise_bus_tests.rs b/prover/src/tests/bitwise_bus_tests.rs index 317b22362..12b26bbc6 100644 --- a/prover/src/tests/bitwise_bus_tests.rs +++ b/prover/src/tests/bitwise_bus_tests.rs @@ -15,7 +15,6 @@ use stark::lookup::{ NullBoundaryConstraintBuilder, Packing, }; use stark::proof::options::ProofOptions; -use stark::prover::{IsStarkProver, Prover}; use stark::trace::TraceTable; use stark::traits::AIR; use stark::verifier::{IsStarkVerifier, Verifier}; @@ -197,7 +196,8 @@ fn prove_and_verify(sender_lookups: &[(u8, u8, u8)]) -> bool { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; @@ -307,7 +307,8 @@ fn prove_and_verify_custom( ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; diff --git 
a/prover/src/tests/bitwise_tests.rs b/prover/src/tests/bitwise_tests.rs index 2848edef4..7f30d02e0 100644 --- a/prover/src/tests/bitwise_tests.rs +++ b/prover/src/tests/bitwise_tests.rs @@ -589,8 +589,11 @@ mod soundness_tests { (&receiver_air, &mut receiver_trace, &()), ]; - let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + let multi_proof = crate::test_utils::multi_prove_ram( + air_trace_pairs, + &mut DefaultTranscript::::new(&[]), + ) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; @@ -637,8 +640,11 @@ mod soundness_tests { (&receiver_air, &mut receiver_trace, &()), ]; - let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + let multi_proof = crate::test_utils::multi_prove_ram( + air_trace_pairs, + &mut DefaultTranscript::::new(&[]), + ) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; @@ -707,8 +713,11 @@ mod soundness_tests { (&prover_receiver_air, &mut malicious_trace, &()), ]; - let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + let multi_proof = crate::test_utils::multi_prove_ram( + air_trace_pairs, + &mut DefaultTranscript::::new(&[]), + ) + .unwrap(); // Verifier uses DIFFERENT AIR with honest commitment let verifier_airs: Vec<&dyn AIR> = diff --git a/prover/src/tests/branch_bus_tests.rs b/prover/src/tests/branch_bus_tests.rs index 1b3ae5071..8d57e1953 100644 --- a/prover/src/tests/branch_bus_tests.rs +++ b/prover/src/tests/branch_bus_tests.rs @@ -17,7 +17,6 @@ use stark::lookup::{ NullBoundaryConstraintBuilder, Packing, }; use stark::proof::options::ProofOptions; -use stark::prover::{IsStarkProver, Prover}; use stark::trace::TraceTable; use stark::traits::AIR; use stark::verifier::{IsStarkVerifier, Verifier}; @@ -340,7 +339,8 @@ fn prove_and_verify(ops: &[BranchOperation]) -> bool { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut 
DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; @@ -430,7 +430,8 @@ fn prove_and_verify_custom(ops: &[BranchOperation], receiver_rows: &[CustomBranc ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; diff --git a/prover/src/tests/decode_tests.rs b/prover/src/tests/decode_tests.rs index 852c2ccd6..7392e17c4 100644 --- a/prover/src/tests/decode_tests.rs +++ b/prover/src/tests/decode_tests.rs @@ -867,7 +867,6 @@ fn test_instructions_from_elf_includes_all_executable() { fn test_decode_soundness_different_elf_rejected() { use crypto::fiat_shamir::default_transcript::DefaultTranscript; use stark::proof::options::ProofOptions; - use stark::prover::{IsStarkProver, Prover}; use stark::traits::AIR; use stark::verifier::{IsStarkVerifier, Verifier}; @@ -948,8 +947,9 @@ fn test_decode_soundness_different_elf_rejected() { (&prover_decode_air, &mut traces.decode, &()), ]; - let proof = Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .expect("Prover failed to generate proof"); + let proof = + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .expect("Prover failed to generate proof"); // ========================================================================= // VERIFIER: Has ELF B (different program!), computes commitment from it @@ -999,7 +999,6 @@ fn test_decode_soundness_different_elf_rejected() { fn test_decode_soundness_same_elf_accepted() { use crypto::fiat_shamir::default_transcript::DefaultTranscript; use stark::proof::options::ProofOptions; - use stark::prover::{IsStarkProver, Prover}; use stark::verifier::{IsStarkVerifier, Verifier}; 
use crate::VmAirs; @@ -1042,7 +1041,7 @@ fn test_decode_soundness_same_elf_accepted() { &table_counts, ); - let proof = Prover::multi_prove( + let proof = crate::test_utils::multi_prove_ram( prover_airs.air_trace_pairs(&mut traces), &mut DefaultTranscript::::new(&[]), ) diff --git a/prover/src/tests/lt_bus_tests.rs b/prover/src/tests/lt_bus_tests.rs index d794995b7..9097c2323 100644 --- a/prover/src/tests/lt_bus_tests.rs +++ b/prover/src/tests/lt_bus_tests.rs @@ -17,7 +17,6 @@ use stark::lookup::{ NullBoundaryConstraintBuilder, Packing, }; use stark::proof::options::ProofOptions; -use stark::prover::{IsStarkProver, Prover}; use stark::trace::TraceTable; use stark::traits::AIR; use stark::verifier::{IsStarkVerifier, Verifier}; @@ -293,7 +292,8 @@ fn prove_and_verify(ops: &[LtOperation]) -> bool { ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; @@ -377,7 +377,8 @@ fn prove_and_verify_custom(ops: &[LtOperation], receiver_rows: &[CustomLtRow]) - ]; let multi_proof = - Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 7e0fbc181..84cce7f6c 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -18,7 +18,6 @@ use math::field::element::FieldElement; use stark::constraints::transition::TransitionConstraintEvaluator; use stark::lookup::{AirWithBuses, AuxiliaryTraceBuildData}; use stark::proof::options::ProofOptions; -use stark::prover::{IsStarkProver, Prover}; use stark::traits::AIR; use stark::verifier::{IsStarkVerifier, Verifier}; 
@@ -60,11 +59,13 @@ fn prove_and_verify_vm_minimal(elf: &Elf, traces: &mut Traces) -> bool { // Build air_trace_pairs for all tables let air_trace_pairs = airs.air_trace_pairs(traces); - let multi_proof = - match Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])) { - Ok(proof) => proof, - Err(_) => return false, - }; + let multi_proof = match crate::test_utils::multi_prove_ram( + air_trace_pairs, + &mut DefaultTranscript::::new(&[]), + ) { + Ok(proof) => proof, + Err(_) => return false, + }; // Compute the verifier-side expected COMMIT bus balance from public output bytes let expected_bus_balance = crate::compute_expected_commit_bus_balance( @@ -125,8 +126,9 @@ fn test_cpu_only_no_bus() { _, )> = vec![(&cpu_air, &mut cpu_trace, &())]; - let multi_proof = Prover::multi_prove(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .expect("Prover failed"); + let multi_proof = + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .expect("Prover failed"); let airs: Vec<&dyn AIR> = vec![&cpu_air]; assert!( @@ -769,7 +771,7 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() { &traces.page_configs, &table_counts, ); - let proof = Prover::multi_prove( + let proof = crate::test_utils::multi_prove_ram( prover_airs.air_trace_pairs(&mut traces), &mut DefaultTranscript::::new(&[]), ) @@ -1497,7 +1499,7 @@ fn test_deep_stack_runtime_pages_roundtrip() { &traces.page_configs, &table_counts, ); - let proof = Prover::multi_prove( + let proof = crate::test_utils::multi_prove_ram( prover_airs.air_trace_pairs(&mut traces), &mut DefaultTranscript::::new(&[]), ) @@ -1552,7 +1554,7 @@ fn test_deep_stack_missing_pages_rejected() { &traces.page_configs, &table_counts, ); - let proof = Prover::multi_prove( + let proof = crate::test_utils::multi_prove_ram( prover_airs.air_trace_pairs(&mut traces), &mut DefaultTranscript::::new(&[]), ) @@ -1641,7 +1643,7 @@ fn test_heap_alloc_runtime_pages_roundtrip() { &traces.page_configs, 
&table_counts, ); - let proof = Prover::multi_prove( + let proof = crate::test_utils::multi_prove_ram( prover_airs.air_trace_pairs(&mut traces), &mut DefaultTranscript::::new(&[]), ) @@ -1813,7 +1815,7 @@ fn test_crafted_zero_count_proof_must_not_verify() { (&airs.decode, &mut decode_trace, &()), ]; - let proof = Prover::multi_prove(pairs, &mut DefaultTranscript::::new(&[])) + let proof = crate::test_utils::multi_prove_ram(pairs, &mut DefaultTranscript::::new(&[])) .expect("Proof generation should succeed"); assert_eq!(proof.proofs.len(), 2); From 4a3acbfafeb87e326570919f5f83ad39b9b6e9e8 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 22:09:56 -0300 Subject: [PATCH 170/231] Restore Domain/LdeTwiddles dedup rationale in multi_prove --- crypto/stark/src/prover.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index bc65cb6fd..a816ecce5 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1608,7 +1608,9 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let phase_start = Instant::now(); - // Deduplicate Domain/LdeTwiddles by (trace_length, blowup, coset). + // Deduplicate Domain + LdeTwiddles by (trace_length, blowup_factor, coset_offset). + // Many tables share the same domain size (e.g., 7+ tables at 2^20). + // Without dedup, each creates its own Domain (~24 MB) and LdeTwiddles (~32 MB). 
type DomainEntry = (Arc>, Arc>); let mut domain_cache: std::collections::HashMap<(usize, usize, u64), DomainEntry> = std::collections::HashMap::new(); From 9565c2f0745c11803d2de456dba32115259be23c Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 22:12:15 -0300 Subject: [PATCH 171/231] Restore domain_cache drop rationale comment --- crypto/stark/src/prover.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index a816ecce5..22a27f6b1 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1642,6 +1642,9 @@ pub trait IsStarkProver< domains.push(domain); twiddle_caches.push(twiddles); } + // Free the HashMap (which holds extra strong Arc references) before the + // long proving rounds begin. `domains` and `twiddle_caches` already hold + // the only surviving Arcs we care about. drop(domain_cache); let k = table_parallelism().min(num_airs).max(1); From bf9ed5f0cb8cb3b0b791042a3f5bf3ea34711a41 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 22:24:40 -0300 Subject: [PATCH 172/231] Move multi_prove_ram from trait method to test_utils --- crypto/stark/src/lib.rs | 2 + crypto/stark/src/prover.rs | 23 ------- crypto/stark/src/test_utils.rs | 37 +++++++++++ crypto/stark/src/tests/air_tests.rs | 8 ++- .../src/tests/bus_tests/completeness_tests.rs | 19 ++++-- .../src/tests/bus_tests/multiplicity_tests.rs | 10 +-- .../src/tests/bus_tests/soundness_tests.rs | 66 ++++++++++++------- .../src/tests/prove_verify_roundtrip_tests.rs | 8 +-- crypto/stark/src/tests/prover_tests.rs | 4 +- 9 files changed, 111 insertions(+), 66 deletions(-) create mode 100644 crypto/stark/src/test_utils.rs diff --git a/crypto/stark/src/lib.rs b/crypto/stark/src/lib.rs index b4767750c..acc8420f4 100644 --- a/crypto/stark/src/lib.rs +++ b/crypto/stark/src/lib.rs @@ -26,6 +26,8 @@ pub mod trace; pub mod traits; pub mod verifier; +#[cfg(test)] +pub mod test_utils; #[cfg(test)] pub mod tests; diff 
--git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 22a27f6b1..cbdf22dde 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -2058,29 +2058,6 @@ pub trait IsStarkProver< Ok(MultiProof { proofs }) } - /// Multi-AIR prove with `StorageMode::Ram`. Test convenience. - #[cfg(test)] - fn multi_prove_ram( - air_trace_pairs: Vec>, - transcript: &mut (impl IsStarkTranscript + Clone + Send), - ) -> Result, ProvingError> - where - FieldElement: AsBytes, - FieldElement: AsBytes, - PI: Send + Sync + Clone, - Field: Copy + 'static, - FieldExtension: Copy + 'static, - ::BaseType: SpillSafe, - ::BaseType: SpillSafe, - { - Self::multi_prove( - air_trace_pairs, - transcript, - #[cfg(feature = "disk-spill")] - StorageMode::Ram, - ) - } - /// Generate a STARK proof for a single AIR/trace. /// This is equivalent to calling `multi_prove` with a single-element slice. fn prove( diff --git a/crypto/stark/src/test_utils.rs b/crypto/stark/src/test_utils.rs new file mode 100644 index 000000000..668eeff1f --- /dev/null +++ b/crypto/stark/src/test_utils.rs @@ -0,0 +1,37 @@ +//! Shared test helpers for the stark crate. + +use crate::proof::stark::MultiProof; +use crate::prover::{IsStarkProver, Prover, ProvingError}; +use crate::trace::TraceTable; +use crate::traits::AIR; +use crypto::fiat_shamir::is_transcript::IsStarkTranscript; +use math::field::element::FieldElement; +use math::field::traits::{IsFFTField, IsField, IsSubFieldOf}; +use math::spill_safe::SpillSafe; +use math::traits::{AsBytes, ByteConversion}; + +/// Multi-AIR prove with `StorageMode::Ram`. Test convenience. 
+pub fn multi_prove_ram( + air_trace_pairs: Vec<( + &dyn AIR, + &mut TraceTable, + &PI, + )>, + transcript: &mut (impl IsStarkTranscript + Clone + Send), +) -> Result, ProvingError> +where + Field: IsSubFieldOf + IsFFTField + Send + Sync + Copy + 'static, + FieldExtension: IsField + Send + Sync + Copy + 'static, + PI: Send + Sync + Clone, + FieldElement: AsBytes + ByteConversion, + FieldElement: AsBytes + ByteConversion, + ::BaseType: SpillSafe, + ::BaseType: SpillSafe, +{ + Prover::::multi_prove( + air_trace_pairs, + transcript, + #[cfg(feature = "disk-spill")] + crate::storage_mode::StorageMode::Ram, + ) +} diff --git a/crypto/stark/src/tests/air_tests.rs b/crypto/stark/src/tests/air_tests.rs index 601652acf..dba0c3e36 100644 --- a/crypto/stark/src/tests/air_tests.rs +++ b/crypto/stark/src/tests/air_tests.rs @@ -400,7 +400,8 @@ fn test_multi_prove_fib_3_tables() { (&air_3, &mut trace_3, &pub_inputs_3), ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec< &dyn AIR< @@ -500,7 +501,7 @@ fn test_multi_prove_2_tables_small_field() { (&air_2, &mut trace_2, &pub_inputs_2), ]; - let multi_proof = Prover::multi_prove_ram( + let multi_proof = crate::test_utils::multi_prove_ram( air_trace_pairs, &mut DefaultTranscript::::new(&[]), ) @@ -538,7 +539,8 @@ fn test_multi_prove_different_airs() { )> = vec![(&air_1, &mut trace_1, &()), (&air_2, &mut trace_2, &())]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec< &dyn AIR, diff --git a/crypto/stark/src/tests/bus_tests/completeness_tests.rs b/crypto/stark/src/tests/bus_tests/completeness_tests.rs index afd7ea2b0..8ed8734dc 100644 --- 
a/crypto/stark/src/tests/bus_tests/completeness_tests.rs +++ b/crypto/stark/src/tests/bus_tests/completeness_tests.rs @@ -16,7 +16,6 @@ use crate::lookup::{ NullBoundaryConstraintBuilder, Packing, }; use crate::proof::options::ProofOptions; -use crate::prover::{IsStarkProver, Prover}; use crate::trace::TraceTable; use crate::traits::AIR; use crate::verifier::{IsStarkVerifier, Verifier}; @@ -122,7 +121,8 @@ fn test_multi_table_proof() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -185,7 +185,8 @@ fn test_all_padding() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -248,7 +249,8 @@ fn test_single_operation() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -311,7 +313,8 @@ fn test_duplicate_operations() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -374,7 +377,8 @@ fn test_serialization_roundtrip() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); // Serialize and deserialize let serialized 
= serde_cbor::to_vec(&multi_proof).expect("serialization failed"); @@ -519,7 +523,8 @@ fn test_bus_value_features() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; diff --git a/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs b/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs index 44c7791f8..62197eb35 100644 --- a/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs +++ b/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs @@ -15,7 +15,6 @@ use crate::lookup::{ NullBoundaryConstraintBuilder, Packing, }; use crate::proof::options::ProofOptions; -use crate::prover::{IsStarkProver, Prover}; use crate::trace::TraceTable; use crate::traits::AIR; use crate::verifier::{IsStarkVerifier, Verifier}; @@ -113,7 +112,8 @@ fn test_multiplicity_one() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -223,7 +223,8 @@ fn test_multiplicity_sum() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -331,7 +332,8 @@ fn test_multiplicity_negated() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; diff --git a/crypto/stark/src/tests/bus_tests/soundness_tests.rs 
b/crypto/stark/src/tests/bus_tests/soundness_tests.rs index 51e728d9e..01b36c6ba 100644 --- a/crypto/stark/src/tests/bus_tests/soundness_tests.rs +++ b/crypto/stark/src/tests/bus_tests/soundness_tests.rs @@ -79,7 +79,8 @@ fn test_wrong_result_value() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -142,7 +143,8 @@ fn test_off_by_one() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -205,7 +207,8 @@ fn test_swapped_operands() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -268,7 +271,8 @@ fn test_single_column_wrong() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -335,7 +339,8 @@ fn test_over_report_multiplicity() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -398,7 +403,8 @@ fn test_under_report_multiplicity() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + 
crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -461,7 +467,8 @@ fn test_zero_multiplicity_skip() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -528,7 +535,8 @@ fn test_phantom_receive() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -591,7 +599,8 @@ fn test_missing_receiver() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -662,7 +671,8 @@ fn test_tampered_table_contribution() { ]; let mut multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); // Corrupt table_contribution in the ADD table's bus public inputs. // This changes the per-row offset L/N used in the circular constraint, @@ -742,7 +752,8 @@ fn test_tampered_acc_ood_evaluation() { ]; let mut multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); // Corrupt the acc column OOD evaluation in the ADD table proof. 
// With batching + absorption, ADD has 4 main columns and 1 aux column @@ -827,7 +838,8 @@ fn test_missing_bus_public_inputs_rejected() { ]; let mut multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); // Remove bus_public_inputs from the ADD table proof entirely. multi_proof.proofs[1].bus_public_inputs = None; @@ -948,7 +960,8 @@ fn test_zeroed_table_contribution_rejected() { ]; let mut multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); // Zero out table_contribution for the ADD table. let add_proof = &mut multi_proof.proofs[1]; @@ -1026,7 +1039,8 @@ fn test_one_of_many_wrong() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -1134,7 +1148,8 @@ fn test_full_scenario_wrong_add() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -1208,7 +1223,8 @@ fn test_wrong_table_consumes_value_rejected() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -1324,7 +1340,8 @@ fn test_packing_mismatch_direct_vs_word2l() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut 
DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -1429,7 +1446,8 @@ fn test_packing_mismatch_element_count() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -1531,7 +1549,8 @@ fn test_packing_mismatch_shift_constant() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -1634,7 +1653,8 @@ fn test_compound_mismatch_dwordhhw_vs_dwordwhh() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -1727,7 +1747,8 @@ fn test_compound_equals_primitive_expansion() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -1843,7 +1864,8 @@ fn test_full_scenario_wrong_mul() { ]; let multi_proof = - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; diff --git a/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs b/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs index 
9031fd6dd..c7b32fe73 100644 --- a/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs +++ b/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs @@ -17,10 +17,7 @@ use crate::lookup::{ use crate::proof::options::ProofOptions; use crate::proof::stark::MultiProof; use crate::traits::AIR; -use crate::{ - prover::{IsStarkProver, Prover}, - verifier::{IsStarkVerifier, Verifier}, -}; +use crate::verifier::{IsStarkVerifier, Verifier}; type F = GoldilocksField; type E = Degree3GoldilocksExtensionField; @@ -137,7 +134,8 @@ fn test_verify_serialized_multi_table_proofs() { (&mul_air, &mut mul_trace, &()), ]; - Prover::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap() + crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .unwrap() }; // ========================================================================= diff --git a/crypto/stark/src/tests/prover_tests.rs b/crypto/stark/src/tests/prover_tests.rs index a726aa279..c8aee4396 100644 --- a/crypto/stark/src/tests/prover_tests.rs +++ b/crypto/stark/src/tests/prover_tests.rs @@ -297,7 +297,7 @@ fn test_multi_prove_mixed_coset_offsets() { (&air_2, &mut trace_2, &pub_inputs), ]; - let multi_proof = Prover::multi_prove_ram( + let multi_proof = crate::test_utils::multi_prove_ram( air_trace_pairs, &mut DefaultTranscript::::new(&[]), ) @@ -365,7 +365,7 @@ fn test_multi_prove_dedups_shared_domain_params() { (&air_3, &mut trace_3, &pub_inputs), ]; - let multi_proof = Prover::multi_prove_ram( + let multi_proof = crate::test_utils::multi_prove_ram( air_trace_pairs, &mut DefaultTranscript::::new(&[]), ) From c9cd42d93fe865640bee6af3914e723ea8f1edd6 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 22:29:25 -0300 Subject: [PATCH 173/231] Use multi_prove_ram via use statement at call sites --- crypto/stark/src/tests/air_tests.rs | 9 ++- .../src/tests/bus_tests/completeness_tests.rs | 19 ++---- .../src/tests/bus_tests/multiplicity_tests.rs | 10 ++- 
.../src/tests/bus_tests/soundness_tests.rs | 67 +++++++------------ .../src/tests/prove_verify_roundtrip_tests.rs | 4 +- crypto/stark/src/tests/prover_tests.rs | 5 +- prover/src/tests/bitwise_bus_tests.rs | 7 +- prover/src/tests/bitwise_tests.rs | 22 ++---- prover/src/tests/branch_bus_tests.rs | 7 +- prover/src/tests/decode_tests.rs | 8 +-- prover/src/tests/lt_bus_tests.rs | 7 +- prover/src/tests/prove_elfs_tests.rs | 22 +++--- 12 files changed, 73 insertions(+), 114 deletions(-) diff --git a/crypto/stark/src/tests/air_tests.rs b/crypto/stark/src/tests/air_tests.rs index dba0c3e36..8e20f303e 100644 --- a/crypto/stark/src/tests/air_tests.rs +++ b/crypto/stark/src/tests/air_tests.rs @@ -31,6 +31,7 @@ type Felt = FieldElement; use crate::examples::read_only_memory_logup::{ LogReadOnlyPublicInputs, LogReadOnlyRAP, read_only_logup_trace, }; +use crate::test_utils::multi_prove_ram; #[test_log::test] fn test_prove_fib() { @@ -400,8 +401,7 @@ fn test_multi_prove_fib_3_tables() { (&air_3, &mut trace_3, &pub_inputs_3), ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec< &dyn AIR< @@ -501,7 +501,7 @@ fn test_multi_prove_2_tables_small_field() { (&air_2, &mut trace_2, &pub_inputs_2), ]; - let multi_proof = crate::test_utils::multi_prove_ram( + let multi_proof = multi_prove_ram( air_trace_pairs, &mut DefaultTranscript::::new(&[]), ) @@ -539,8 +539,7 @@ fn test_multi_prove_different_airs() { )> = vec![(&air_1, &mut trace_1, &()), (&air_2, &mut trace_2, &())]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec< &dyn AIR, diff --git a/crypto/stark/src/tests/bus_tests/completeness_tests.rs b/crypto/stark/src/tests/bus_tests/completeness_tests.rs index 
8ed8734dc..83f8ac391 100644 --- a/crypto/stark/src/tests/bus_tests/completeness_tests.rs +++ b/crypto/stark/src/tests/bus_tests/completeness_tests.rs @@ -16,6 +16,7 @@ use crate::lookup::{ NullBoundaryConstraintBuilder, Packing, }; use crate::proof::options::ProofOptions; +use crate::test_utils::multi_prove_ram; use crate::trace::TraceTable; use crate::traits::AIR; use crate::verifier::{IsStarkVerifier, Verifier}; @@ -121,8 +122,7 @@ fn test_multi_table_proof() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -185,8 +185,7 @@ fn test_all_padding() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -249,8 +248,7 @@ fn test_single_operation() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -313,8 +311,7 @@ fn test_duplicate_operations() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -377,8 +374,7 @@ fn test_serialization_roundtrip() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); // Serialize and deserialize let serialized = 
serde_cbor::to_vec(&multi_proof).expect("serialization failed"); @@ -523,8 +519,7 @@ fn test_bus_value_features() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; diff --git a/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs b/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs index 62197eb35..7e4d632dd 100644 --- a/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs +++ b/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs @@ -15,6 +15,7 @@ use crate::lookup::{ NullBoundaryConstraintBuilder, Packing, }; use crate::proof::options::ProofOptions; +use crate::test_utils::multi_prove_ram; use crate::trace::TraceTable; use crate::traits::AIR; use crate::verifier::{IsStarkVerifier, Verifier}; @@ -112,8 +113,7 @@ fn test_multiplicity_one() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -223,8 +223,7 @@ fn test_multiplicity_sum() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -332,8 +331,7 @@ fn test_multiplicity_negated() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; diff --git a/crypto/stark/src/tests/bus_tests/soundness_tests.rs b/crypto/stark/src/tests/bus_tests/soundness_tests.rs index 01b36c6ba..fc718bf7c 
100644 --- a/crypto/stark/src/tests/bus_tests/soundness_tests.rs +++ b/crypto/stark/src/tests/bus_tests/soundness_tests.rs @@ -14,6 +14,7 @@ use crate::examples::multi_table_lookup::{ }; use crate::proof::options::ProofOptions; use crate::prover::{IsStarkProver, Prover}; +use crate::test_utils::multi_prove_ram; use crate::trace::TraceTable; use crate::traits::AIR; use crate::verifier::{IsStarkVerifier, Verifier}; @@ -79,8 +80,7 @@ fn test_wrong_result_value() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -143,8 +143,7 @@ fn test_off_by_one() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -207,8 +206,7 @@ fn test_swapped_operands() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -271,8 +269,7 @@ fn test_single_column_wrong() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -339,8 +336,7 @@ fn test_over_report_multiplicity() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, 
&mul_air]; @@ -403,8 +399,7 @@ fn test_under_report_multiplicity() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -467,8 +462,7 @@ fn test_zero_multiplicity_skip() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -535,8 +529,7 @@ fn test_phantom_receive() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -599,8 +592,7 @@ fn test_missing_receiver() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -671,8 +663,7 @@ fn test_tampered_table_contribution() { ]; let mut multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); // Corrupt table_contribution in the ADD table's bus public inputs. 
// This changes the per-row offset L/N used in the circular constraint, @@ -752,8 +743,7 @@ fn test_tampered_acc_ood_evaluation() { ]; let mut multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); // Corrupt the acc column OOD evaluation in the ADD table proof. // With batching + absorption, ADD has 4 main columns and 1 aux column @@ -838,8 +828,7 @@ fn test_missing_bus_public_inputs_rejected() { ]; let mut multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); // Remove bus_public_inputs from the ADD table proof entirely. multi_proof.proofs[1].bus_public_inputs = None; @@ -960,8 +949,7 @@ fn test_zeroed_table_contribution_rejected() { ]; let mut multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); // Zero out table_contribution for the ADD table. 
let add_proof = &mut multi_proof.proofs[1]; @@ -1039,8 +1027,7 @@ fn test_one_of_many_wrong() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -1148,8 +1135,7 @@ fn test_full_scenario_wrong_add() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -1223,8 +1209,7 @@ fn test_wrong_table_consumes_value_rejected() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; @@ -1340,8 +1325,7 @@ fn test_packing_mismatch_direct_vs_word2l() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -1446,8 +1430,7 @@ fn test_packing_mismatch_element_count() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -1549,8 +1532,7 @@ fn test_packing_mismatch_shift_constant() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ 
-1653,8 +1635,7 @@ fn test_compound_mismatch_dwordhhw_vs_dwordwhh() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -1747,8 +1728,7 @@ fn test_compound_equals_primitive_expansion() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender, &receiver]; @@ -1864,8 +1844,7 @@ fn test_full_scenario_wrong_mul() { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; diff --git a/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs b/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs index c7b32fe73..4059ed481 100644 --- a/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs +++ b/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs @@ -16,6 +16,7 @@ use crate::lookup::{ }; use crate::proof::options::ProofOptions; use crate::proof::stark::MultiProof; +use crate::test_utils::multi_prove_ram; use crate::traits::AIR; use crate::verifier::{IsStarkVerifier, Verifier}; @@ -134,8 +135,7 @@ fn test_verify_serialized_multi_table_proofs() { (&mul_air, &mut mul_trace, &()), ]; - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap() + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap() }; // ========================================================================= diff --git a/crypto/stark/src/tests/prover_tests.rs b/crypto/stark/src/tests/prover_tests.rs index c8aee4396..f4b762724 100644 --- 
a/crypto/stark/src/tests/prover_tests.rs +++ b/crypto/stark/src/tests/prover_tests.rs @@ -8,6 +8,7 @@ use crate::{ }, proof::options::ProofOptions, prover::{IsStarkProver, Prover, domain_cache_stats, evaluate_polynomial_on_lde_domain}, + test_utils::multi_prove_ram, trace::{LDETraceTable, get_trace_evaluations, get_trace_evaluations_from_lde}, traits::AIR, verifier::{IsStarkVerifier, Verifier}, @@ -297,7 +298,7 @@ fn test_multi_prove_mixed_coset_offsets() { (&air_2, &mut trace_2, &pub_inputs), ]; - let multi_proof = crate::test_utils::multi_prove_ram( + let multi_proof = multi_prove_ram( air_trace_pairs, &mut DefaultTranscript::::new(&[]), ) @@ -365,7 +366,7 @@ fn test_multi_prove_dedups_shared_domain_params() { (&air_3, &mut trace_3, &pub_inputs), ]; - let multi_proof = crate::test_utils::multi_prove_ram( + let multi_proof = multi_prove_ram( air_trace_pairs, &mut DefaultTranscript::::new(&[]), ) diff --git a/prover/src/tests/bitwise_bus_tests.rs b/prover/src/tests/bitwise_bus_tests.rs index 12b26bbc6..2a5fd31dd 100644 --- a/prover/src/tests/bitwise_bus_tests.rs +++ b/prover/src/tests/bitwise_bus_tests.rs @@ -20,6 +20,7 @@ use stark::traits::AIR; use stark::verifier::{IsStarkVerifier, Verifier}; use crate::tables::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; +use crate::test_utils::multi_prove_ram; type F = GoldilocksField; type E = GoldilocksExtension; @@ -196,8 +197,7 @@ fn prove_and_verify(sender_lookups: &[(u8, u8, u8)]) -> bool { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; @@ -307,8 +307,7 @@ fn prove_and_verify_custom( ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: 
Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; diff --git a/prover/src/tests/bitwise_tests.rs b/prover/src/tests/bitwise_tests.rs index 7f30d02e0..8337f8bf7 100644 --- a/prover/src/tests/bitwise_tests.rs +++ b/prover/src/tests/bitwise_tests.rs @@ -5,6 +5,7 @@ use crate::tables::bitwise::{ generate_bitwise_trace, is_preprocessed, preprocessed_commitment, row_index, }; use crate::tables::types::FE; +use crate::test_utils::multi_prove_ram; use math::field::element::FieldElement; use stark::proof::options::ProofOptions; @@ -589,11 +590,8 @@ mod soundness_tests { (&receiver_air, &mut receiver_trace, &()), ]; - let multi_proof = crate::test_utils::multi_prove_ram( - air_trace_pairs, - &mut DefaultTranscript::::new(&[]), - ) - .unwrap(); + let multi_proof = + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; @@ -640,11 +638,8 @@ mod soundness_tests { (&receiver_air, &mut receiver_trace, &()), ]; - let multi_proof = crate::test_utils::multi_prove_ram( - air_trace_pairs, - &mut DefaultTranscript::::new(&[]), - ) - .unwrap(); + let multi_proof = + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; @@ -713,11 +708,8 @@ mod soundness_tests { (&prover_receiver_air, &mut malicious_trace, &()), ]; - let multi_proof = crate::test_utils::multi_prove_ram( - air_trace_pairs, - &mut DefaultTranscript::::new(&[]), - ) - .unwrap(); + let multi_proof = + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); // Verifier uses DIFFERENT AIR with honest commitment let verifier_airs: Vec<&dyn AIR> = diff --git a/prover/src/tests/branch_bus_tests.rs b/prover/src/tests/branch_bus_tests.rs index 8d57e1953..c19a580ad 100644 --- a/prover/src/tests/branch_bus_tests.rs +++ b/prover/src/tests/branch_bus_tests.rs @@ -23,6 +23,7 @@ use stark::verifier::{IsStarkVerifier, Verifier}; use 
crate::tables::branch::{BranchOperation, cols, generate_branch_trace}; use crate::tables::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; +use crate::test_utils::multi_prove_ram; type F = GoldilocksField; type E = GoldilocksExtension; @@ -339,8 +340,7 @@ fn prove_and_verify(ops: &[BranchOperation]) -> bool { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; @@ -430,8 +430,7 @@ fn prove_and_verify_custom(ops: &[BranchOperation], receiver_rows: &[CustomBranc ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; diff --git a/prover/src/tests/decode_tests.rs b/prover/src/tests/decode_tests.rs index 7392e17c4..1149cd84f 100644 --- a/prover/src/tests/decode_tests.rs +++ b/prover/src/tests/decode_tests.rs @@ -10,6 +10,7 @@ use crate::tables::decode::{ update_multiplicities, }; use crate::tables::types::{FE, packed_decode as bits}; +use crate::test_utils::multi_prove_ram; use crate::test_utils::run_asm_elf; // ========================================================================= @@ -947,9 +948,8 @@ fn test_decode_soundness_different_elf_rejected() { (&prover_decode_air, &mut traces.decode, &()), ]; - let proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .expect("Prover failed to generate proof"); + let proof = multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .expect("Prover failed to generate proof"); // ========================================================================= // VERIFIER: Has ELF B (different program!), computes commitment from it @@ -1041,7 +1041,7 @@ 
fn test_decode_soundness_same_elf_accepted() { &table_counts, ); - let proof = crate::test_utils::multi_prove_ram( + let proof = multi_prove_ram( prover_airs.air_trace_pairs(&mut traces), &mut DefaultTranscript::::new(&[]), ) diff --git a/prover/src/tests/lt_bus_tests.rs b/prover/src/tests/lt_bus_tests.rs index 9097c2323..dcc555780 100644 --- a/prover/src/tests/lt_bus_tests.rs +++ b/prover/src/tests/lt_bus_tests.rs @@ -23,6 +23,7 @@ use stark::verifier::{IsStarkVerifier, Verifier}; use crate::tables::lt::{LtOperation, cols, generate_lt_trace}; use crate::tables::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; +use crate::test_utils::multi_prove_ram; type F = GoldilocksField; type E = GoldilocksExtension; @@ -292,8 +293,7 @@ fn prove_and_verify(ops: &[LtOperation]) -> bool { ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; @@ -377,8 +377,7 @@ fn prove_and_verify_custom(ops: &[LtOperation], receiver_rows: &[CustomLtRow]) - ]; let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .unwrap(); + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 84cce7f6c..7bdb4ba5c 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -28,6 +28,7 @@ use executor::elf::Elf; // Import shared utilities use crate::VmAirs; +use crate::test_utils::multi_prove_ram; use crate::test_utils::run_asm_elf; type F = GoldilocksField; @@ -59,10 +60,8 @@ fn prove_and_verify_vm_minimal(elf: &Elf, traces: &mut Traces) -> bool { // Build air_trace_pairs for all tables let air_trace_pairs = 
airs.air_trace_pairs(traces); - let multi_proof = match crate::test_utils::multi_prove_ram( - air_trace_pairs, - &mut DefaultTranscript::::new(&[]), - ) { + let multi_proof = match multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + { Ok(proof) => proof, Err(_) => return false, }; @@ -126,9 +125,8 @@ fn test_cpu_only_no_bus() { _, )> = vec![(&cpu_air, &mut cpu_trace, &())]; - let multi_proof = - crate::test_utils::multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) - .expect("Prover failed"); + let multi_proof = multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])) + .expect("Prover failed"); let airs: Vec<&dyn AIR> = vec![&cpu_air]; assert!( @@ -771,7 +769,7 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() { &traces.page_configs, &table_counts, ); - let proof = crate::test_utils::multi_prove_ram( + let proof = multi_prove_ram( prover_airs.air_trace_pairs(&mut traces), &mut DefaultTranscript::::new(&[]), ) @@ -1499,7 +1497,7 @@ fn test_deep_stack_runtime_pages_roundtrip() { &traces.page_configs, &table_counts, ); - let proof = crate::test_utils::multi_prove_ram( + let proof = multi_prove_ram( prover_airs.air_trace_pairs(&mut traces), &mut DefaultTranscript::::new(&[]), ) @@ -1554,7 +1552,7 @@ fn test_deep_stack_missing_pages_rejected() { &traces.page_configs, &table_counts, ); - let proof = crate::test_utils::multi_prove_ram( + let proof = multi_prove_ram( prover_airs.air_trace_pairs(&mut traces), &mut DefaultTranscript::::new(&[]), ) @@ -1643,7 +1641,7 @@ fn test_heap_alloc_runtime_pages_roundtrip() { &traces.page_configs, &table_counts, ); - let proof = crate::test_utils::multi_prove_ram( + let proof = multi_prove_ram( prover_airs.air_trace_pairs(&mut traces), &mut DefaultTranscript::::new(&[]), ) @@ -1815,7 +1813,7 @@ fn test_crafted_zero_count_proof_must_not_verify() { (&airs.decode, &mut decode_trace, &()), ]; - let proof = crate::test_utils::multi_prove_ram(pairs, &mut DefaultTranscript::::new(&[])) 
+ let proof = multi_prove_ram(pairs, &mut DefaultTranscript::::new(&[])) .expect("Proof generation should succeed"); assert_eq!(proof.proofs.len(), 2); From dbc4888c839f04bd009e647e17332a13c0b12a7a Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 22:34:04 -0300 Subject: [PATCH 174/231] Trim TableMmapBacking doc comment --- crypto/stark/src/table.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 331615349..934900888 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -10,7 +10,6 @@ use rayon::prelude::*; /// Mmap-backed storage for a spilled Table. /// -/// The table data is written row-major to a temp file and mmapped back. /// Access goes through pointer arithmetic on the mmap, matching the /// original `data[row * width + col]` layout. #[cfg(feature = "disk-spill")] From df991dfbad8eede33476ff2ea77e040701dd796c Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 22:38:02 -0300 Subject: [PATCH 175/231] Make TableMmapBacking and mmap_backing private --- crypto/stark/src/table.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 934900888..09136f725 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -13,7 +13,7 @@ use rayon::prelude::*; /// Access goes through pointer arithmetic on the mmap, matching the /// original `data[row * width + col]` layout. #[cfg(feature = "disk-spill")] -pub(crate) struct TableMmapBacking { +struct TableMmapBacking { mmap: memmap2::Mmap, /// Number of columns per row. 
width: usize, @@ -51,7 +51,7 @@ pub struct Table { pub height: usize, #[cfg(feature = "disk-spill")] #[serde(skip)] - pub(crate) mmap_backing: Option, + mmap_backing: Option, } #[cfg(feature = "disk-spill")] From 360c50491d3b2d3330715d772ed4734fe679713b Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 22:44:40 -0300 Subject: [PATCH 176/231] Tighten Table Clone impl doc comment --- crypto/stark/src/table.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 09136f725..8e0070e90 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -96,10 +96,8 @@ where } } -/// Cloning a spilled table reads its mmap bytes into a fresh heap `Vec` -/// and returns an unspilled clone. This is cold — callers pay the full -/// materialization cost — but avoids the runtime panic a derived impl -/// would produce on `TableMmapBacking`. +/// Cloning a spilled table copies its mmap bytes into a fresh heap `Vec` +/// and returns an unspilled clone. #[cfg(feature = "disk-spill")] impl Clone for Table { fn clone(&self) -> Self { From ca38e8257edf18bf6bf98b8724b0d177e83c6693 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 22:48:24 -0300 Subject: [PATCH 177/231] Drop streaming-serialize wrapper doc comments --- crypto/crypto/src/merkle_tree/merkle.rs | 2 -- crypto/stark/src/table.rs | 2 -- 2 files changed, 4 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/merkle.rs b/crypto/crypto/src/merkle_tree/merkle.rs index faa657357..7e77bd844 100644 --- a/crypto/crypto/src/merkle_tree/merkle.rs +++ b/crypto/crypto/src/merkle_tree/merkle.rs @@ -83,8 +83,6 @@ where } } -/// Streams the spilled nodes through `serialize_seq` instead of buffering them -/// into a `Vec` the size of the tree. 
#[cfg(all(feature = "serde", feature = "disk-spill"))] struct MmapNodesSeq<'a, B: IsMerkleTreeBackend>(&'a MerkleTree); diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 8e0070e90..5f2a7ef84 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -73,8 +73,6 @@ where } } -/// Streams the spilled table elements through `serialize_seq` instead of -/// buffering them into a `Vec>` the size of the trace. #[cfg(feature = "disk-spill")] struct MmapDataSeq<'a, F: IsField>(&'a Table); From 69360a8af449ff4a346cd57809910e3ec55dcb3c Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 22:53:27 -0300 Subject: [PATCH 178/231] Drop Table PartialEq doc comment --- crypto/stark/src/table.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 5f2a7ef84..dbd1611d2 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -122,8 +122,6 @@ impl Clone for Table { } } -/// Element-wise comparison via `get()`, so spilled tables compare by field -/// equality (canonicalized per `F::eq`) rather than raw mmap bytes. #[cfg(feature = "disk-spill")] impl PartialEq for Table { fn eq(&self, other: &Self) -> bool { From 57dbdb236ebf9eb1c313c317d499a27de7af48d5 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 5 May 2026 23:12:47 -0300 Subject: [PATCH 179/231] Tighten get_row/get assert comment --- crypto/stark/src/table.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index dbd1611d2..063945012 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -196,8 +196,7 @@ impl Table { pub fn get_row(&self, row_idx: usize) -> &[FieldElement] { #[cfg(feature = "disk-spill")] if let Some(ref backing) = self.mmap_backing { - // Guard the unsafe pointer math below; matches the non-spill - // path's checked indexing so release builds don't drop the check. 
+ // Ensures the unsafe block's read stays within the mmap. assert!( row_idx < backing.height, "Table::get_row out of bounds: row={row_idx}, height={}", @@ -257,8 +256,7 @@ impl Table { pub fn get(&self, row: usize, col: usize) -> &FieldElement { #[cfg(feature = "disk-spill")] if let Some(ref backing) = self.mmap_backing { - // Guard the unsafe pointer math below; matches the non-spill - // path's checked indexing so release builds don't drop the check. + // Ensures the unsafe block's read stays within the mmap. assert!( row < backing.height && col < backing.width, "Table::get out of bounds: row={row}, col={col}, height={}, width={}", From 19f8d3dc43ef04a3355c1f63751a79f468f18005 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 10:30:47 -0300 Subject: [PATCH 180/231] Restore T1 transpose rationale in extract_columns doc --- crypto/stark/src/table.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 063945012..2d26dd5e1 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -234,7 +234,8 @@ impl Table { /// Extract columns as owned vectors, with each allocated at `capacity`. /// /// `capacity` is a hint sized for downstream LDE expansion so the FFT grows - /// in place without a second allocation. + /// in place without a second allocation. Avoids the T1 transpose `columns()` + /// performs. 
pub fn extract_columns(&self, capacity: usize) -> Vec>> { let capacity = capacity.max(self.height); #[cfg(feature = "parallel")] From 2645033eb4ec52d7738b7367a5000436be24f970 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 10:35:16 -0300 Subject: [PATCH 181/231] Drop unnecessary inline on Table::get --- crypto/stark/src/table.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 2d26dd5e1..ffc0173c2 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -253,7 +253,6 @@ impl Table { } /// Given row and column indexes, returns the stored field element in that position of the table. - #[inline] pub fn get(&self, row: usize, col: usize) -> &FieldElement { #[cfg(feature = "disk-spill")] if let Some(ref backing) = self.mmap_backing { From 4aa761b0b2943e98e4eda31328bf67ff2ca9ff32 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 10:48:06 -0300 Subject: [PATCH 182/231] Drop is_spilled accessor, inline mmap_backing checks --- crypto/stark/src/table.rs | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index ffc0173c2..e4b2db2b1 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -286,12 +286,6 @@ impl Table { self.data[idx] = value; } - /// Returns true if this table's data has been spilled to disk via mmap. - #[cfg(feature = "disk-spill")] - pub fn is_spilled(&self) -> bool { - self.mmap_backing.is_some() - } - /// Spill the table's row-major data to a temp file and mmap it back. /// Frees the heap `data` Vec while preserving access through `get()`, /// `get_row()`, and `columns()`. 
@@ -419,7 +413,7 @@ mod disk_spill_tests { .collect(); let mut table = Table::new(data.clone(), width); - assert!(!table.is_spilled()); + assert!(table.mmap_backing.is_none()); // Snapshot values before spill let pre_spill: Vec>> = (0..height) @@ -427,7 +421,7 @@ mod disk_spill_tests { .collect(); table.spill_to_disk().expect("spill_to_disk failed"); - assert!(table.is_spilled()); + assert!(table.mmap_backing.is_some()); assert!( table.data.is_empty(), "heap data should be freed after spill" @@ -457,7 +451,7 @@ mod disk_spill_tests { table .spill_to_disk() .expect("spill_to_disk on empty table failed"); - assert!(!table.is_spilled()); + assert!(table.mmap_backing.is_none()); } /// Spilling twice is idempotent (second call is a no-op). @@ -468,10 +462,10 @@ mod disk_spill_tests { let mut table = Table::new(data, 4); table.spill_to_disk().expect("first spill failed"); - assert!(table.is_spilled()); + assert!(table.mmap_backing.is_some()); table.spill_to_disk().expect("second spill should be no-op"); - assert!(table.is_spilled()); + assert!(table.mmap_backing.is_some()); // Still readable assert_eq!(table.get(0, 0), &FieldElement::::from(0u64)); @@ -490,10 +484,10 @@ mod disk_spill_tests { let mut table = Table::new(data, width); table.spill_to_disk().expect("spill_to_disk failed"); - assert!(table.is_spilled()); + assert!(table.mmap_backing.is_some()); let cloned = table.clone(); - assert!(!cloned.is_spilled(), "clone should not be spilled"); + assert!(cloned.mmap_backing.is_none(), "clone should not be spilled"); assert_eq!(cloned.width, width); assert_eq!(cloned.height, height); assert_eq!(cloned, table, "clone must equal source element-wise"); @@ -523,7 +517,7 @@ mod disk_spill_tests { let restored: Table = bincode::deserialize(&spilled_bytes).expect("deserialize spilled bytes"); - assert!(!restored.is_spilled()); + assert!(restored.mmap_backing.is_none()); assert_eq!(restored, unspilled); } } From 03c288fc491987e59615f0bc43d378ad980b06ca Mon Sep 17 00:00:00 2001 
From: gabrielbosio Date: Wed, 6 May 2026 10:50:04 -0300 Subject: [PATCH 183/231] Use intra-doc links in spill_to_disk doc comment --- crypto/stark/src/table.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index e4b2db2b1..0355b656a 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -287,8 +287,8 @@ impl Table { } /// Spill the table's row-major data to a temp file and mmap it back. - /// Frees the heap `data` Vec while preserving access through `get()`, - /// `get_row()`, and `columns()`. + /// Frees the heap `data` Vec while preserving access through + /// [`Self::get`], [`Self::get_row`], and [`Self::columns`]. /// /// No-op if the table is empty or already spilled. #[cfg(feature = "disk-spill")] From b6f80e699dee4da5cfc4e5fd5aed8d25645a24b4 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 10:53:35 -0300 Subject: [PATCH 184/231] Use prelude size_of and import spill_slice_to_mmap --- crypto/crypto/src/mmap_util.rs | 2 +- crypto/stark/src/table.rs | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/crypto/crypto/src/mmap_util.rs b/crypto/crypto/src/mmap_util.rs index faa65717a..5d5244d80 100644 --- a/crypto/crypto/src/mmap_util.rs +++ b/crypto/crypto/src/mmap_util.rs @@ -14,7 +14,7 @@ pub fn spill_slice_to_mmap(slice: &[T]) -> std::io::Result(); + let elem_size = size_of::(); let total_bytes = (slice.len() as u64) .checked_mul(elem_size as u64) .ok_or_else(|| { diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 0355b656a..108d6d956 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -1,4 +1,6 @@ use crate::frame::Frame; +#[cfg(feature = "disk-spill")] +use crypto::mmap_util::spill_slice_to_mmap; use math::field::{ element::FieldElement, traits::{IsField, IsSubFieldOf}, @@ -301,12 +303,12 @@ impl Table { return Ok(()); } - let mmap = crypto::mmap_util::spill_slice_to_mmap(&self.data)?; 
+ let mmap = spill_slice_to_mmap(&self.data)?; self.mmap_backing = Some(TableMmapBacking { mmap, width: self.width, height: self.height, - elem_size: std::mem::size_of::>(), + elem_size: size_of::>(), }); self.data = Vec::new(); From 8d36228eb3634ea27515f2e37d9718d2ebfe9c85 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 10:55:58 -0300 Subject: [PATCH 185/231] Add use statements in mmap_util to drop fully-qualified paths --- crypto/crypto/src/mmap_util.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/crypto/crypto/src/mmap_util.rs b/crypto/crypto/src/mmap_util.rs index 5d5244d80..c5600c5d9 100644 --- a/crypto/crypto/src/mmap_util.rs +++ b/crypto/crypto/src/mmap_util.rs @@ -1,4 +1,8 @@ +use core::slice; use math::spill_safe::SpillSafe; +use memmap2::{Mmap, MmapOptions}; +use std::fs::File; +use std::io::{Error, ErrorKind, Result}; /// Mmap a fresh temp file, copy `slice` into the mapping, downgrade to /// read-only, and return it. @@ -6,10 +10,10 @@ use math::spill_safe::SpillSafe; /// Alignment: the mmap base is page-aligned (>= 4096), this function /// asserts `align_of::() <= 4096`, and Rust guarantees `size_of::()` /// is a multiple of `align_of::()`, so every element offset is aligned. 
-pub fn spill_slice_to_mmap(slice: &[T]) -> std::io::Result { +pub fn spill_slice_to_mmap(slice: &[T]) -> Result { const { assert!( - std::mem::align_of::() <= 4096, + align_of::() <= 4096, "T alignment must fit within mmap page alignment" ) } @@ -18,8 +22,8 @@ pub fn spill_slice_to_mmap(slice: &[T]) -> std::io::Result(slice: &[T]) -> std::io::Result(slice: &[T]) -> std::io::Result std::io::Result<()> { +fn reserve_file_blocks(file: &File, total_bytes: u64) -> Result<()> { file.set_len(total_bytes)?; #[cfg(target_os = "linux")] { use std::os::unix::io::AsRawFd; let len = i64::try_from(total_bytes).map_err(|_| { - std::io::Error::new( - std::io::ErrorKind::InvalidInput, + Error::new( + ErrorKind::InvalidInput, "spill file too large for posix_fallocate", ) })?; let ret = unsafe { libc::posix_fallocate(file.as_raw_fd(), 0, len) }; if ret != 0 { - return Err(std::io::Error::from_raw_os_error(ret)); + return Err(Error::from_raw_os_error(ret)); } } Ok(()) From 0288653ce414b5dbec5d22a9d207b1a26ebef184 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 10:59:19 -0300 Subject: [PATCH 186/231] Drop redundant doc comments on disk_spill_tests --- crypto/stark/src/table.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs index 108d6d956..f91be2f5e 100644 --- a/crypto/stark/src/table.rs +++ b/crypto/stark/src/table.rs @@ -404,8 +404,6 @@ mod disk_spill_tests { type F = GoldilocksField; - /// Create a Table, spill it to disk, and verify that `get()` and `get_row()` - /// return the same values as before the spill. #[test] fn test_table_spill_roundtrip() { let width = 4; @@ -446,7 +444,6 @@ mod disk_spill_tests { } } - /// Spilling an empty table is a no-op. #[test] fn test_table_spill_empty_is_noop() { let mut table = Table::::new(Vec::new(), 0); @@ -456,7 +453,6 @@ mod disk_spill_tests { assert!(table.mmap_backing.is_none()); } - /// Spilling twice is idempotent (second call is a no-op). 
#[test] fn test_table_spill_idempotent() { let data: Vec> = @@ -474,8 +470,6 @@ mod disk_spill_tests { assert_eq!(table.get(3, 3), &FieldElement::::from(15u64)); } - /// Cloning a spilled table materializes bytes into a fresh heap Vec, - /// yielding an unspilled clone with the same element values. #[test] fn test_clone_spilled_table_materializes_to_heap() { let width = 4; @@ -495,8 +489,6 @@ mod disk_spill_tests { assert_eq!(cloned, table, "clone must equal source element-wise"); } - /// Serializing a spilled table must produce identical bytes to serializing - /// the same table before spilling, and round-trip back to an equal table. #[test] fn test_serialize_spilled_table_matches_unspilled() { let width = 4; From 15e5d0739bdd2e01ae43fe60213ac8b00894a1f1 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 11:51:29 -0300 Subject: [PATCH 187/231] Drop redundant Test convenience suffix on multi_prove_ram doc --- crypto/stark/src/test_utils.rs | 2 +- prover/src/test_utils.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/stark/src/test_utils.rs b/crypto/stark/src/test_utils.rs index 668eeff1f..245ea9763 100644 --- a/crypto/stark/src/test_utils.rs +++ b/crypto/stark/src/test_utils.rs @@ -10,7 +10,7 @@ use math::field::traits::{IsFFTField, IsField, IsSubFieldOf}; use math::spill_safe::SpillSafe; use math::traits::{AsBytes, ByteConversion}; -/// Multi-AIR prove with `StorageMode::Ram`. Test convenience. +/// Multi-AIR prove with `StorageMode::Ram`. pub fn multi_prove_ram( air_trace_pairs: Vec<( &dyn AIR, diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index 419c23793..091f33264 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -86,7 +86,7 @@ type GoldilocksPair<'a, PI> = ( &'a PI, ); -/// Multi-AIR prove with `StorageMode::Ram`. Test convenience. +/// Multi-AIR prove with `StorageMode::Ram`. 
pub fn multi_prove_ram( air_trace_pairs: Vec>, transcript: &mut (impl IsStarkTranscript + Clone + Send), From 8c48b936e4f04ab24b23c858ffee7c3ed32c764f Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 11:52:59 -0300 Subject: [PATCH 188/231] Drop multi_prove_ram doc comment --- crypto/stark/src/test_utils.rs | 1 - prover/src/test_utils.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/crypto/stark/src/test_utils.rs b/crypto/stark/src/test_utils.rs index 245ea9763..e1be191b7 100644 --- a/crypto/stark/src/test_utils.rs +++ b/crypto/stark/src/test_utils.rs @@ -10,7 +10,6 @@ use math::field::traits::{IsFFTField, IsField, IsSubFieldOf}; use math::spill_safe::SpillSafe; use math::traits::{AsBytes, ByteConversion}; -/// Multi-AIR prove with `StorageMode::Ram`. pub fn multi_prove_ram( air_trace_pairs: Vec<( &dyn AIR, diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index 091f33264..58e1b899f 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -86,7 +86,6 @@ type GoldilocksPair<'a, PI> = ( &'a PI, ); -/// Multi-AIR prove with `StorageMode::Ram`. pub fn multi_prove_ram( air_trace_pairs: Vec>, transcript: &mut (impl IsStarkTranscript + Clone + Send), From ad70ae9a2f5e9dbd53f21e4f72df079406bd34c9 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 11:54:59 -0300 Subject: [PATCH 189/231] Drop disk-spill dep comment in stark Cargo.toml --- crypto/stark/Cargo.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/crypto/stark/Cargo.toml b/crypto/stark/Cargo.toml index bdd3598a0..d16987b97 100644 --- a/crypto/stark/Cargo.toml +++ b/crypto/stark/Cargo.toml @@ -22,9 +22,6 @@ itertools = "0.11.0" # Parallelization crates rayon = { version = "1.8.0", optional = true } -# Memory-mapped backing for trace tables and Merkle tree nodes, used when the -# runtime picks `StorageMode::Disk` to keep peak RAM bounded. Activated by -# the `disk-spill` feature. 
memmap2 = { version = "0.9", optional = true } tempfile = { version = "3", optional = true } libc = { version = "0.2", optional = true } From 2a4d4153aa35aa35b81eeb05f26d21db6578091b Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 11:57:21 -0300 Subject: [PATCH 190/231] Rephrase unique_page_count doc comment --- prover/src/tables/trace_builder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index bff5c988a..ccc234742 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -99,7 +99,7 @@ impl MemoryState { Self { cells } } - /// Count unique memory pages touched during execution. + /// Number of distinct pages that contain at least one cell. #[cfg(feature = "disk-spill")] fn unique_page_count(&self, page_size: u64) -> u64 { debug_assert!( From 78cec6a49ad6d2ad0a5bcfede1ec0f6517292204 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 11:58:34 -0300 Subject: [PATCH 191/231] Import HashSet in trace_builder --- prover/src/tables/trace_builder.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index ccc234742..82d1ce800 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -25,7 +25,7 @@ //! // Use traces.cpus, traces.bitwise, traces.lts, traces.memws, traces.loads //! 
``` -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use executor::elf::Elf; use executor::vm::instruction::decoding::Instruction; @@ -107,7 +107,7 @@ impl MemoryState { "page_size must be a power of two for the bitmask to work" ); let mask = !(page_size - 1); - let pages: std::collections::HashSet = self.cells.keys().map(|&a| a & mask).collect(); + let pages: HashSet = self.cells.keys().map(|&a| a & mask).collect(); pages.len() as u64 } From 44ecac53254d7d2c227994169dc129caa47f0040 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 12:06:20 -0300 Subject: [PATCH 192/231] Trim padded_chunked_rows doc comment --- prover/src/tables/trace_builder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 82d1ce800..e594fed8a 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2100,7 +2100,7 @@ fn build_traces( }) } -/// Padded row count after chunking: each chunk rounds up to `next_power_of_two().max(4)`. +/// Padded row count after chunking. #[cfg(feature = "disk-spill")] fn padded_chunked_rows(ops_count: usize, max_rows: usize) -> u64 { // `max_rows <= 0` would loop forever. Called internally with const values > 0. From 83943110d5d3ccd10623ed61f47e04349fc78fd6 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 12:09:34 -0300 Subject: [PATCH 193/231] Drop redundant TableLengths field doc comments --- prover/src/tables/trace_builder.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index e594fed8a..0ce16d3f9 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2135,9 +2135,7 @@ pub struct TableLengths { pub commit_padded_rows: u64, pub decode_rows: u64, pub unique_page_count: u64, - /// Executor cycle count. 
pub cycle_count: u64, - /// Unique byte addresses touched (dominant non-trace heap term). pub unique_byte_count: u64, } From f3c5bfd0e3b9c95affa64d8037eef32466065daa Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 12:12:56 -0300 Subject: [PATCH 194/231] Drop unused_mut rationale comment, gate HashSet import --- prover/src/tables/trace_builder.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 0ce16d3f9..7d4c3934e 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -25,7 +25,9 @@ //! // Use traces.cpus, traces.bitwise, traces.lts, traces.memws, traces.loads //! ``` -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; +#[cfg(feature = "disk-spill")] +use std::collections::HashSet; use executor::elf::Elf; use executor::vm::instruction::decoding::Instruction; @@ -2003,8 +2005,6 @@ fn build_traces( // Generate remaining traces in parallel (page, register, halt, commit). // chunk_and_generate already handled cpu, lt, memw, load, mul, dvrm, branch above. - // `mut` is only used by the disk-spill block below; #[allow] keeps the - // non-disk-spill build warning-free. 
#[allow(unused_mut)] let mut commit_trace = commit::generate_commit_trace(&commit_ops); #[allow(unused_mut)] From 312b6707b2c1d64a42ef5be4e4d0feb9ed8476d9 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 12:13:50 -0300 Subject: [PATCH 195/231] Drop stale multi_prove_inner reference in trace_builder spill comment --- prover/src/tables/trace_builder.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 7d4c3934e..d785b682b 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2047,8 +2047,7 @@ fn build_traces( } // Fixed-size and per-page tables aren't built through `chunk_and_generate`, - // so spill them here before returning. Without this, peak heap holds every - // PAGE table until `multi_prove_inner` spills them later. + // so spill them here before returning. #[cfg(feature = "disk-spill")] if storage_mode == StorageMode::Disk { bitwise From 7e008f1c9be7c15a90731f69530b6587ce8b209a Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 12:22:47 -0300 Subject: [PATCH 196/231] Rephrase count_table_lengths doc comment --- prover/src/tables/trace_builder.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index d785b682b..f4aa268cd 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2138,8 +2138,8 @@ pub struct TableLengths { pub unique_byte_count: u64, } -/// Compute upper-bound per-table row counts without allocating op vectors. -/// Returns bounds (not exact) for tables that dedup ops: LT, MUL, DVRM, BRANCH. +/// Per-table row counts from `logs`, without building op vectors. +/// Exact for tables that don't dedup; upper bound for LT, MUL, DVRM, BRANCH. /// Must stay in sync with `Traces::from_elf_and_logs`. 
#[cfg(feature = "disk-spill")] pub fn count_table_lengths( From 9b30bfdca8733db5429fdaf9d9eef8777281accb Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 12:58:10 -0300 Subject: [PATCH 197/231] Trim over-specific comments in count_table_lengths --- prover/src/tables/trace_builder.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index f4aa268cd..bf5ea6d39 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2151,8 +2151,7 @@ pub fn count_table_lengths( // Phase 0: ELF → instructions + DECODE row count. let instructions = decode::instructions_from_elf(elf) .map_err(|e| Error::Execution(format!("Failed to parse instructions: {e}")))?; - // Mirrors the padding inside `generate_decode_trace`: +1 for the CPU - // padding entry, then round up to the next power of two with floor 2. + // Mirrors the padding inside `generate_decode_trace`. let decode_rows = (instructions.len() as u64 + 1).next_power_of_two().max(2); // Memory + register state for partition predicates that need timestamps. @@ -2162,9 +2161,7 @@ pub fn count_table_lengths( // Raw counts (pre-chunking + pre-padding). let mut cpu_count = 0usize; - // memw_by_width[i] for i in 0..4 maps width 1/2/4/8 → wide-MEMW counts. - // Used by the LT-from-MEMW derivation: each wide-MEMW op contributes - // 1, 2, 4, or 8 LT ops based on its width. + // Wide-MEMW counts bucketed by width, used by the LT-from-MEMW derivation. let mut memw_by_width: [usize; 4] = [0; 4]; let mut memw_aligned_count = 0usize; let mut memw_register_count = 0usize; @@ -2227,7 +2224,7 @@ pub fn count_table_lengths( ); } - // Register accesses (M1 read rs1, M3 read rs2, M5 write rd). + // Register accesses. 
let reg_memw_ops = collect_register_ops_from_cpu(&cpu_op, &mut register_state); for memw_op in &reg_memw_ops { partition_memw( @@ -2280,9 +2277,7 @@ pub fn count_table_lengths( } } - // HALT finalization: 32 register MEMW ops at ts=u64::MAX. Their timestamp - // delta vs old_timestamp is enormous, so they fail `is_register_op`'s - // `<= 0x10000` check and fall through to wide MEMW. + // HALT finalization. Halt ops fall through to wide MEMW. let halt_memw_ops = collect_halt_ops(&mut register_state); for memw_op in &halt_memw_ops { partition_memw( @@ -2293,14 +2288,13 @@ pub fn count_table_lengths( ); } - // LT-from-MEMW: per wide-MEMW op, 1/2/4/8 LT ops by width. - // LT-from-MEMW_A: 1 LT op per memw_aligned op. + // LT ops derived from wide-MEMW and memw_aligned ops. let memw_count = memw_by_width.iter().sum::(); let lt_from_memw = memw_by_width[0] + 2 * memw_by_width[1] + 4 * memw_by_width[2] + 8 * memw_by_width[3]; lt_count += lt_from_memw + memw_aligned_count; - // DVRM-derived: 2 mul ops (lo + hi) and 1 lt op (|r| < |d|) per dvrm. + // DVRM derives mul and lt ops. mul_count += 2 * dvrm_count; lt_count += dvrm_count; From 78a92daa802b91728fd18702f7cc583774ab0289 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 13:01:38 -0300 Subject: [PATCH 198/231] Drop redundant ECALL Commit comment --- prover/src/tables/trace_builder.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index bf5ea6d39..8790f13be 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2235,7 +2235,6 @@ pub fn count_table_lengths( ); } - // ECALL Commit if cpu_op.ecall_commit { // Match `expand_commit_operations_for_ecall`'s `0..=count` loop // without materializing the op vector.
From da8fde212678396492408af46b76704017878b8b Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 13:04:55 -0300 Subject: [PATCH 199/231] Use building instead of materializing in commit count comment --- prover/src/tables/trace_builder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 8790f13be..83b5ccca0 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2237,7 +2237,7 @@ pub fn count_table_lengths( if cpu_op.ecall_commit { // Match `expand_commit_operations_for_ecall`'s `0..=count` loop - // without materializing the op vector. + // without building the op vector. commit_count += (cpu_op.commit_count as usize) .checked_add(1) .ok_or_else(|| Error::Execution("commit_count overflows usize".into()))?; From c320339b5f0c1ac448d2adb528d9865d7d2bb49d Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 13:08:35 -0300 Subject: [PATCH 200/231] Restore ECALL Commit block-marker comment --- prover/src/tables/trace_builder.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 83b5ccca0..ad7b8bcca 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2235,6 +2235,7 @@ pub fn count_table_lengths( ); } + // ECALL Commit if cpu_op.ecall_commit { // Match `expand_commit_operations_for_ecall`'s `0..=count` loop // without building the op vector. 
From 56437fcfd4f771d51dd097867fe3a8e25b98f522 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 13:14:51 -0300 Subject: [PATCH 201/231] Merge from_elf_and_logs variants into one entry point --- prover/src/lib.rs | 10 ++++--- prover/src/tables/trace_builder.rs | 28 ------------------ prover/src/test_utils.rs | 19 ++++++++++++ .../tests/count_table_lengths_drift_tests.rs | 6 ++-- prover/src/tests/decode_tests.rs | 4 +-- prover/src/tests/prove_elfs_tests.rs | 29 ++++++++++--------- 6 files changed, 45 insertions(+), 51 deletions(-) diff --git a/prover/src/lib.rs b/prover/src/lib.rs index cc4f01781..69d036ff4 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -30,6 +30,8 @@ use executor::elf::Elf; use executor::vm::execution::Executor; use math::field::element::FieldElement; use stark::prover::{IsStarkProver, Prover}; +#[cfg(feature = "disk-spill")] +use stark::storage_mode::StorageMode; use stark::traits::AIR; use stark::verifier::{IsStarkVerifier, Verifier}; @@ -527,6 +529,8 @@ pub fn count_elements(elf_bytes: &[u8], private_inputs: &[u8]) -> Result<(u64, u &result.logs, &MaxRowsConfig::default(), private_inputs, + #[cfg(feature = "disk-spill")] + StorageMode::Ram, )?; Ok(( traces.total_field_elements(), @@ -611,16 +615,14 @@ pub fn prove_with_options_and_inputs( // Phase 5: build the full traces with the chosen mode. `Disk` spills each // chunk as it's built, so the trace never fully materializes in RAM. 
- #[cfg(feature = "disk-spill")] - let mut traces = Traces::from_elf_and_logs_with_mode( + let mut traces = Traces::from_elf_and_logs( &program, &result.logs, max_rows, private_inputs, + #[cfg(feature = "disk-spill")] storage_mode, )?; - #[cfg(not(feature = "disk-spill"))] - let mut traces = Traces::from_elf_and_logs(&program, &result.logs, max_rows, private_inputs)?; drop(result); #[cfg(feature = "instruments")] diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index ad7b8bcca..9e202d7de 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2647,34 +2647,6 @@ impl Traces { logs: &[Log], max_rows: &super::MaxRowsConfig, private_input: &[u8], - ) -> Result { - Self::from_elf_and_logs_inner( - elf, - logs, - max_rows, - private_input, - #[cfg(feature = "disk-spill")] - StorageMode::Ram, - ) - } - - /// Same as `from_elf_and_logs` but lets the caller pick a storage mode. - #[cfg(feature = "disk-spill")] - pub fn from_elf_and_logs_with_mode( - elf: &Elf, - logs: &[Log], - max_rows: &super::MaxRowsConfig, - private_input: &[u8], - storage_mode: StorageMode, - ) -> Result { - Self::from_elf_and_logs_inner(elf, logs, max_rows, private_input, storage_mode) - } - - fn from_elf_and_logs_inner( - elf: &Elf, - logs: &[Log], - max_rows: &super::MaxRowsConfig, - private_input: &[u8], #[cfg(feature = "disk-spill")] storage_mode: StorageMode, ) -> Result { // Phase 0: ELF → DECODE + instructions diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index 58e1b899f..f59cc5ba9 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -29,7 +29,9 @@ use stark::storage_mode::StorageMode; use stark::trace::TraceTable; use stark::traits::AIR; +use crate::Error; use crate::constraints::cpu::create_all_cpu_constraints; +use crate::tables::MaxRowsConfig; use crate::tables::bitwise::{ BitwiseOperation, BitwiseOperationType, bus_interactions as bitwise_bus_interactions, cols as bitwise_cols, @@ 
-72,6 +74,7 @@ use crate::tables::register::{ use crate::tables::shift::{ bus_interactions as shift_bus_interactions, cols as shift_cols, shift_constraints, }; +use crate::tables::trace_builder::Traces; use crate::tables::types::{GoldilocksExtension, GoldilocksField}; pub type F = GoldilocksField; @@ -101,6 +104,22 @@ where ) } +pub fn traces_from_elf_and_logs_ram( + elf: &Elf, + logs: &[Log], + max_rows: &MaxRowsConfig, + private_input: &[u8], +) -> Result { + Traces::from_elf_and_logs( + elf, + logs, + max_rows, + private_input, + #[cfg(feature = "disk-spill")] + StorageMode::Ram, + ) +} + // ============================================================================= // ELF Execution Helpers // ============================================================================= diff --git a/prover/src/tests/count_table_lengths_drift_tests.rs b/prover/src/tests/count_table_lengths_drift_tests.rs index 33ab650e7..f24fa74cb 100644 --- a/prover/src/tests/count_table_lengths_drift_tests.rs +++ b/prover/src/tests/count_table_lengths_drift_tests.rs @@ -4,8 +4,8 @@ //! this test. 
use crate::tables::MaxRowsConfig; -use crate::tables::trace_builder::{Traces, count_table_lengths}; -use crate::test_utils::run_asm_elf; +use crate::tables::trace_builder::count_table_lengths; +use crate::test_utils::{run_asm_elf, traces_from_elf_and_logs_ram}; #[test] fn count_table_lengths_matches_traces() { @@ -15,7 +15,7 @@ fn count_table_lengths_matches_traces() { let predicted = count_table_lengths(&elf, &logs, &max_rows, &[]).expect("count_table_lengths succeeds"); let traces = - Traces::from_elf_and_logs(&elf, &logs, &max_rows, &[]).expect("trace build succeeds"); + traces_from_elf_and_logs_ram(&elf, &logs, &max_rows, &[]).expect("trace build succeeds"); let sum_heights = |tables: &[stark::trace::TraceTable<_, _>]| -> u64 { tables.iter().map(|t| t.main_table.height as u64).sum() diff --git a/prover/src/tests/decode_tests.rs b/prover/src/tests/decode_tests.rs index 1149cd84f..078fe2600 100644 --- a/prover/src/tests/decode_tests.rs +++ b/prover/src/tests/decode_tests.rs @@ -12,6 +12,7 @@ use crate::tables::decode::{ use crate::tables::types::{FE, packed_decode as bits}; use crate::test_utils::multi_prove_ram; use crate::test_utils::run_asm_elf; +use crate::test_utils::traces_from_elf_and_logs_ram; // ========================================================================= // Packed decode tests @@ -1002,7 +1003,6 @@ fn test_decode_soundness_same_elf_accepted() { use stark::verifier::{IsStarkVerifier, Verifier}; use crate::VmAirs; - use crate::tables::trace_builder::Traces; use crate::tables::types::GoldilocksExtension; type E = GoldilocksExtension; @@ -1031,7 +1031,7 @@ fn test_decode_soundness_same_elf_accepted() { let result = executor.run().expect("Failed to run program"); let mut traces = - Traces::from_elf_and_logs(&prover_elf, &result.logs, &Default::default(), &[]).unwrap(); + traces_from_elf_and_logs_ram(&prover_elf, &result.logs, &Default::default(), &[]).unwrap(); let table_counts = traces.table_counts(); let prover_airs = VmAirs::new( &prover_elf, 
diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 7bdb4ba5c..b9765aa8c 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -30,6 +30,7 @@ use executor::elf::Elf; use crate::VmAirs; use crate::test_utils::multi_prove_ram; use crate::test_utils::run_asm_elf; +use crate::test_utils::traces_from_elf_and_logs_ram; type F = GoldilocksField; type E = GoldilocksExtension; @@ -158,7 +159,7 @@ fn test_prove_elfs_sub_fast() { let _ = env_logger::builder().is_test(true).try_init(); let (elf, logs, _instructions) = run_asm_elf("sub"); // Use from_elf_and_logs to get PAGE and REGISTER tables for Memory bus - let mut traces = Traces::from_elf_and_logs(&elf, &logs, &Default::default(), &[]).unwrap(); + let mut traces = traces_from_elf_and_logs_ram(&elf, &logs, &Default::default(), &[]).unwrap(); assert!( prove_and_verify_vm_minimal(&elf, &mut traces), @@ -597,7 +598,7 @@ fn test_prove_elfs_test_xor_8() { #[test] fn test_prove_elfs_test_lb_lh_8() { let (elf, logs, _instructions) = run_asm_elf("test_lb_lh_8"); - let mut traces = Traces::from_elf_and_logs(&elf, &logs, &Default::default(), &[]).unwrap(); + let mut traces = traces_from_elf_and_logs_ram(&elf, &logs, &Default::default(), &[]).unwrap(); assert!( prove_and_verify_vm_minimal(&elf, &mut traces), "test_lb_lh_8 failed" @@ -607,7 +608,7 @@ fn test_prove_elfs_test_lb_lh_8() { #[test] fn test_prove_elfs_test_sb_sh_8() { let (elf, logs, _instructions) = run_asm_elf("test_sb_sh_8"); - let mut traces = Traces::from_elf_and_logs(&elf, &logs, &Default::default(), &[]).unwrap(); + let mut traces = traces_from_elf_and_logs_ram(&elf, &logs, &Default::default(), &[]).unwrap(); assert!( !traces.memws.is_empty(), "test_sb_sh_8 should produce MEMW rows for byte/halfword memory accesses" @@ -624,7 +625,7 @@ fn test_prove_elfs_test_sb_sh_8() { #[test] fn test_prove_elfs_lw_sw() { let (elf, logs, _instructions) = run_asm_elf("lw_sw"); - let mut traces = 
Traces::from_elf_and_logs(&elf, &logs, &Default::default(), &[]).unwrap(); + let mut traces = traces_from_elf_and_logs_ram(&elf, &logs, &Default::default(), &[]).unwrap(); assert!( !traces.memw_aligneds.is_empty(), "lw_sw should produce MEMW_A rows for aligned word accesses" @@ -644,7 +645,7 @@ fn test_prove_elfs_lw_sw() { #[test] fn test_prove_elfs_test_memw_split_ts() { let (elf, logs, _instructions) = run_asm_elf("test_memw_split_ts"); - let mut traces = Traces::from_elf_and_logs(&elf, &logs, &Default::default(), &[]).unwrap(); + let mut traces = traces_from_elf_and_logs_ram(&elf, &logs, &Default::default(), &[]).unwrap(); assert!( !traces.memws.is_empty(), "test_memw_split_ts should produce MEMW rows (split old_timestamps from sb+sb+lh)" @@ -683,7 +684,7 @@ fn test_prove_elfs_all_branches_16() { #[test] fn test_prove_elfs_all_loadstore_32() { let (elf, logs, _instructions) = run_asm_elf("all_loadstore_32"); - let mut traces = Traces::from_elf_and_logs(&elf, &logs, &Default::default(), &[]).unwrap(); + let mut traces = traces_from_elf_and_logs_ram(&elf, &logs, &Default::default(), &[]).unwrap(); assert!( prove_and_verify_vm_minimal(&elf, &mut traces), "all_loadstore_32 failed" @@ -732,7 +733,7 @@ fn test_prove_elfs_test_commit_4() { ); let mut traces = - Traces::from_elf_and_logs(&elf, &result.logs, &Default::default(), &[]).unwrap(); + traces_from_elf_and_logs_ram(&elf, &result.logs, &Default::default(), &[]).unwrap(); assert_eq!( traces.public_output_bytes, result.return_values.memory_values @@ -758,7 +759,7 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() { executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor"); let result = executor.run().expect("Failed to run program"); let mut traces = - Traces::from_elf_and_logs(&elf, &result.logs, &Default::default(), &[]).unwrap(); + traces_from_elf_and_logs_ram(&elf, &result.logs, &Default::default(), &[]).unwrap(); // Prover uses correct page configs let table_counts = 
traces.table_counts(); @@ -1128,7 +1129,7 @@ fn test_debug_memory_tokens_sb_sh() { use std::collections::HashMap; let (elf, logs, _instructions) = run_asm_elf("test_sb_sh_8"); - let traces = Traces::from_elf_and_logs(&elf, &logs, &Default::default(), &[]).unwrap(); + let traces = traces_from_elf_and_logs_ram(&elf, &logs, &Default::default(), &[]).unwrap(); let memw = &traces.memws[0]; // Small test: single MEMW chunk println!("DEBUG: test_sb_sh_8 Memory bus tokens (FULL)"); @@ -1459,7 +1460,7 @@ fn test_debug_memory_tokens_sb_sh() { #[test] fn test_deep_stack_passes() { let (elf, logs, _instructions) = run_asm_elf("deep_stack"); - let mut traces = Traces::from_elf_and_logs(&elf, &logs, &Default::default(), &[]).unwrap(); + let mut traces = traces_from_elf_and_logs_ram(&elf, &logs, &Default::default(), &[]).unwrap(); assert!( prove_and_verify_vm_minimal(&elf, &mut traces), @@ -1481,7 +1482,7 @@ fn test_deep_stack_runtime_pages_roundtrip() { executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor"); let result = executor.run().expect("Failed to run program"); let mut traces = - Traces::from_elf_and_logs(&elf, &result.logs, &Default::default(), &[]).unwrap(); + traces_from_elf_and_logs_ram(&elf, &result.logs, &Default::default(), &[]).unwrap(); let runtime_page_ranges = traces.runtime_page_ranges(); let table_counts = traces.table_counts(); @@ -1541,7 +1542,7 @@ fn test_deep_stack_missing_pages_rejected() { executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor"); let result = executor.run().expect("Failed to run program"); let mut traces = - Traces::from_elf_and_logs(&elf, &result.logs, &Default::default(), &[]).unwrap(); + traces_from_elf_and_logs_ram(&elf, &result.logs, &Default::default(), &[]).unwrap(); // Prover uses correct page configs (auto-detected from MemoryState) let table_counts = traces.table_counts(); @@ -1592,7 +1593,7 @@ fn test_deep_stack_missing_pages_rejected() { #[test] fn 
test_heap_alloc_passes() { let (elf, logs, _instructions) = run_asm_elf("heap_alloc"); - let mut traces = Traces::from_elf_and_logs(&elf, &logs, &Default::default(), &[]).unwrap(); + let mut traces = traces_from_elf_and_logs_ram(&elf, &logs, &Default::default(), &[]).unwrap(); // Verify runtime_page_ranges includes the heap page let ranges = traces.runtime_page_ranges(); @@ -1621,7 +1622,7 @@ fn test_heap_alloc_runtime_pages_roundtrip() { executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor"); let result = executor.run().expect("Failed to run program"); let mut traces = - Traces::from_elf_and_logs(&elf, &result.logs, &Default::default(), &[]).unwrap(); + traces_from_elf_and_logs_ram(&elf, &result.logs, &Default::default(), &[]).unwrap(); let runtime_page_ranges = traces.runtime_page_ranges(); let table_counts = traces.table_counts(); From 19c7cde53d250dd137b1c5868d2e3d5f424c7be3 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 13:17:18 -0300 Subject: [PATCH 202/231] Trim drift test module doc comment --- prover/src/tests/count_table_lengths_drift_tests.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/prover/src/tests/count_table_lengths_drift_tests.rs b/prover/src/tests/count_table_lengths_drift_tests.rs index f24fa74cb..6b3e1bc54 100644 --- a/prover/src/tests/count_table_lengths_drift_tests.rs +++ b/prover/src/tests/count_table_lengths_drift_tests.rs @@ -1,7 +1,4 @@ -//! Drift guard: `count_table_lengths` must stay aligned with the actual -//! `Traces::from_elf_and_logs` output. Adding a new table or changing a -//! row-count rule on either side without updating the other should fail -//! this test. +//! Asserts `count_table_lengths` matches `Traces::from_elf_and_logs` row counts. 
use crate::tables::MaxRowsConfig; use crate::tables::trace_builder::count_table_lengths; From 90b4283f6df4159eb634a1087b59ab3bc0008f82 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 14:47:38 -0300 Subject: [PATCH 203/231] Trim disk_spill_tests module doc comment --- prover/src/tests/disk_spill_tests.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/prover/src/tests/disk_spill_tests.rs b/prover/src/tests/disk_spill_tests.rs index c8f7f6f9c..9b7d992d0 100644 --- a/prover/src/tests/disk_spill_tests.rs +++ b/prover/src/tests/disk_spill_tests.rs @@ -1,5 +1,4 @@ -//! End-to-end tests forcing Disk storage via a 1 MB `max_ram_bytes` cap, so -//! even the smallest ELF deterministically crosses the threshold. +//! End-to-end tests forcing `StorageMode::Disk` via a low `max_ram_bytes` cap. use crate::VmProof; use crate::tables::MaxRowsConfig; From 16bc3e303989356d5fc80b287994fe3c8d1bd51d Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 14:51:28 -0300 Subject: [PATCH 204/231] Drop redundant doc comments on disk_spill tests --- prover/src/tests/disk_spill_tests.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/prover/src/tests/disk_spill_tests.rs b/prover/src/tests/disk_spill_tests.rs index 9b7d992d0..2d32e7436 100644 --- a/prover/src/tests/disk_spill_tests.rs +++ b/prover/src/tests/disk_spill_tests.rs @@ -13,7 +13,6 @@ fn options_forcing_disk() -> stark::proof::options::ProofOptions { opts } -/// Prove + verify a small program with Disk storage forced. #[test] fn test_disk_spill_prove_and_verify_small() { let elf_bytes = asm_elf_bytes("sub"); @@ -24,7 +23,6 @@ fn test_disk_spill_prove_and_verify_small() { assert!(ok, "verification returned false"); } -/// Prove + verify with small chunks to exercise spill across chunk boundaries. 
#[test] fn test_disk_spill_prove_and_verify_with_chunks() { let elf_bytes = asm_elf_bytes("sub"); @@ -35,7 +33,6 @@ fn test_disk_spill_prove_and_verify_with_chunks() { assert!(ok, "verification returned false"); } -/// Prove, serialize, deserialize, verify (CLI roundtrip). #[test] fn test_disk_spill_serialization_roundtrip() { let elf_bytes = asm_elf_bytes("sub"); @@ -49,7 +46,6 @@ fn test_disk_spill_serialization_roundtrip() { assert!(valid, "verification failed after serialization roundtrip"); } -/// Prove + verify a 2M-instruction program to catch scale-only bugs. #[test] fn test_disk_spill_prove_and_verify_2m() { let _ = env_logger::builder().is_test(true).try_init(); @@ -61,7 +57,6 @@ fn test_disk_spill_prove_and_verify_2m() { assert!(ok, "verification returned false for fib_iterative_2M"); } -/// Same as roundtrip test but with small chunks. #[test] fn test_disk_spill_serialization_roundtrip_chunked() { let elf_bytes = asm_elf_bytes("sub"); From 77338e91d591d29f275c6a95b9cd10e6bb80528d Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 15:23:24 -0300 Subject: [PATCH 205/231] Use all_instructions_64 ELF for chunking disk-spill tests --- prover/src/tests/disk_spill_tests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prover/src/tests/disk_spill_tests.rs b/prover/src/tests/disk_spill_tests.rs index 2d32e7436..c32473643 100644 --- a/prover/src/tests/disk_spill_tests.rs +++ b/prover/src/tests/disk_spill_tests.rs @@ -25,7 +25,7 @@ fn test_disk_spill_prove_and_verify_small() { #[test] fn test_disk_spill_prove_and_verify_with_chunks() { - let elf_bytes = asm_elf_bytes("sub"); + let elf_bytes = asm_elf_bytes("all_instructions_64"); let opts = options_forcing_disk(); let vm_proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::small()) .expect("prove failed"); @@ -59,7 +59,7 @@ fn test_disk_spill_prove_and_verify_2m() { #[test] fn test_disk_spill_serialization_roundtrip_chunked() { - let elf_bytes = 
asm_elf_bytes("sub"); + let elf_bytes = asm_elf_bytes("all_instructions_64"); let opts = options_forcing_disk(); let proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::small()) .expect("prove failed"); From 152d58ddecf2752865ceae655ac94e5781b8003e Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 15:25:17 -0300 Subject: [PATCH 206/231] Use fib_iterative_372k in disk-spill scale test --- prover/src/tests/disk_spill_tests.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prover/src/tests/disk_spill_tests.rs b/prover/src/tests/disk_spill_tests.rs index c32473643..a1b2d40dd 100644 --- a/prover/src/tests/disk_spill_tests.rs +++ b/prover/src/tests/disk_spill_tests.rs @@ -47,14 +47,14 @@ fn test_disk_spill_serialization_roundtrip() { } #[test] -fn test_disk_spill_prove_and_verify_2m() { +fn test_disk_spill_prove_and_verify_372k() { let _ = env_logger::builder().is_test(true).try_init(); - let elf_bytes = asm_elf_bytes("fib_iterative_2M"); + let elf_bytes = asm_elf_bytes("fib_iterative_372k"); let opts = options_forcing_disk(); let vm_proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::default()) .expect("prove failed"); let ok = crate::verify_with_options(&vm_proof, &elf_bytes, &opts).expect("verify failed"); - assert!(ok, "verification returned false for fib_iterative_2M"); + assert!(ok, "verification returned false for fib_iterative_372k"); } #[test] From 2f349b04afab1153b5e6d429665d396ada226efc Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 15:37:46 -0300 Subject: [PATCH 207/231] Trim peak_bytes_calibration_tests module doc --- prover/src/tests/peak_bytes_calibration_tests.rs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/prover/src/tests/peak_bytes_calibration_tests.rs b/prover/src/tests/peak_bytes_calibration_tests.rs index 6dc7c562f..8a2da62e9 100644 --- a/prover/src/tests/peak_bytes_calibration_tests.rs +++ 
b/prover/src/tests/peak_bytes_calibration_tests.rs @@ -1,13 +1,7 @@ -//! Calibration test: predicted [`peak_bytes`] vs measured RSS during a real proof. -//! -//! Runs a small fib_iterative proof, samples the process's RSS while the proof -//! is running, and asserts the prediction is within 2× of the measured peak -//! (after subtracting the pre-proof baseline). RSS includes mmap'd files, the -//! code segment, and allocator slack on top of the heap-only quantity that -//! [`peak_bytes`] models, so the bound is intentionally loose; the test is a -//! regression guard against silent drift, not a tightness measure. -//! -//! [`peak_bytes`]: crate::auto_storage::peak_bytes +//! Asserts predicted [`peak_bytes`](crate::auto_storage::peak_bytes) stays +//! within 2× of measured RSS during a proof. The 2× bound is loose: RSS +//! counts mmaps, code segment, and allocator slack that the heap-only +//! estimator doesn't model. use std::sync::Arc; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; From ae632e7a9ed8e9f8019953f6987f650df2cc6eec Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 16:24:07 -0300 Subject: [PATCH 208/231] Replace RSS calibration with jemalloc heap-only check --- Cargo.lock | 2 + prover/Cargo.toml | 2 + prover/src/auto_storage.rs | 4 +- .../src/tests/peak_bytes_calibration_tests.rs | 45 +++++++++---------- 4 files changed, 27 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c991ea3f1..1ce476b19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1954,6 +1954,8 @@ dependencies = [ "serde", "stark", "sysinfo", + "tikv-jemalloc-ctl", + "tikv-jemallocator", ] [[package]] diff --git a/prover/Cargo.toml b/prover/Cargo.toml index 76a247039..13bdb11d5 100644 --- a/prover/Cargo.toml +++ b/prover/Cargo.toml @@ -24,6 +24,8 @@ log = "0.4" env_logger = "*" criterion = { version = "0.5", default-features = false } bincode = "1" +tikv-jemallocator = "0.6" +tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] } 
[[bench]] name = "vm_prover_benchmark" diff --git a/prover/src/auto_storage.rs b/prover/src/auto_storage.rs index 382d38833..92aa0d482 100644 --- a/prover/src/auto_storage.rs +++ b/prover/src/auto_storage.rs @@ -38,8 +38,8 @@ const MEMORY_CELL_BYTES: u64 = 32; const INSTRUCTION_MAP_BYTES_PER_ROW: u64 = 32; /// 9/10 budget headroom for OS, other processes, and allocator slack. -const SAFETY_FRACTION_NUM: u64 = 9; -const SAFETY_FRACTION_DEN: u64 = 10; +pub(crate) const SAFETY_FRACTION_NUM: u64 = 9; +pub(crate) const SAFETY_FRACTION_DEN: u64 = 10; /// `(rows, main_cols, aux_cols, num_main_merkle_trees)` for a single table. type TableSpec = (u64, u64, u64, u64); diff --git a/prover/src/tests/peak_bytes_calibration_tests.rs b/prover/src/tests/peak_bytes_calibration_tests.rs index 8a2da62e9..2e5430457 100644 --- a/prover/src/tests/peak_bytes_calibration_tests.rs +++ b/prover/src/tests/peak_bytes_calibration_tests.rs @@ -1,7 +1,7 @@ -//! Asserts predicted [`peak_bytes`](crate::auto_storage::peak_bytes) stays -//! within 2× of measured RSS during a proof. The 2× bound is loose: RSS -//! counts mmaps, code segment, and allocator slack that the heap-only -//! estimator doesn't model. +//! Asserts predicted [`peak_bytes`](crate::auto_storage::peak_bytes) does not +//! underestimate jemalloc-measured heap during a proof. Under-estimation is +//! the safety-critical direction: if it grows beyond `SAFETY_FRACTION`, the +//! runtime's `Disk` vs `Ram` decision becomes unsafe. 
use std::sync::Arc; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; @@ -9,21 +9,23 @@ use std::thread; use std::time::Duration; use stark::proof::options::GoldilocksCubicProofOptions; +use tikv_jemalloc_ctl::{epoch, stats}; use crate::auto_storage; use crate::tables::MaxRowsConfig; use crate::tables::trace_builder::count_table_lengths; use crate::test_utils::{asm_elf_bytes, run_asm_elf}; -fn current_rss_bytes() -> Option { - let pid = sysinfo::get_current_pid().ok()?; - let mut sys = sysinfo::System::new(); - sys.refresh_processes(sysinfo::ProcessesToUpdate::Some(&[pid])); - sys.process(pid).map(|p| p.memory() as usize) +#[global_allocator] +static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; + +fn allocated_bytes() -> usize { + epoch::advance().ok(); + stats::allocated::read().unwrap_or(0) } #[test] -fn peak_bytes_within_2x_of_measured_rss() { +fn peak_bytes_does_not_underestimate_measured_heap() { let (elf, logs, _) = run_asm_elf("fib_iterative_372k"); let elf_bytes = asm_elf_bytes("fib_iterative_372k"); @@ -38,10 +40,9 @@ fn peak_bytes_within_2x_of_measured_rss() { stark::prover::table_parallelism(), ) as usize; - // Drop logs etc. before sampling baseline so they don't inflate it. 
drop(logs); - let baseline = current_rss_bytes().expect("RSS reader works on this platform"); + let baseline = allocated_bytes(); let peak = Arc::new(AtomicUsize::new(baseline)); let stop = Arc::new(AtomicBool::new(false)); @@ -50,10 +51,8 @@ fn peak_bytes_within_2x_of_measured_rss() { let stop = Arc::clone(&stop); thread::spawn(move || { while !stop.load(Ordering::Relaxed) { - if let Some(rss) = current_rss_bytes() { - peak.fetch_max(rss, Ordering::Relaxed); - } - thread::sleep(Duration::from_millis(50)); + peak.fetch_max(allocated_bytes(), Ordering::Relaxed); + thread::sleep(Duration::from_millis(10)); } }) }; @@ -67,17 +66,15 @@ fn peak_bytes_within_2x_of_measured_rss() { let measured = peak.load(Ordering::Relaxed).saturating_sub(baseline); eprintln!( - "peak_bytes calibration: predicted={predicted} bytes, measured_above_baseline={measured} bytes" + "peak_bytes calibration: predicted={predicted} bytes, measured_heap={measured} bytes, ratio={:.2}", + predicted as f64 / measured as f64 ); + let safety_num = auto_storage::SAFETY_FRACTION_NUM as usize; + let safety_den = auto_storage::SAFETY_FRACTION_DEN as usize; assert!( - predicted.saturating_mul(2) >= measured, - "peak_bytes underestimates measured RSS by more than 2×: \ - predicted={predicted}, measured={measured}" - ); - assert!( - predicted <= measured.saturating_mul(2), - "peak_bytes overestimates measured RSS by more than 2×: \ + predicted.saturating_mul(safety_den) >= measured.saturating_mul(safety_num), + "peak_bytes underestimates measured heap below SAFETY_FRACTION ({safety_num}/{safety_den}): \ predicted={predicted}, measured={measured}" ); } From 5c517ccce1108ceab4c13281154e3f5adef9815f Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 16:27:21 -0300 Subject: [PATCH 209/231] Trim peak_bytes_calibration_tests module doc --- prover/src/tests/peak_bytes_calibration_tests.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git 
a/prover/src/tests/peak_bytes_calibration_tests.rs b/prover/src/tests/peak_bytes_calibration_tests.rs index 2e5430457..15497d69e 100644 --- a/prover/src/tests/peak_bytes_calibration_tests.rs +++ b/prover/src/tests/peak_bytes_calibration_tests.rs @@ -1,7 +1,5 @@ //! Asserts predicted [`peak_bytes`](crate::auto_storage::peak_bytes) does not -//! underestimate jemalloc-measured heap during a proof. Under-estimation is -//! the safety-critical direction: if it grows beyond `SAFETY_FRACTION`, the -//! runtime's `Disk` vs `Ram` decision becomes unsafe. +//! underestimate jemalloc-measured heap during a proof. use std::sync::Arc; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; From d2f9be6aa996a0bc7af1b9d8ad353f0d01786fcd Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 17:01:42 -0300 Subject: [PATCH 210/231] Run all prover tests under disk-spill feature in CI --- .github/workflows/pr_main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr_main.yaml b/.github/workflows/pr_main.yaml index ffb5d7a09..cb83676c1 100644 --- a/.github/workflows/pr_main.yaml +++ b/.github/workflows/pr_main.yaml @@ -168,7 +168,7 @@ jobs: - name: Run prover disk-spill tests run: | - cargo test --release -p lambda-vm-prover --features disk-spill disk_spill -- --test-threads=1 + cargo test --release -p lambda-vm-prover --features disk-spill -- --test-threads=1 build-prover-tests: name: Build prover tests From 1230403e783b16bbb1c69b82a573ae795443e9f5 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 17:14:30 -0300 Subject: [PATCH 211/231] Extract AirTracePair alias to silence type_complexity --- crypto/stark/src/test_utils.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/crypto/stark/src/test_utils.rs b/crypto/stark/src/test_utils.rs index e1be191b7..f5cd19f80 100644 --- a/crypto/stark/src/test_utils.rs +++ b/crypto/stark/src/test_utils.rs @@ -10,12 +10,14 @@ use 
math::field::traits::{IsFFTField, IsField, IsSubFieldOf}; use math::spill_safe::SpillSafe; use math::traits::{AsBytes, ByteConversion}; +type AirTracePair<'a, Field, FieldExtension, PI> = ( + &'a dyn AIR, + &'a mut TraceTable, + &'a PI, +); + pub fn multi_prove_ram( - air_trace_pairs: Vec<( - &dyn AIR, - &mut TraceTable, - &PI, - )>, + air_trace_pairs: Vec>, transcript: &mut (impl IsStarkTranscript + Clone + Send), ) -> Result, ProvingError> where From be014194bc547cf060d0285576ca3f276760fbed Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 18:07:22 -0300 Subject: [PATCH 212/231] Trim comments in prove_with_options_and_inputs storage mode block --- prover/src/lib.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 69d036ff4..2556a3291 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -580,15 +580,9 @@ pub fn prove_with_options_and_inputs( #[cfg(feature = "instruments")] let phase_start = std::time::Instant::now(); - // Pick where trace buffers and Merkle tree nodes live for this proof. - // With the `disk-spill` feature enabled, the analytical estimate decides - // between Ram and Disk; without it, we never spill. + // Pick storage mode from analytical heap estimate. #[cfg(feature = "disk-spill")] let storage_mode = { - // Stream over logs once to compute exact per-table row counts without - // building per-instruction op vectors (the decode trace is still built - // for the row count). Use the resulting `TableLengths` to estimate - // peak heap analytically and pick a storage mode. 
let lengths = crate::tables::trace_builder::count_table_lengths( &program, &result.logs, From 603feb98e3177dc8ba23b10919c2c0f471cfd220 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 18:09:40 -0300 Subject: [PATCH 213/231] Import count_table_lengths in lib.rs --- prover/src/lib.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 2556a3291..876940a16 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -41,6 +41,8 @@ use crate::tables::decode; use crate::tables::page; use crate::tables::register; use crate::tables::trace_builder::Traces; +#[cfg(feature = "disk-spill")] +use crate::tables::trace_builder::count_table_lengths; use crate::tables::types::BusId; use crate::test_utils::{ E, F, VmAir, create_bitwise_air, create_branch_air, create_commit_air, create_cpu_air, @@ -583,12 +585,7 @@ pub fn prove_with_options_and_inputs( // Pick storage mode from analytical heap estimate. #[cfg(feature = "disk-spill")] let storage_mode = { - let lengths = crate::tables::trace_builder::count_table_lengths( - &program, - &result.logs, - max_rows, - private_inputs, - )?; + let lengths = count_table_lengths(&program, &result.logs, max_rows, private_inputs)?; let available = auto_storage::available_ram_bytes(); let estimated_peak = auto_storage::peak_bytes( From 178b16618b46848eec69e94a250e9cac7f3f6aaa Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 18:13:23 -0300 Subject: [PATCH 214/231] Import table_parallelism in lib.rs --- prover/src/lib.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 876940a16..8cdad380b 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -29,6 +29,8 @@ use crypto::fiat_shamir::is_transcript::IsTranscript; use executor::elf::Elf; use executor::vm::execution::Executor; use math::field::element::FieldElement; +#[cfg(feature = "disk-spill")] +use 
stark::prover::table_parallelism; use stark::prover::{IsStarkProver, Prover}; #[cfg(feature = "disk-spill")] use stark::storage_mode::StorageMode; @@ -588,11 +590,8 @@ pub fn prove_with_options_and_inputs( let lengths = count_table_lengths(&program, &result.logs, max_rows, private_inputs)?; let available = auto_storage::available_ram_bytes(); - let estimated_peak = auto_storage::peak_bytes( - &lengths, - proof_options.blowup_factor, - stark::prover::table_parallelism(), - ); + let estimated_peak = + auto_storage::peak_bytes(&lengths, proof_options.blowup_factor, table_parallelism()); let mode = auto_storage::select_storage_mode( estimated_peak, available, From 3059f2a4c87932aee097601de1c2cccda4c86615 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 18:17:41 -0300 Subject: [PATCH 215/231] Drop confusing Phase 5 comment before from_elf_and_logs --- prover/src/lib.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 8cdad380b..723c5812c 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -603,8 +603,6 @@ pub fn prove_with_options_and_inputs( mode }; - // Phase 5: build the full traces with the chosen mode. `Disk` spills each - // chunk as it's built, so the trace never fully materializes in RAM. 
let mut traces = Traces::from_elf_and_logs( &program, &result.logs, From 4693226d738fafde34334d66757736dbcbfb7b9b Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 18:19:44 -0300 Subject: [PATCH 216/231] Inline multi_prove arguments --- prover/src/lib.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 723c5812c..e44fff53e 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -639,11 +639,9 @@ pub fn prove_with_options_and_inputs( let runtime_page_ranges = traces.runtime_page_ranges(); // Phase 4: Prove (multi_prove) - let air_pairs = airs.air_trace_pairs(&mut traces); - let transcript = &mut DefaultTranscript::::new(&[]); let proof = Prover::multi_prove( - air_pairs, - transcript, + airs.air_trace_pairs(&mut traces), + &mut DefaultTranscript::::new(&[]), #[cfg(feature = "disk-spill")] storage_mode, ) From df11ef4e5cc3bf12475814fcc166455e82198f4e Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 19:17:49 -0300 Subject: [PATCH 217/231] Replace ProofOptions::max_ram_bytes with FORCE_DISK_SPILL env var --- crypto/stark/benches/profile_prover.rs | 2 - crypto/stark/benches/prover_benchmark.rs | 2 - crypto/stark/src/proof/options.rs | 11 -- crypto/stark/src/tests/prover_tests.rs | 14 -- prover/src/auto_storage.rs | 182 ++++++++++-------- prover/src/lib.rs | 17 +- prover/src/tests/disk_spill_tests.rs | 38 ++-- prover/src/tests/mod.rs | 2 - .../src/tests/peak_bytes_calibration_tests.rs | 78 -------- scripts/calibrate_threshold.sh | 4 +- 10 files changed, 131 insertions(+), 219 deletions(-) delete mode 100644 prover/src/tests/peak_bytes_calibration_tests.rs diff --git a/crypto/stark/benches/profile_prover.rs b/crypto/stark/benches/profile_prover.rs index b2ee9aa7e..dbff24440 100644 --- a/crypto/stark/benches/profile_prover.rs +++ b/crypto/stark/benches/profile_prover.rs @@ -21,8 +21,6 @@ fn main() { fri_number_of_queries: 100, coset_offset: 3, grinding_factor: 0, - 
#[cfg(feature = "disk-spill")] - max_ram_bytes: None, }; let num_columns = 16; diff --git a/crypto/stark/benches/prover_benchmark.rs b/crypto/stark/benches/prover_benchmark.rs index e80c37801..2729fff29 100644 --- a/crypto/stark/benches/prover_benchmark.rs +++ b/crypto/stark/benches/prover_benchmark.rs @@ -61,8 +61,6 @@ fn benchmark_proof_options() -> ProofOptions { fri_number_of_queries: 30, coset_offset: 3, grinding_factor: 0, - #[cfg(feature = "disk-spill")] - max_ram_bytes: None, } } diff --git a/crypto/stark/src/proof/options.rs b/crypto/stark/src/proof/options.rs index 7180b7af8..70976b993 100644 --- a/crypto/stark/src/proof/options.rs +++ b/crypto/stark/src/proof/options.rs @@ -45,13 +45,6 @@ pub struct ProofOptions { pub fri_number_of_queries: usize, pub coset_offset: u64, pub grinding_factor: u8, - /// Optional ceiling on prover RAM usage. When set, the prover spills - /// trace tables and Merkle-tree nodes to mmap if the estimated peak - /// exceeds this cap or system-available RAM (less a safety margin), - /// whichever is smaller. LDE column vectors remain in RAM regardless. 
- #[cfg(feature = "disk-spill")] - #[serde(default)] - pub max_ram_bytes: Option, } impl ProofOptions { @@ -63,8 +56,6 @@ impl ProofOptions { fri_number_of_queries: 3, coset_offset: 3, grinding_factor: 1, - #[cfg(feature = "disk-spill")] - max_ram_bytes: None, } } } @@ -121,8 +112,6 @@ impl GoldilocksCubicProofOptions { fri_number_of_queries, coset_offset: 3, grinding_factor, - #[cfg(feature = "disk-spill")] - max_ram_bytes: None, }) } } diff --git a/crypto/stark/src/tests/prover_tests.rs b/crypto/stark/src/tests/prover_tests.rs index f4b762724..1355b363d 100644 --- a/crypto/stark/src/tests/prover_tests.rs +++ b/crypto/stark/src/tests/prover_tests.rs @@ -33,8 +33,6 @@ fn test_domain_constructor() { fri_number_of_queries: 1, coset_offset, grinding_factor, - #[cfg(feature = "disk-spill")] - max_ram_bytes: None, }; let domain = Domain::new( @@ -126,8 +124,6 @@ fn barycentric_trace_eval_matches_horner_trace_eval() { fri_number_of_queries: 1, coset_offset, grinding_factor: 0, - #[cfg(feature = "disk-spill")] - max_ram_bytes: None, }; let air = simple_fibonacci::FibonacciAIR::::new(&proof_options); @@ -199,8 +195,6 @@ fn test_decompose_and_extend_d2_matches_original() { fri_number_of_queries: 1, coset_offset: 3, grinding_factor: 0, - #[cfg(feature = "disk-spill")] - max_ram_bytes: None, }; // We need an AIR with composition_poly_degree_bound = 2 * trace_length. @@ -261,16 +255,12 @@ fn test_multi_prove_mixed_coset_offsets() { fri_number_of_queries: 3, coset_offset: 3, grinding_factor: 1, - #[cfg(feature = "disk-spill")] - max_ram_bytes: None, }; let proof_options_7 = ProofOptions { blowup_factor: 2, fri_number_of_queries: 3, coset_offset: 7, grinding_factor: 1, - #[cfg(feature = "disk-spill")] - max_ram_bytes: None, }; // Both AIRs have the same trace length and blowup, but different coset offsets. 
@@ -335,8 +325,6 @@ fn test_multi_prove_dedups_shared_domain_params() { fri_number_of_queries: 3, coset_offset: 3, grinding_factor: 1, - #[cfg(feature = "disk-spill")] - max_ram_bytes: None, }; let mut trace_1 = simple_fibonacci::fibonacci_trace([Felt::from(1), Felt::from(1)], 8); @@ -427,8 +415,6 @@ fn test_deep_poly_direct_2n_matches_interpolate_fft_extend() { fri_number_of_queries: 1, coset_offset: 3, grinding_factor: 0, - #[cfg(feature = "disk-spill")] - max_ram_bytes: None, }; let air = QuadraticAIR::::new(&proof_options); diff --git a/prover/src/auto_storage.rs b/prover/src/auto_storage.rs index 92aa0d482..1ac45222e 100644 --- a/prover/src/auto_storage.rs +++ b/prover/src/auto_storage.rs @@ -1,4 +1,7 @@ //! Automatic `StorageMode` selection from an analytical peak-RAM estimate. +//! +//! `FORCE_DISK_SPILL` env var forces `StorageMode::Disk` regardless of the +//! estimate. use crate::tables::bitwise::{ NUM_ROWS as BITWISE_ROWS, bus_interactions as bitwise_buses, cols::NUM_COLUMNS as BITWISE_COLS, @@ -27,6 +30,7 @@ use crate::tables::register::{ }; use crate::tables::shift::{bus_interactions as shift_buses, cols::NUM_COLUMNS as SHIFT_COLS}; use crate::tables::trace_builder::TableLengths; +use stark::prover::table_parallelism; use stark::storage_mode::StorageMode; use sysinfo::System; @@ -38,8 +42,8 @@ const MEMORY_CELL_BYTES: u64 = 32; const INSTRUCTION_MAP_BYTES_PER_ROW: u64 = 32; /// 9/10 budget headroom for OS, other processes, and allocator slack. -pub(crate) const SAFETY_FRACTION_NUM: u64 = 9; -pub(crate) const SAFETY_FRACTION_DEN: u64 = 10; +const SAFETY_FRACTION_NUM: u64 = 9; +const SAFETY_FRACTION_DEN: u64 = 10; /// `(rows, main_cols, aux_cols, num_main_merkle_trees)` for a single table. type TableSpec = (u64, u64, u64, u64); @@ -209,13 +213,22 @@ fn table_specs(lengths: &TableLengths) -> Vec { specs } +/// Estimates heap from `lengths` and `blowup_factor`. Picks `Disk` if the +/// estimate is greater than available RAM, else `Ram`. 
`FORCE_DISK_SPILL` env +/// var forces `Disk`. +pub fn decide(lengths: &TableLengths, blowup_factor: u8) -> StorageMode { + if std::env::var("FORCE_DISK_SPILL").is_ok() { + log::info!("storage_mode: Disk (forced via FORCE_DISK_SPILL)"); + return StorageMode::Disk; + } + let estimated = peak_bytes(lengths, blowup_factor, table_parallelism()); + let mode = select_storage_mode(estimated, available_ram_bytes()); + log::info!("estimated_peak_bytes: {estimated}, storage_mode: {mode:?}"); + mode +} + /// Peak RAM estimate in bytes for a proof whose trace shape matches `lengths`. -/// -/// `blowup_factor` is `ProofOptions::blowup_factor`. `table_parallelism` is the -/// `k` used by `multi_prove_with_mode` to chunk rounds 2-4; pass -/// `stark::prover::table_parallelism()` so the worst-case-chunk transient term -/// matches the runtime. -pub fn peak_bytes(lengths: &TableLengths, blowup_factor: u8, table_parallelism: usize) -> u64 { +fn peak_bytes(lengths: &TableLengths, blowup_factor: u8, table_parallelism: usize) -> u64 { let blowup = blowup_factor as u64; let k = table_parallelism.max(1); let specs = table_specs(lengths); @@ -268,53 +281,23 @@ pub fn peak_bytes(lengths: &TableLengths, blowup_factor: u8, table_parallelism: .saturating_add(state_total) } -/// User cap ∩ OS available, or None if both are unknown. -fn effective_budget(available: Option, cap: Option) -> Option { - match (cap, available) { - (Some(c), Some(a)) => Some(c.min(a)), - (Some(c), None) => Some(c), - (None, a) => a, - } -} - -/// Disk if `estimated` exceeds 90% of the effective budget, else Ram. -/// Defaults to Disk when budget is unknown (sysinfo failure + no cap). -pub fn select_storage_mode( - estimated: u64, - available: Option, - cap: Option, -) -> StorageMode { - let Some(budget) = effective_budget(available, cap) else { - log::warn!( - "Auto disk-spill: sysinfo could not read system memory and no cap set, \ - defaulting to Disk." 
- ); +/// `Disk` if `estimated` exceeds `available` minus a safety margin, else +/// `Ram`. Defaults to `Disk` when `available` is `None`. +fn select_storage_mode(estimated: u64, available: Option) -> StorageMode { + let Some(available) = available else { + log::warn!("Auto disk-spill: sysinfo could not read system memory, defaulting to Disk."); return StorageMode::Disk; }; - let threshold = budget.saturating_mul(SAFETY_FRACTION_NUM) / SAFETY_FRACTION_DEN; - + let threshold = available.saturating_mul(SAFETY_FRACTION_NUM) / SAFETY_FRACTION_DEN; if estimated > threshold { StorageMode::Disk } else { - // `cap.is_none()` plus an `effective_budget` that returned `Some` means - // `available` must be `Some` (see `effective_budget`). - if cap.is_none() && estimated.saturating_mul(4) >= available.unwrap().saturating_mul(3) { - log::warn!( - "Auto disk-spill picked Ram with estimated_peak={estimated} bytes near \ - available={available:?}. Set max_ram_bytes to bound the budget to a \ - cgroup limit if running in a container." - ); - } StorageMode::Ram } } -/// OS-available RAM, or None if sysinfo can't read it (e.g. stripped containers). -/// Returns `Some(0)` on near-OOM so callers force Disk rather than fall back to Ram. -/// -/// Reads host `/proc/meminfo`, not cgroup limits — set `max_ram_bytes` in -/// containerized environments to bound the budget to the container's limit. -pub fn available_ram_bytes() -> Option { +/// OS-available RAM, or `None` if sysinfo can't read it. +fn available_ram_bytes() -> Option { let mut sys = System::new(); sys.refresh_memory(); // total_memory == 0 means sysinfo can't read; otherwise available is real. @@ -400,56 +383,93 @@ mod tests { #[test] fn select_ram_when_estimate_below_threshold() { // 10 GB estimated, 32 GB available → threshold 28.8 GB → Ram. 
- let mode = select_storage_mode(10 * GB, Some(32 * GB), None); + let mode = select_storage_mode(10 * GB, Some(32 * GB)); assert_eq!(mode, StorageMode::Ram); } #[test] fn select_disk_when_estimate_exceeds_threshold() { // 30 GB estimated, 32 GB available → threshold 28.8 GB → Disk. - let mode = select_storage_mode(30 * GB, Some(32 * GB), None); + let mode = select_storage_mode(30 * GB, Some(32 * GB)); assert_eq!(mode, StorageMode::Disk); } #[test] - fn cap_forces_disk_when_smaller_than_available() { - // 10 GB estimated, 64 GB available (would be Ram), but cap=4 GB - // → threshold = 4 × 0.9 = 3.6 GB → Disk. - let mode = select_storage_mode(10 * GB, Some(64 * GB), Some(4 * GB)); + fn unknown_available_defaults_to_disk() { + let mode = select_storage_mode(peak_bytes(&empty_lengths(), 2, ALL_TABLES), None); assert_eq!(mode, StorageMode::Disk); } - #[test] - fn cap_ignored_when_larger_than_available() { - // available=8 GB dominates a cap of 64 GB. - // threshold = 8 × 0.9 = 7.2 GB, estimate 10 GB → Disk. - let mode = select_storage_mode(10 * GB, Some(8 * GB), Some(64 * GB)); - assert_eq!(mode, StorageMode::Disk); - } + /// Asserts predicted [`peak_bytes`] does not underestimate jemalloc-measured + /// heap during a proof. 
+ mod calibration { + use super::*; + use crate::tables::MaxRowsConfig; + use crate::tables::trace_builder::count_table_lengths; + use crate::test_utils::{asm_elf_bytes, run_asm_elf}; + use stark::proof::options::GoldilocksCubicProofOptions; + use std::sync::Arc; + use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; + use std::thread; + use std::time::Duration; + use tikv_jemalloc_ctl::{epoch, stats}; + + #[global_allocator] + static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; + + fn allocated_bytes() -> usize { + epoch::advance().ok(); + stats::allocated::read().unwrap_or(0) + } - #[test] - fn tiny_cap_always_forces_disk() { - let mode = select_storage_mode( - peak_bytes(&empty_lengths(), 2, ALL_TABLES), - Some(64 * GB), - Some(1_000_000), - ); - assert_eq!(mode, StorageMode::Disk); - } + #[test] + fn peak_bytes_does_not_underestimate_measured_heap() { + let (elf, logs, _) = run_asm_elf("fib_iterative_372k"); + let elf_bytes = asm_elf_bytes("fib_iterative_372k"); - #[test] - fn unknown_available_with_no_cap_defaults_to_disk() { - // sysinfo failed and no cap was set. Default to Disk: sysinfo fails - // in stripped-down containers where Ram would OOM. Pass max_ram_bytes - // to opt out on a known-sized machine. - let mode = select_storage_mode(peak_bytes(&empty_lengths(), 2, ALL_TABLES), None, None); - assert_eq!(mode, StorageMode::Disk); - } + let max_rows = MaxRowsConfig::default(); + let lengths = count_table_lengths(&elf, &logs, &max_rows, &[]) + .expect("count_table_lengths succeeds"); - #[test] - fn unknown_available_with_cap_uses_cap_as_budget() { - // OS can't report; cap is the whole budget. 
- let mode = select_storage_mode(10 * GB, None, Some(4 * GB)); - assert_eq!(mode, StorageMode::Disk); + let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is valid"); + let predicted = peak_bytes(&lengths, opts.blowup_factor, table_parallelism()) as usize; + + drop(logs); + + let baseline = allocated_bytes(); + let peak = Arc::new(AtomicUsize::new(baseline)); + let stop = Arc::new(AtomicBool::new(false)); + + let sampler = { + let peak = Arc::clone(&peak); + let stop = Arc::clone(&stop); + thread::spawn(move || { + while !stop.load(Ordering::Relaxed) { + peak.fetch_max(allocated_bytes(), Ordering::Relaxed); + thread::sleep(Duration::from_millis(10)); + } + }) + }; + + let _proof = crate::prove_with_options_and_inputs(&elf_bytes, &[], &opts, &max_rows) + .expect("proof succeeds"); + + stop.store(true, Ordering::Relaxed); + sampler.join().expect("sampler joins"); + + let measured = peak.load(Ordering::Relaxed).saturating_sub(baseline); + + eprintln!( + "peak_bytes calibration: predicted={predicted} bytes, measured_heap={measured} bytes, ratio={:.2}", + predicted as f64 / measured as f64 + ); + + let safety_num = SAFETY_FRACTION_NUM as usize; + let safety_den = SAFETY_FRACTION_DEN as usize; + assert!( + predicted.saturating_mul(safety_den) >= measured.saturating_mul(safety_num), + "peak_bytes underestimates measured heap: predicted={predicted}, measured={measured}" + ); + } } } diff --git a/prover/src/lib.rs b/prover/src/lib.rs index e44fff53e..10f21e1d8 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -29,8 +29,6 @@ use crypto::fiat_shamir::is_transcript::IsTranscript; use executor::elf::Elf; use executor::vm::execution::Executor; use math::field::element::FieldElement; -#[cfg(feature = "disk-spill")] -use stark::prover::table_parallelism; use stark::prover::{IsStarkProver, Prover}; #[cfg(feature = "disk-spill")] use stark::storage_mode::StorageMode; @@ -584,23 +582,10 @@ pub fn prove_with_options_and_inputs( #[cfg(feature = 
"instruments")] let phase_start = std::time::Instant::now(); - // Pick storage mode from analytical heap estimate. #[cfg(feature = "disk-spill")] let storage_mode = { let lengths = count_table_lengths(&program, &result.logs, max_rows, private_inputs)?; - - let available = auto_storage::available_ram_bytes(); - let estimated_peak = - auto_storage::peak_bytes(&lengths, proof_options.blowup_factor, table_parallelism()); - let mode = auto_storage::select_storage_mode( - estimated_peak, - available, - proof_options.max_ram_bytes, - ); - - log::info!("predicted_peak_bytes: {estimated_peak}, storage_mode: {mode:?}"); - - mode + auto_storage::decide(&lengths, proof_options.blowup_factor) }; let mut traces = Traces::from_elf_and_logs( diff --git a/prover/src/tests/disk_spill_tests.rs b/prover/src/tests/disk_spill_tests.rs index a1b2d40dd..8c71951a7 100644 --- a/prover/src/tests/disk_spill_tests.rs +++ b/prover/src/tests/disk_spill_tests.rs @@ -1,22 +1,34 @@ -//! End-to-end tests forcing `StorageMode::Disk` via a low `max_ram_bytes` cap. +//! End-to-end tests forcing `StorageMode::Disk` via the `FORCE_DISK_SPILL` env var. use crate::VmProof; use crate::tables::MaxRowsConfig; use crate::test_utils::asm_elf_bytes; use stark::proof::options::GoldilocksCubicProofOptions; -const FORCE_DISK_CAP: u64 = 1_000_000; +/// RAII guard that sets `FORCE_DISK_SPILL` for the test's scope and clears it +/// on drop. Tests must run with `--test-threads=1`. +struct ForceDiskGuard; -fn options_forcing_disk() -> stark::proof::options::ProofOptions { - let mut opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); - opts.max_ram_bytes = Some(FORCE_DISK_CAP); - opts +impl ForceDiskGuard { + fn new() -> Self { + // SAFETY: tests run with --test-threads=1, no concurrent env access. + unsafe { std::env::set_var("FORCE_DISK_SPILL", "1") }; + Self + } +} + +impl Drop for ForceDiskGuard { + fn drop(&mut self) { + // SAFETY: same as new(). 
+ unsafe { std::env::remove_var("FORCE_DISK_SPILL") }; + } } #[test] fn test_disk_spill_prove_and_verify_small() { + let _guard = ForceDiskGuard::new(); let elf_bytes = asm_elf_bytes("sub"); - let opts = options_forcing_disk(); + let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); let vm_proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::default()) .expect("prove failed"); let ok = crate::verify_with_options(&vm_proof, &elf_bytes, &opts).expect("verify failed"); @@ -25,8 +37,9 @@ fn test_disk_spill_prove_and_verify_small() { #[test] fn test_disk_spill_prove_and_verify_with_chunks() { + let _guard = ForceDiskGuard::new(); let elf_bytes = asm_elf_bytes("all_instructions_64"); - let opts = options_forcing_disk(); + let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); let vm_proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::small()) .expect("prove failed"); let ok = crate::verify_with_options(&vm_proof, &elf_bytes, &opts).expect("verify failed"); @@ -35,8 +48,9 @@ fn test_disk_spill_prove_and_verify_with_chunks() { #[test] fn test_disk_spill_serialization_roundtrip() { + let _guard = ForceDiskGuard::new(); let elf_bytes = asm_elf_bytes("sub"); - let opts = options_forcing_disk(); + let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); let proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::default()) .expect("prove failed"); @@ -48,9 +62,10 @@ fn test_disk_spill_serialization_roundtrip() { #[test] fn test_disk_spill_prove_and_verify_372k() { + let _guard = ForceDiskGuard::new(); let _ = env_logger::builder().is_test(true).try_init(); let elf_bytes = asm_elf_bytes("fib_iterative_372k"); - let opts = options_forcing_disk(); + let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); let vm_proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::default()) 
.expect("prove failed"); let ok = crate::verify_with_options(&vm_proof, &elf_bytes, &opts).expect("verify failed"); @@ -59,8 +74,9 @@ fn test_disk_spill_prove_and_verify_372k() { #[test] fn test_disk_spill_serialization_roundtrip_chunked() { + let _guard = ForceDiskGuard::new(); let elf_bytes = asm_elf_bytes("all_instructions_64"); - let opts = options_forcing_disk(); + let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); let proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::small()) .expect("prove failed"); diff --git a/prover/src/tests/mod.rs b/prover/src/tests/mod.rs index 4b262ac18..dc5f3fe22 100644 --- a/prover/src/tests/mod.rs +++ b/prover/src/tests/mod.rs @@ -26,8 +26,6 @@ pub mod lt_bus_tests; pub mod lt_tests; #[cfg(test)] pub mod mul_tests; -#[cfg(all(test, feature = "disk-spill"))] -pub mod peak_bytes_calibration_tests; #[cfg(test)] pub mod prove_elfs_tests; #[cfg(test)] diff --git a/prover/src/tests/peak_bytes_calibration_tests.rs b/prover/src/tests/peak_bytes_calibration_tests.rs deleted file mode 100644 index 15497d69e..000000000 --- a/prover/src/tests/peak_bytes_calibration_tests.rs +++ /dev/null @@ -1,78 +0,0 @@ -//! Asserts predicted [`peak_bytes`](crate::auto_storage::peak_bytes) does not -//! underestimate jemalloc-measured heap during a proof. 
- -use std::sync::Arc; -use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; -use std::thread; -use std::time::Duration; - -use stark::proof::options::GoldilocksCubicProofOptions; -use tikv_jemalloc_ctl::{epoch, stats}; - -use crate::auto_storage; -use crate::tables::MaxRowsConfig; -use crate::tables::trace_builder::count_table_lengths; -use crate::test_utils::{asm_elf_bytes, run_asm_elf}; - -#[global_allocator] -static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; - -fn allocated_bytes() -> usize { - epoch::advance().ok(); - stats::allocated::read().unwrap_or(0) -} - -#[test] -fn peak_bytes_does_not_underestimate_measured_heap() { - let (elf, logs, _) = run_asm_elf("fib_iterative_372k"); - let elf_bytes = asm_elf_bytes("fib_iterative_372k"); - - let max_rows = MaxRowsConfig::default(); - let lengths = - count_table_lengths(&elf, &logs, &max_rows, &[]).expect("count_table_lengths succeeds"); - - let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is valid"); - let predicted = auto_storage::peak_bytes( - &lengths, - opts.blowup_factor, - stark::prover::table_parallelism(), - ) as usize; - - drop(logs); - - let baseline = allocated_bytes(); - let peak = Arc::new(AtomicUsize::new(baseline)); - let stop = Arc::new(AtomicBool::new(false)); - - let sampler = { - let peak = Arc::clone(&peak); - let stop = Arc::clone(&stop); - thread::spawn(move || { - while !stop.load(Ordering::Relaxed) { - peak.fetch_max(allocated_bytes(), Ordering::Relaxed); - thread::sleep(Duration::from_millis(10)); - } - }) - }; - - let _proof = crate::prove_with_options_and_inputs(&elf_bytes, &[], &opts, &max_rows) - .expect("proof succeeds"); - - stop.store(true, Ordering::Relaxed); - sampler.join().expect("sampler joins"); - - let measured = peak.load(Ordering::Relaxed).saturating_sub(baseline); - - eprintln!( - "peak_bytes calibration: predicted={predicted} bytes, measured_heap={measured} bytes, ratio={:.2}", - predicted as f64 / measured as f64 - ); 
- - let safety_num = auto_storage::SAFETY_FRACTION_NUM as usize; - let safety_den = auto_storage::SAFETY_FRACTION_DEN as usize; - assert!( - predicted.saturating_mul(safety_den) >= measured.saturating_mul(safety_num), - "peak_bytes underestimates measured heap below SAFETY_FRACTION ({safety_num}/{safety_den}): \ - predicted={predicted}, measured={measured}" - ); -} diff --git a/scripts/calibrate_threshold.sh b/scripts/calibrate_threshold.sh index fb81dc7a9..1fed327e4 100755 --- a/scripts/calibrate_threshold.sh +++ b/scripts/calibrate_threshold.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Calibrate the auto-disk-spill threshold: actual RSS / predicted_peak_bytes. +# Calibrate the auto-disk-spill threshold: actual RSS / estimated_peak_bytes. # # Usage: calibrate_threshold.sh elf1.elf [elf2.elf ...] # @@ -35,7 +35,7 @@ for elf in "$@"; do continue } - pred=$(grep -o 'predicted_peak_bytes: [0-9]*' "$OUT/err.txt" | awk '{print $2}') + pred=$(grep -o 'estimated_peak_bytes: [0-9]*' "$OUT/err.txt" | awk '{print $2}') heap_mb=$(grep -o 'Peak heap: [0-9]*' "$OUT/out.txt" | awk '{print $3}') rss_kb=$(grep "Maximum resident set size" "$OUT/err.txt" | awk '{print $NF}') From 9116dd2033a606ae0a180138f1810b192a47a780 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 19:21:39 -0300 Subject: [PATCH 218/231] Drop env_logger init from disk_spill 372k test --- prover/src/tests/disk_spill_tests.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/prover/src/tests/disk_spill_tests.rs b/prover/src/tests/disk_spill_tests.rs index 8c71951a7..d8331569d 100644 --- a/prover/src/tests/disk_spill_tests.rs +++ b/prover/src/tests/disk_spill_tests.rs @@ -63,7 +63,6 @@ fn test_disk_spill_serialization_roundtrip() { #[test] fn test_disk_spill_prove_and_verify_372k() { let _guard = ForceDiskGuard::new(); - let _ = env_logger::builder().is_test(true).try_init(); let elf_bytes = asm_elf_bytes("fib_iterative_372k"); let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always 
valid"); let vm_proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::default()) From c5a41dcc0a8d2e13690730c7014394d6bc2bdefb Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 6 May 2026 19:25:47 -0300 Subject: [PATCH 219/231] Reword calibrate_threshold doc to not leak internals --- scripts/calibrate_threshold.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/calibrate_threshold.sh b/scripts/calibrate_threshold.sh index 1fed327e4..795eeb777 100755 --- a/scripts/calibrate_threshold.sh +++ b/scripts/calibrate_threshold.sh @@ -4,8 +4,8 @@ # Usage: calibrate_threshold.sh elf1.elf [elf2.elf ...] # # Builds CLI with jemalloc-stats, runs each ELF under `/usr/bin/time -v`, -# and prints predicted vs measured peak. The max of rss/pred is r_max; -# set the threshold in select_storage_mode to ~1/r_max minus a small margin. +# and prints predicted vs measured peak. Use the rss/pred ratio to adjust +# the safety margin in `auto_storage.rs`. set -euo pipefail @@ -50,5 +50,4 @@ for elf in "$@"; do done echo "" -echo "Take the max rss/pred across runs as r_max." -echo "Set threshold in select_storage_mode to ~1/r_max minus margin (e.g. 0.05)." +echo "Use the rss/pred ratio to adjust the safety margin in auto_storage.rs." 
From e9f0329202d380e5daeda36295ae83e27a94f6f5 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 7 May 2026 11:46:08 -0300 Subject: [PATCH 220/231] Move peak_bytes calibration to integration test --- prover/src/auto_storage.rs | 79 ++----------------------------------- prover/tests/calibration.rs | 77 ++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 76 deletions(-) create mode 100644 prover/tests/calibration.rs diff --git a/prover/src/auto_storage.rs b/prover/src/auto_storage.rs index 1ac45222e..a28bcd498 100644 --- a/prover/src/auto_storage.rs +++ b/prover/src/auto_storage.rs @@ -42,8 +42,8 @@ const MEMORY_CELL_BYTES: u64 = 32; const INSTRUCTION_MAP_BYTES_PER_ROW: u64 = 32; /// 9/10 budget headroom for OS, other processes, and allocator slack. -const SAFETY_FRACTION_NUM: u64 = 9; -const SAFETY_FRACTION_DEN: u64 = 10; +pub const SAFETY_FRACTION_NUM: u64 = 9; +pub const SAFETY_FRACTION_DEN: u64 = 10; /// `(rows, main_cols, aux_cols, num_main_merkle_trees)` for a single table. type TableSpec = (u64, u64, u64, u64); @@ -228,7 +228,7 @@ pub fn decide(lengths: &TableLengths, blowup_factor: u8) -> StorageMode { } /// Peak RAM estimate in bytes for a proof whose trace shape matches `lengths`. -fn peak_bytes(lengths: &TableLengths, blowup_factor: u8, table_parallelism: usize) -> u64 { +pub fn peak_bytes(lengths: &TableLengths, blowup_factor: u8, table_parallelism: usize) -> u64 { let blowup = blowup_factor as u64; let k = table_parallelism.max(1); let specs = table_specs(lengths); @@ -399,77 +399,4 @@ mod tests { let mode = select_storage_mode(peak_bytes(&empty_lengths(), 2, ALL_TABLES), None); assert_eq!(mode, StorageMode::Disk); } - - /// Asserts predicted [`peak_bytes`] does not underestimate jemalloc-measured - /// heap during a proof. 
- mod calibration { - use super::*; - use crate::tables::MaxRowsConfig; - use crate::tables::trace_builder::count_table_lengths; - use crate::test_utils::{asm_elf_bytes, run_asm_elf}; - use stark::proof::options::GoldilocksCubicProofOptions; - use std::sync::Arc; - use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; - use std::thread; - use std::time::Duration; - use tikv_jemalloc_ctl::{epoch, stats}; - - #[global_allocator] - static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; - - fn allocated_bytes() -> usize { - epoch::advance().ok(); - stats::allocated::read().unwrap_or(0) - } - - #[test] - fn peak_bytes_does_not_underestimate_measured_heap() { - let (elf, logs, _) = run_asm_elf("fib_iterative_372k"); - let elf_bytes = asm_elf_bytes("fib_iterative_372k"); - - let max_rows = MaxRowsConfig::default(); - let lengths = count_table_lengths(&elf, &logs, &max_rows, &[]) - .expect("count_table_lengths succeeds"); - - let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is valid"); - let predicted = peak_bytes(&lengths, opts.blowup_factor, table_parallelism()) as usize; - - drop(logs); - - let baseline = allocated_bytes(); - let peak = Arc::new(AtomicUsize::new(baseline)); - let stop = Arc::new(AtomicBool::new(false)); - - let sampler = { - let peak = Arc::clone(&peak); - let stop = Arc::clone(&stop); - thread::spawn(move || { - while !stop.load(Ordering::Relaxed) { - peak.fetch_max(allocated_bytes(), Ordering::Relaxed); - thread::sleep(Duration::from_millis(10)); - } - }) - }; - - let _proof = crate::prove_with_options_and_inputs(&elf_bytes, &[], &opts, &max_rows) - .expect("proof succeeds"); - - stop.store(true, Ordering::Relaxed); - sampler.join().expect("sampler joins"); - - let measured = peak.load(Ordering::Relaxed).saturating_sub(baseline); - - eprintln!( - "peak_bytes calibration: predicted={predicted} bytes, measured_heap={measured} bytes, ratio={:.2}", - predicted as f64 / measured as f64 - ); - - let safety_num 
= SAFETY_FRACTION_NUM as usize; - let safety_den = SAFETY_FRACTION_DEN as usize; - assert!( - predicted.saturating_mul(safety_den) >= measured.saturating_mul(safety_num), - "peak_bytes underestimates measured heap: predicted={predicted}, measured={measured}" - ); - } - } } diff --git a/prover/tests/calibration.rs b/prover/tests/calibration.rs new file mode 100644 index 000000000..ff11bcf4b --- /dev/null +++ b/prover/tests/calibration.rs @@ -0,0 +1,77 @@ +//! Asserts predicted `peak_bytes` does not underestimate jemalloc-measured +//! heap during a proof. Lives in its own integration-test binary so that +//! `#[global_allocator]` and `tikv_jemalloc_ctl::stats::allocated` reads are +//! isolated from the rest of the prover test suite. + +#![cfg(feature = "disk-spill")] + +use lambda_vm_prover::auto_storage::{SAFETY_FRACTION_DEN, SAFETY_FRACTION_NUM, peak_bytes}; +use lambda_vm_prover::prove_with_options_and_inputs; +use lambda_vm_prover::tables::MaxRowsConfig; +use lambda_vm_prover::tables::trace_builder::count_table_lengths; +use lambda_vm_prover::test_utils::{asm_elf_bytes, run_asm_elf}; +use stark::proof::options::GoldilocksCubicProofOptions; +use stark::prover::table_parallelism; +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::thread; +use std::time::Duration; +use tikv_jemalloc_ctl::{epoch, stats}; + +#[global_allocator] +static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; + +fn allocated_bytes() -> usize { + epoch::advance().ok(); + stats::allocated::read().unwrap_or(0) +} + +#[test] +fn peak_bytes_does_not_underestimate_measured_heap() { + let (elf, logs, _) = run_asm_elf("fib_iterative_372k"); + let elf_bytes = asm_elf_bytes("fib_iterative_372k"); + + let max_rows = MaxRowsConfig::default(); + let lengths = + count_table_lengths(&elf, &logs, &max_rows, &[]).expect("count_table_lengths succeeds"); + + let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is valid"); + let 
predicted = peak_bytes(&lengths, opts.blowup_factor, table_parallelism()) as usize; + + drop(logs); + + let baseline = allocated_bytes(); + let peak = Arc::new(AtomicUsize::new(baseline)); + let stop = Arc::new(AtomicBool::new(false)); + + let sampler = { + let peak = Arc::clone(&peak); + let stop = Arc::clone(&stop); + thread::spawn(move || { + while !stop.load(Ordering::Relaxed) { + peak.fetch_max(allocated_bytes(), Ordering::Relaxed); + thread::sleep(Duration::from_millis(10)); + } + }) + }; + + let _proof = + prove_with_options_and_inputs(&elf_bytes, &[], &opts, &max_rows).expect("proof succeeds"); + + stop.store(true, Ordering::Relaxed); + sampler.join().expect("sampler joins"); + + let measured = peak.load(Ordering::Relaxed).saturating_sub(baseline); + + eprintln!( + "peak_bytes calibration: predicted={predicted} bytes, measured_heap={measured} bytes, ratio={:.2}", + predicted as f64 / measured as f64 + ); + + let safety_num = SAFETY_FRACTION_NUM as usize; + let safety_den = SAFETY_FRACTION_DEN as usize; + assert!( + predicted.saturating_mul(safety_den) >= measured.saturating_mul(safety_num), + "peak_bytes underestimates measured heap: predicted={predicted}, measured={measured}" + ); +} From dbf075c857cfff63b7a3f8137378092f9c2c205e Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 7 May 2026 11:57:25 -0300 Subject: [PATCH 221/231] Compile Rust ELFs in Disk-spill tests job --- .github/workflows/pr_main.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/pr_main.yaml b/.github/workflows/pr_main.yaml index cb83676c1..0465bb8d0 100644 --- a/.github/workflows/pr_main.yaml +++ b/.github/workflows/pr_main.yaml @@ -162,6 +162,22 @@ jobs: run: | make compile-programs-asm + - name: Cache compiled Rust ELF artifacts and build cache + id: cache-rust-elfs + uses: actions/cache@v4 + with: + path: | + executor/program_artifacts/rust + executor/shared_target + key: rust-elf-artifacts-${{ hashFiles('executor/programs/rust/**', 
'executor/programs/riscv64im-lambda-vm-elf.json', 'syscalls/**', 'Makefile') }} + restore-keys: | + rust-elf-artifacts- + + - name: Compile Rust programs to ELF + if: steps.cache-rust-elfs.outputs.cache-hit != 'true' + run: | + make compile-programs-rust + - name: Run stark disk-spill tests run: | cargo test --release -p stark --features disk-spill disk_spill From 6c29ed7e81a7f6efff4d85d23010b407aa97649b Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 7 May 2026 12:48:02 -0300 Subject: [PATCH 222/231] Use checked_next_power_of_two for commit_count --- prover/src/tables/trace_builder.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 9e202d7de..c5d6244e9 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2313,7 +2313,10 @@ pub fn count_table_lengths( mul_padded_rows: padded_chunked_rows(mul_count, max_rows.mul), dvrm_padded_rows: padded_chunked_rows(dvrm_count, max_rows.dvrm), branch_padded_rows: padded_chunked_rows(branch_count, max_rows.branch), - commit_padded_rows: commit_count.next_power_of_two().max(4) as u64, + commit_padded_rows: commit_count + .checked_next_power_of_two() + .unwrap_or(usize::MAX) + .max(4) as u64, decode_rows, unique_page_count, cycle_count, From ac9cfe80ac24f1b304f6d82ee5963e8feba91384 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Thu, 7 May 2026 17:40:40 -0300 Subject: [PATCH 223/231] Replace fib_iterative_24M with 32M --- .../asm/{fib_iterative_24M.s => fib_iterative_32M.s} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename executor/programs/asm/{fib_iterative_24M.s => fib_iterative_32M.s} (80%) diff --git a/executor/programs/asm/fib_iterative_24M.s b/executor/programs/asm/fib_iterative_32M.s similarity index 80% rename from executor/programs/asm/fib_iterative_24M.s rename to executor/programs/asm/fib_iterative_32M.s index 8e17fe693..df6644193 100644 --- 
a/executor/programs/asm/fib_iterative_24M.s +++ b/executor/programs/asm/fib_iterative_32M.s @@ -2,14 +2,14 @@ .globl main main: # Iterative Fibonacci - pure register arithmetic - # ~24M steps + # ~32M steps # # Loop body: 5 instructions per iteration - # 4800000 iterations × 5 = 24000000 + setup/teardown + # 6400000 iterations × 5 = 32000000 + setup/teardown li t0, 0 # a = fib(0) = 0 li t1, 1 # b = fib(1) = 1 - li a0, 4800000 # iteration count + li a0, 6400000 # iteration count .loop: add t2, t0, t1 # t2 = a + b From 8d33a329967d0a04425359e32a23e4082bdac900 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Wed, 13 May 2026 12:51:00 -0300 Subject: [PATCH 224/231] Use traces_from_elf_and_logs_ram in keccak tests --- Cargo.lock | 1 + prover/src/tests/prove_elfs_tests.rs | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1ce476b19..10c018bbe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1956,6 +1956,7 @@ dependencies = [ "sysinfo", "tikv-jemalloc-ctl", "tikv-jemallocator", + "tiny-keccak", ] [[package]] diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index fb53e46de..4fb321f00 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -724,7 +724,7 @@ fn test_prove_elfs_keccak() { let (elf, logs, _instructions) = run_asm_elf("test_keccak"); // Must use from_elf_and_logs (not from_logs_minimal) because keccak accesses // RAM (stack memory), which requires PAGE tables for Memory bus balance. 
- let mut traces = Traces::from_elf_and_logs(&elf, &logs, &Default::default(), &[]).unwrap(); + let mut traces = traces_from_elf_and_logs_ram(&elf, &logs, &Default::default(), &[]).unwrap(); assert!( prove_and_verify_vm_minimal(&elf, &mut traces), @@ -760,7 +760,7 @@ fn test_prove_elfs_keccak_multi_call() { ); let mut traces = - Traces::from_elf_and_logs(&elf, &result.logs, &Default::default(), &[]).unwrap(); + traces_from_elf_and_logs_ram(&elf, &result.logs, &Default::default(), &[]).unwrap(); assert_eq!( traces.public_output_bytes, result.return_values.memory_values @@ -793,7 +793,7 @@ fn test_prove_elfs_keccak_unaligned_state_addr() { executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor"); let result = executor.run().expect("Failed to run program"); let mut traces = - Traces::from_elf_and_logs(&elf, &result.logs, &Default::default(), &[]).unwrap(); + traces_from_elf_and_logs_ram(&elf, &result.logs, &Default::default(), &[]).unwrap(); // Tamper the first real keccak row: replace addr(1) (a byte cell) with a // value outside [0, 256). 
The new IS_BYTE bus sender will emit this From 7da645a20dbdc52691d2342e785f9cc638491d6b Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 18 May 2026 13:12:36 -0300 Subject: [PATCH 225/231] Use full bitwise for direct-indexing and security tests --- prover/src/tests/decode_tests.rs | 12 +++++++++--- prover/src/tests/prove_elfs_tests.rs | 10 +++++++++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/prover/src/tests/decode_tests.rs b/prover/src/tests/decode_tests.rs index e108f3af1..f1f60e5ba 100644 --- a/prover/src/tests/decode_tests.rs +++ b/prover/src/tests/decode_tests.rs @@ -1030,9 +1030,15 @@ fn test_decode_soundness_same_elf_accepted() { .expect("Failed to create executor"); let result = executor.run().expect("Failed to run program"); - let mut traces = - Traces::from_elf_and_logs_minimal(&prover_elf, &result.logs, &Default::default(), &[]) - .unwrap(); + let mut traces = Traces::from_elf_and_logs( + &prover_elf, + &result.logs, + &Default::default(), + &[], + #[cfg(feature = "disk-spill")] + stark::storage_mode::StorageMode::Ram, + ) + .unwrap(); let table_counts = traces.table_counts(); let prover_airs = VmAirs::new( &prover_elf, diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index dca5a4774..ffc5f23de 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -1229,7 +1229,15 @@ fn test_debug_memory_tokens_sb_sh() { use std::collections::HashMap; let (elf, logs, _instructions) = run_asm_elf("test_sb_sh_8"); - let traces = Traces::from_elf_and_logs_minimal(&elf, &logs, &Default::default(), &[]).unwrap(); + let traces = Traces::from_elf_and_logs( + &elf, + &logs, + &Default::default(), + &[], + #[cfg(feature = "disk-spill")] + stark::storage_mode::StorageMode::Ram, + ) + .unwrap(); let memw = &traces.memws[0]; // Small test: single MEMW chunk println!("DEBUG: test_sb_sh_8 Memory bus tokens (FULL)"); From dbb012a9dbaf6974933bd7160459ba4b25445b2c Mon Sep 17 
00:00:00 2001 From: gabrielbosio Date: Mon, 18 May 2026 15:51:02 -0300 Subject: [PATCH 226/231] Filter prover CI step to disk-spill tests --- .github/workflows/pr_main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr_main.yaml b/.github/workflows/pr_main.yaml index 0465bb8d0..f892845a7 100644 --- a/.github/workflows/pr_main.yaml +++ b/.github/workflows/pr_main.yaml @@ -184,7 +184,7 @@ jobs: - name: Run prover disk-spill tests run: | - cargo test --release -p lambda-vm-prover --features disk-spill -- --test-threads=1 + cargo test --release -p lambda-vm-prover --features disk-spill -- --test-threads=1 disk_spill count_table_lengths build-prover-tests: name: Build prover tests From 494bc48bef0d77aefb44b1f34fd18e1a64e0b7f6 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 18 May 2026 15:59:13 -0300 Subject: [PATCH 227/231] Move output assert before drop(result) --- prover/src/lib.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/prover/src/lib.rs b/prover/src/lib.rs index a0ee57af7..dbe13d20b 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -615,6 +615,10 @@ pub fn prove_with_options_and_inputs( #[cfg(feature = "disk-spill")] storage_mode, )?; + debug_assert_eq!( + traces.public_output_bytes, result.return_values.memory_values, + "public output diverged between executor view and trace reconstruction" + ); drop(result); #[cfg(feature = "instruments")] @@ -673,11 +677,6 @@ pub fn prove_with_options_and_inputs( .filter(|c| c.is_private_input) .count(); - debug_assert_eq!( - traces.public_output_bytes, result.return_values.memory_values, - "public output diverged between executor view and trace reconstruction" - ); - Ok(VmProof { proof, runtime_page_ranges, From 7588c748a85add28bb9cd606c622aa8d5398afe8 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 18 May 2026 18:24:17 -0300 Subject: [PATCH 228/231] Merge disk-spill prove and serde roundtrip tests --- 
prover/src/tests/disk_spill_tests.rs | 59 +++++++++++----------------- 1 file changed, 23 insertions(+), 36 deletions(-) diff --git a/prover/src/tests/disk_spill_tests.rs b/prover/src/tests/disk_spill_tests.rs index d8331569d..686f9bf6e 100644 --- a/prover/src/tests/disk_spill_tests.rs +++ b/prover/src/tests/disk_spill_tests.rs @@ -25,39 +25,43 @@ impl Drop for ForceDiskGuard { } #[test] -fn test_disk_spill_prove_and_verify_small() { +fn test_disk_spill_prove_verify_and_roundtrip_small() { let _guard = ForceDiskGuard::new(); let elf_bytes = asm_elf_bytes("sub"); let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); - let vm_proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::default()) + let proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::default()) .expect("prove failed"); - let ok = crate::verify_with_options(&vm_proof, &elf_bytes, &opts).expect("verify failed"); - assert!(ok, "verification returned false"); -} + assert!( + crate::verify_with_options(&proof, &elf_bytes, &opts).expect("verify failed"), + "verification returned false" + ); -#[test] -fn test_disk_spill_prove_and_verify_with_chunks() { - let _guard = ForceDiskGuard::new(); - let elf_bytes = asm_elf_bytes("all_instructions_64"); - let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); - let vm_proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::small()) - .expect("prove failed"); - let ok = crate::verify_with_options(&vm_proof, &elf_bytes, &opts).expect("verify failed"); - assert!(ok, "verification returned false"); + let bytes = bincode::serialize(&proof).expect("serialize failed"); + let proof2: VmProof = bincode::deserialize(&bytes).expect("deserialize failed"); + assert!( + crate::verify_with_options(&proof2, &elf_bytes, &opts).expect("verify failed"), + "verification failed after serialization roundtrip" + ); } #[test] -fn test_disk_spill_serialization_roundtrip() { +fn 
test_disk_spill_prove_verify_and_roundtrip_chunked() { let _guard = ForceDiskGuard::new(); - let elf_bytes = asm_elf_bytes("sub"); + let elf_bytes = asm_elf_bytes("all_instructions_64"); let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); - let proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::default()) + let proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::small()) .expect("prove failed"); + assert!( + crate::verify_with_options(&proof, &elf_bytes, &opts).expect("verify failed"), + "verification returned false" + ); let bytes = bincode::serialize(&proof).expect("serialize failed"); let proof2: VmProof = bincode::deserialize(&bytes).expect("deserialize failed"); - let valid = crate::verify_with_options(&proof2, &elf_bytes, &opts).expect("verify failed"); - assert!(valid, "verification failed after serialization roundtrip"); + assert!( + crate::verify_with_options(&proof2, &elf_bytes, &opts).expect("verify failed"), + "verification failed after serialization roundtrip (chunked)" + ); } #[test] @@ -70,20 +74,3 @@ fn test_disk_spill_prove_and_verify_372k() { let ok = crate::verify_with_options(&vm_proof, &elf_bytes, &opts).expect("verify failed"); assert!(ok, "verification returned false for fib_iterative_372k"); } - -#[test] -fn test_disk_spill_serialization_roundtrip_chunked() { - let _guard = ForceDiskGuard::new(); - let elf_bytes = asm_elf_bytes("all_instructions_64"); - let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); - let proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::small()) - .expect("prove failed"); - - let bytes = bincode::serialize(&proof).expect("serialize failed"); - let proof2: VmProof = bincode::deserialize(&bytes).expect("deserialize failed"); - let valid = crate::verify_with_options(&proof2, &elf_bytes, &opts).expect("verify failed"); - assert!( - valid, - "verification failed after serialization roundtrip 
(chunked)" - ); -} From 369bd94ea3e9429f5e6344db63d849f45eb29a82 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Mon, 18 May 2026 18:27:40 -0300 Subject: [PATCH 229/231] Drop fib_iterative_372k disk-spill test --- prover/src/tests/disk_spill_tests.rs | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/prover/src/tests/disk_spill_tests.rs b/prover/src/tests/disk_spill_tests.rs index 686f9bf6e..8a07f3175 100644 --- a/prover/src/tests/disk_spill_tests.rs +++ b/prover/src/tests/disk_spill_tests.rs @@ -63,14 +63,3 @@ fn test_disk_spill_prove_verify_and_roundtrip_chunked() { "verification failed after serialization roundtrip (chunked)" ); } - -#[test] -fn test_disk_spill_prove_and_verify_372k() { - let _guard = ForceDiskGuard::new(); - let elf_bytes = asm_elf_bytes("fib_iterative_372k"); - let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); - let vm_proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::default()) - .expect("prove failed"); - let ok = crate::verify_with_options(&vm_proof, &elf_bytes, &opts).expect("verify failed"); - assert!(ok, "verification returned false for fib_iterative_372k"); -} From 80348b073a84a270b622816247d6ebbb499cfb47 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 19 May 2026 12:14:31 -0300 Subject: [PATCH 230/231] Set FORCE_DISK_SPILL in CI, drop RAII guard and --test-threads=1 --- .github/workflows/pr_main.yaml | 4 +++- prover/src/tests/disk_spill_tests.rs | 31 +++++++++++----------------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/.github/workflows/pr_main.yaml b/.github/workflows/pr_main.yaml index f892845a7..68fae4fb0 100644 --- a/.github/workflows/pr_main.yaml +++ b/.github/workflows/pr_main.yaml @@ -183,8 +183,10 @@ jobs: cargo test --release -p stark --features disk-spill disk_spill - name: Run prover disk-spill tests + env: + FORCE_DISK_SPILL: "1" run: | - cargo test --release -p lambda-vm-prover --features disk-spill -- --test-threads=1 
disk_spill count_table_lengths + cargo test --release -p lambda-vm-prover --features disk-spill -- disk_spill count_table_lengths build-prover-tests: name: Build prover tests diff --git a/prover/src/tests/disk_spill_tests.rs b/prover/src/tests/disk_spill_tests.rs index 8a07f3175..e019fa456 100644 --- a/prover/src/tests/disk_spill_tests.rs +++ b/prover/src/tests/disk_spill_tests.rs @@ -1,32 +1,25 @@ //! End-to-end tests forcing `StorageMode::Disk` via the `FORCE_DISK_SPILL` env var. +//! +//! Run with `FORCE_DISK_SPILL=1` set in the environment, e.g. +//! `FORCE_DISK_SPILL=1 cargo test --features disk-spill disk_spill`. Tests +//! fail fast if the var is unset to avoid silent loss of coverage. use crate::VmProof; use crate::tables::MaxRowsConfig; use crate::test_utils::asm_elf_bytes; use stark::proof::options::GoldilocksCubicProofOptions; -/// RAII guard that sets `FORCE_DISK_SPILL` for the test's scope and clears it -/// on drop. Tests must run with `--test-threads=1`. -struct ForceDiskGuard; - -impl ForceDiskGuard { - fn new() -> Self { - // SAFETY: tests run with --test-threads=1, no concurrent env access. - unsafe { std::env::set_var("FORCE_DISK_SPILL", "1") }; - Self - } -} - -impl Drop for ForceDiskGuard { - fn drop(&mut self) { - // SAFETY: same as new(). 
- unsafe { std::env::remove_var("FORCE_DISK_SPILL") }; - } +fn require_force_disk_spill() { + assert_eq!( + std::env::var("FORCE_DISK_SPILL").as_deref(), + Ok("1"), + "set FORCE_DISK_SPILL=1 before running disk-spill tests", + ); } #[test] fn test_disk_spill_prove_verify_and_roundtrip_small() { - let _guard = ForceDiskGuard::new(); + require_force_disk_spill(); let elf_bytes = asm_elf_bytes("sub"); let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); let proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::default()) @@ -46,7 +39,7 @@ fn test_disk_spill_prove_verify_and_roundtrip_small() { #[test] fn test_disk_spill_prove_verify_and_roundtrip_chunked() { - let _guard = ForceDiskGuard::new(); + require_force_disk_spill(); let elf_bytes = asm_elf_bytes("all_instructions_64"); let opts = GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid"); let proof = crate::prove_with_options(&elf_bytes, &opts, &MaxRowsConfig::small()) From fafc9a57604502ba83307cd2047828d81765c3e1 Mon Sep 17 00:00:00 2001 From: gabrielbosio Date: Tue, 19 May 2026 12:21:57 -0300 Subject: [PATCH 231/231] Add make test-disk-spill target --- Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d1554cc99..fcde68e9c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ .PHONY: deps deps-linux deps-macos prepare-test-data compile-programs-asm compile-programs-rust compile-bench \ compile-programs clean-asm clean-rust clean-bench clean-shared clean test test-asm test-no-compile \ test-asm-no-compile test-rust test-rust-no-compile test-executor flamegraph-prover \ -test-fast test-prover test-prover-all test-math-cuda bench-math-cuda build check clippy fmt lint +test-fast test-prover test-prover-all test-disk-spill test-math-cuda bench-math-cuda build check clippy fmt lint UNAME := $(shell uname) @@ -185,6 +185,11 @@ test-prover-all: test-prover-debug: cargo test -p 
lambda-vm-prover --features debug-checks -- --nocapture +# Disk-spill tests (stark + prover). FORCE_DISK_SPILL is required by the prover tests. +test-disk-spill: + cargo test --release -p stark --features disk-spill disk_spill + FORCE_DISK_SPILL=1 cargo test --release -p lambda-vm-prover --features disk-spill -- disk_spill count_table_lengths + # math-cuda parity tests (requires NVIDIA GPU + nvcc) test-math-cuda: cargo test -p math-cuda --release