From 8c4a0c7f74d26964e3caa546d3f1a373d96852aa Mon Sep 17 00:00:00 2001 From: MauroFab Date: Tue, 10 Mar 2026 16:32:10 -0300 Subject: [PATCH 1/9] don't run executor benches all the time --- .github/workflows/hyperfine.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/hyperfine.yaml b/.github/workflows/hyperfine.yaml index f12655d71..95a7c4c96 100644 --- a/.github/workflows/hyperfine.yaml +++ b/.github/workflows/hyperfine.yaml @@ -3,6 +3,9 @@ name: Hyperfine Benchmark on: pull_request: branches: ["**"] + paths: + - 'executor/src/**' + - 'executor/Cargo.toml' concurrency: group: ${{ github.workflow }}-${{ github.ref }} From c7eedec1fac21499bdadd085b5ac4bebb65878f8 Mon Sep 17 00:00:00 2001 From: Nicole Date: Tue, 17 Mar 2026 18:37:11 -0300 Subject: [PATCH 2/9] Add MEMW_A aligned-memory fast path table --- prover/src/lib.rs | 30 +- prover/src/tables/memw_aligned.rs | 846 ++++++++++++++++++++++++ prover/src/tables/mod.rs | 29 +- prover/src/tables/trace_builder.rs | 113 +++- prover/src/test_utils.rs | 22 + prover/src/tests/prove_elfs_tests.rs | 2 + prover/src/tests/trace_builder_tests.rs | 47 +- 7 files changed, 1057 insertions(+), 32 deletions(-) create mode 100644 prover/src/tables/memw_aligned.rs diff --git a/prover/src/lib.rs b/prover/src/lib.rs index aa4287c91..21bfc9255 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -41,7 +41,8 @@ use crate::tables::types::BusId; use crate::test_utils::{ E, F, VmAir, create_bitwise_air, create_branch_air, create_commit_air, create_cpu_air, create_decode_air, create_dvrm_air, create_halt_air, create_load_air, create_lt_air, - create_memw_air, create_mul_air, create_page_air, create_register_air, create_shift_air, + create_memw_air, create_memw_aligned_air, create_mul_air, create_page_air, create_register_air, + create_shift_air, }; use stark::proof::options::{GoldilocksCubicProofOptions, ProofOptions}; @@ -66,6 +67,7 @@ pub struct TableCounts { pub cpu: usize, pub lt: usize, pub memw: usize, + pub memw_aligned: usize, pub load: usize, pub mul: usize, pub dvrm: usize, @@ -80,7 +82,15 @@ impl TableCounts { /// allowing a malicious prover to bypass soundness checks. /// Sum of all chunk counts across split tables. pub fn total(&self) -> usize { - self.cpu + self.lt + self.memw + self.load + self.mul + self.dvrm + self.shift + self.branch + self.cpu + + self.lt + + self.memw + + self.memw_aligned + + self.load + + self.mul + + self.dvrm + + self.shift + + self.branch } /// Validate that all required tables have at least one chunk. @@ -92,6 +102,7 @@ impl TableCounts { ("cpu", self.cpu), ("lt", self.lt), ("memw", self.memw), + ("memw_aligned", self.memw_aligned), ("load", self.load), ("mul", self.mul), ("dvrm", self.dvrm), @@ -174,6 +185,7 @@ pub(crate) struct VmAirs { pub lts: Vec, pub shifts: Vec, pub memws: Vec, + pub memw_aligneds: Vec, pub loads: Vec, pub decode: VmAir, pub muls: Vec, @@ -208,6 +220,13 @@ impl VmAirs { for (air, trace) in self.memws.iter().zip(traces.memws.iter_mut()) { pairs.push((air, trace, &())); } + for (air, trace) in self + .memw_aligneds + .iter() + .zip(traces.memw_aligneds.iter_mut()) + { + pairs.push((air, trace, &())); + } for (air, trace) in self.loads.iter().zip(traces.loads.iter_mut()) { pairs.push((air, trace, &())); } @@ -249,6 +268,9 @@ impl VmAirs { for air in &self.memws { refs.push(air); } + for air in &self.memw_aligneds { + refs.push(air); + } for air in &self.loads { refs.push(air); } @@ -301,6 +323,9 @@ impl VmAirs { let memws: Vec<_> = (0..table_counts.memw) .map(|i| create_memw_air(proof_options).with_name(&format!("MEMW[{}]", i))) .collect(); + let memw_aligneds: Vec<_> = (0..table_counts.memw_aligned) + .map(|i| create_memw_aligned_air(proof_options).with_name(&format!("MEMW_A[{}]", i))) + .collect(); let loads: Vec<_> = (0..table_counts.load) .map(|i| create_load_air(proof_options).with_name(&format!("LOAD[{}]", i))) .collect(); @@ -343,6 +368,7 @@ impl VmAirs { lts, shifts, memws, + memw_aligneds, loads, decode, muls, diff --git a/prover/src/tables/memw_aligned.rs b/prover/src/tables/memw_aligned.rs new file mode 100644 index 000000000..40ed2a4f4 --- /dev/null +++ b/prover/src/tables/memw_aligned.rs @@ -0,0 +1,846 @@ +//! MEMW_A (Memory Write/Read — Aligned) table. +//! +//! Fast path for aligned memory/register accesses where all bytes share the +//! same old_timestamp. Most operations (aligned memory + all register accesses) +//! route here instead of the heavier MEMW table. +//! +//! ## Column layout (30 columns) +//! +//! - `is_register`: Bit +//! - `base_address_high`: Word (32-bit high word) +//! - `base_address_mid`: Half (16-bit mid) +//! - `base_address_low[2]`: Bytes (low 2 bytes) +//! - `value[8]`: BaseField[8] +//! - `timestamp`: DWordWL (2 cols) +//! - `write2/4/8`: Bit (access width flags) +//! - `old[8]`: BaseField[8] +//! - `old_timestamp`: DWordWL (2 cols — single, not 8!) +//! - `mu_read`, `mu_write`: multiplicity columns +//! +//! ## Bus Interactions (22) +//! - 1 IS_HALFWORD[base_address_mid] +//! - 1 IS_BYTE[base_address_low[1]] +//! - 1 AND_BYTE[base_address_low[0], mask] → 0 (alignment check) +//! - 1 LT[old_timestamp, timestamp, 0] → 1 +//! - 16 Memory bus tokens +//! - 2 MEMW output interactions (read + write) + +use math::field::element::FieldElement; +use math::field::traits::{IsField, IsSubFieldOf}; +use stark::constraints::transition::TransitionConstraint; +use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; +use stark::table::TableView; +use stark::trace::TraceTable; +use stark::traits::TransitionEvaluationContext; + +use super::memw::MemwOperation; +use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; + +/// Maximum number of rows per MEMW_A table chunk. +pub const MAX_ROWS: usize = super::max_rows::MEMW_A; + +// ========================================================================= +// Column indices (30 columns) +// ========================================================================= + +pub mod cols { + pub const IS_REGISTER: usize = 0; + + /// base_address decomposed: high = addr >> 32 (Word, 32-bit) + pub const BASE_ADDRESS_HIGH: usize = 1; + /// base_address decomposed: mid = (addr >> 16) & 0xFFFF (Half, 16-bit) + pub const BASE_ADDRESS_MID: usize = 2; + /// base_address decomposed: low bytes + /// low[0] = addr & 0xFF, low[1] = (addr >> 8) & 0xFF + pub const BASE_ADDRESS_LOW: [usize; 2] = [3, 4]; + + pub const VALUE: [usize; 8] = [5, 6, 7, 8, 9, 10, 11, 12]; + + pub const TIMESTAMP_0: usize = 13; + pub const TIMESTAMP_1: usize = 14; + + pub const WRITE2: usize = 15; + pub const WRITE4: usize = 16; + pub const WRITE8: usize = 17; + + pub const OLD: [usize; 8] = [18, 19, 20, 21, 22, 23, 24, 25]; + + /// Single old_timestamp (shared across all bytes, since they're aligned) + pub const OLD_TIMESTAMP_0: usize = 26; + pub const OLD_TIMESTAMP_1: usize = 27; + + pub const MU_READ: usize = 28; + pub const MU_WRITE: usize = 29; + + pub const NUM_COLUMNS: usize = 30; +} + +// ========================================================================= +// Trace generation +// ========================================================================= + +/// Generates the MEMW_A trace table from aligned operations. +/// +/// Reuses `MemwOperation` — the trace generator uses `old_timestamp[0]` +/// (verified equal for all accessed bytes by the routing logic). +pub fn generate_memw_aligned_trace( + operations: &[MemwOperation], +) -> TraceTable { + let num_rows = operations.len().next_power_of_two().max(4); + let mut data = vec![FE::zero(); num_rows * cols::NUM_COLUMNS]; + + for (row_idx, op) in operations.iter().enumerate() { + let base = row_idx * cols::NUM_COLUMNS; + + data[base + cols::IS_REGISTER] = FE::from(op.is_register as u64); + + // Decompose base_address + let addr = op.base_address; + let high = addr >> 32; + let mid = (addr >> 16) & 0xFFFF; + let low_1 = (addr >> 8) & 0xFF; + let low_0 = addr & 0xFF; + + data[base + cols::BASE_ADDRESS_HIGH] = FE::from(high); + data[base + cols::BASE_ADDRESS_MID] = FE::from(mid); + data[base + cols::BASE_ADDRESS_LOW[0]] = FE::from(low_0); + data[base + cols::BASE_ADDRESS_LOW[1]] = FE::from(low_1); + + for i in 0..8 { + data[base + cols::VALUE[i]] = FE::from(op.value[i]); + } + + data[base + cols::TIMESTAMP_0] = FE::from(op.timestamp & 0xFFFF_FFFF); + data[base + cols::TIMESTAMP_1] = FE::from(op.timestamp >> 32); + + let (w2, w4, w8) = op.write_flags(); + data[base + cols::WRITE2] = FE::from(w2 as u64); + data[base + cols::WRITE4] = FE::from(w4 as u64); + data[base + cols::WRITE8] = FE::from(w8 as u64); + + for i in 0..8 { + data[base + cols::OLD[i]] = FE::from(op.old[i]); + } + + // Single old_timestamp (from old_timestamp[0], verified equal for all bytes) + data[base + cols::OLD_TIMESTAMP_0] = FE::from(op.old_timestamp[0] & 0xFFFF_FFFF); + data[base + cols::OLD_TIMESTAMP_1] = FE::from(op.old_timestamp[0] >> 32); + + data[base + cols::MU_READ] = FE::from(op.is_read as u64); + data[base + cols::MU_WRITE] = FE::from(!op.is_read as u64); + } + + TraceTable::new_main(data, cols::NUM_COLUMNS, 1) +} + +// ========================================================================= +// Bus interactions (22 total) +// ========================================================================= + +pub fn bus_interactions() -> Vec { + let mut interactions = Vec::with_capacity(22); + + let mu_sum = Multiplicity::Sum(cols::MU_READ, cols::MU_WRITE); + + // ------------------------------------------------------------------------- + // IS_HALFWORD[base_address_mid] with μ_sum + // ------------------------------------------------------------------------- + interactions.push(BusInteraction::sender( + BusId::IsHalfword, + mu_sum.clone(), + vec![BusValue::Packed { + start_column: cols::BASE_ADDRESS_MID, + packing: Packing::Direct, + }], + )); + + // ------------------------------------------------------------------------- + // IS_BYTE[base_address_low[1]] with μ_sum + // ------------------------------------------------------------------------- + interactions.push(BusInteraction::sender( + BusId::IsByte, + mu_sum.clone(), + vec![BusValue::Packed { + start_column: cols::BASE_ADDRESS_LOW[1], + packing: Packing::Direct, + }], + )); + + // ------------------------------------------------------------------------- + // AND_BYTE[base_address_low[0], mask] → 0 with μ_sum + // mask = write2*1 + write4*3 + write8*7 + // This implicitly range-checks low[0] to [0, 256) AND checks alignment. + // ------------------------------------------------------------------------- + interactions.push(BusInteraction::sender( + BusId::AndByte, + mu_sum.clone(), + vec![ + // x = base_address_low[0] + BusValue::Packed { + start_column: cols::BASE_ADDRESS_LOW[0], + packing: Packing::Direct, + }, + // y = mask = write2*1 + write4*3 + write8*7 + BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::WRITE2, + }, + LinearTerm::Column { + coefficient: 3, + column: cols::WRITE4, + }, + LinearTerm::Column { + coefficient: 7, + column: cols::WRITE8, + }, + ]), + // result = 0 (alignment constraint: low bits must be 0) + BusValue::constant(0), + ], + )); + + // ------------------------------------------------------------------------- + // LT[old_timestamp, timestamp, 0] → 1 with μ_sum + // ------------------------------------------------------------------------- + interactions.push(BusInteraction::sender( + BusId::Lt, + mu_sum.clone(), + vec![ + BusValue::Packed { + start_column: cols::OLD_TIMESTAMP_0, + packing: Packing::DWordWL, + }, + BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::DWordWL, + }, + BusValue::constant(0), + BusValue::constant(1), + ], + )); + + // ------------------------------------------------------------------------- + // Memory bus interactions (16 total) + // ------------------------------------------------------------------------- + // For aligned accesses, address for byte i: + // lo = 2^16 * MID + 2^8 * LOW[1] + LOW[0] + i + // hi = HIGH + // All old_timestamp references use the single old_timestamp columns. + + // Virtual base_address_lo = 2^16 * MID + 2^8 * LOW[1] + LOW[0] + // For byte 0, the address is exactly this. + let base_addr_lo = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1 << 16, + column: cols::BASE_ADDRESS_MID, + }, + LinearTerm::Column { + coefficient: 1 << 8, + column: cols::BASE_ADDRESS_LOW[1], + }, + LinearTerm::Column { + coefficient: 1, + column: cols::BASE_ADDRESS_LOW[0], + }, + ]); + + let base_addr_hi = BusValue::Packed { + start_column: cols::BASE_ADDRESS_HIGH, + packing: Packing::Direct, + }; + + // CM16: memory[is_register, base_address, old_timestamp, old[0]] with +μ_sum + interactions.push(BusInteraction::sender( + BusId::Memory, + mu_sum.clone(), + vec![ + BusValue::Packed { + start_column: cols::IS_REGISTER, + packing: Packing::Direct, + }, + base_addr_lo.clone(), + base_addr_hi.clone(), + BusValue::Packed { + start_column: cols::OLD_TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD_TIMESTAMP_1, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD[0], + packing: Packing::Direct, + }, + ], + )); + + // CM17: memory[is_register, base_address, timestamp, value[0]] with -μ_sum + interactions.push(BusInteraction::receiver( + BusId::Memory, + mu_sum, + vec![ + BusValue::Packed { + start_column: cols::IS_REGISTER, + packing: Packing::Direct, + }, + base_addr_lo.clone(), + base_addr_hi.clone(), + BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[0], + packing: Packing::Direct, + }, + ], + )); + + // w2 multiplicity + let w2_mult = Multiplicity::Linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::WRITE2, + }, + LinearTerm::Column { + coefficient: 1, + column: cols::WRITE4, + }, + LinearTerm::Column { + coefficient: 1, + column: cols::WRITE8, + }, + ]); + + // CM18/19: byte 1 with w2 + // For aligned accesses, adding 1 to the low byte never overflows to hi word + // (since alignment guarantees base_address_lo + width-1 < 2^32). + let addr_1_lo = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1 << 16, + column: cols::BASE_ADDRESS_MID, + }, + LinearTerm::Column { + coefficient: 1 << 8, + column: cols::BASE_ADDRESS_LOW[1], + }, + LinearTerm::Column { + coefficient: 1, + column: cols::BASE_ADDRESS_LOW[0], + }, + LinearTerm::Constant(1), + ]); + + interactions.push(BusInteraction::sender( + BusId::Memory, + w2_mult.clone(), + vec![ + BusValue::Packed { + start_column: cols::IS_REGISTER, + packing: Packing::Direct, + }, + addr_1_lo.clone(), + base_addr_hi.clone(), + BusValue::Packed { + start_column: cols::OLD_TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD_TIMESTAMP_1, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD[1], + packing: Packing::Direct, + }, + ], + )); + + interactions.push(BusInteraction::receiver( + BusId::Memory, + w2_mult, + vec![ + BusValue::Packed { + start_column: cols::IS_REGISTER, + packing: Packing::Direct, + }, + addr_1_lo, + base_addr_hi.clone(), + BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[1], + packing: Packing::Direct, + }, + ], + )); + + // CM20/21: bytes 2-3 with w4 + for i in 2..=3 { + let addr_i_lo = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1 << 16, + column: cols::BASE_ADDRESS_MID, + }, + LinearTerm::Column { + coefficient: 1 << 8, + column: cols::BASE_ADDRESS_LOW[1], + }, + LinearTerm::Column { + coefficient: 1, + column: cols::BASE_ADDRESS_LOW[0], + }, + LinearTerm::Constant(i as i64), + ]); + + interactions.push(BusInteraction::sender( + BusId::Memory, + Multiplicity::Sum(cols::WRITE4, cols::WRITE8), + vec![ + BusValue::Packed { + start_column: cols::IS_REGISTER, + packing: Packing::Direct, + }, + addr_i_lo.clone(), + base_addr_hi.clone(), + BusValue::Packed { + start_column: cols::OLD_TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD_TIMESTAMP_1, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD[i], + packing: Packing::Direct, + }, + ], + )); + + interactions.push(BusInteraction::receiver( + BusId::Memory, + Multiplicity::Sum(cols::WRITE4, cols::WRITE8), + vec![ + BusValue::Packed { + start_column: cols::IS_REGISTER, + packing: Packing::Direct, + }, + addr_i_lo, + base_addr_hi.clone(), + BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[i], + packing: Packing::Direct, + }, + ], + )); + } + + // CM22/23: bytes 4-7 with write8 + for i in 4..=7 { + let addr_i_lo = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1 << 16, + column: cols::BASE_ADDRESS_MID, + }, + LinearTerm::Column { + coefficient: 1 << 8, + column: cols::BASE_ADDRESS_LOW[1], + }, + LinearTerm::Column { + coefficient: 1, + column: cols::BASE_ADDRESS_LOW[0], + }, + LinearTerm::Constant(i as i64), + ]); + + interactions.push(BusInteraction::sender( + BusId::Memory, + Multiplicity::Column(cols::WRITE8), + vec![ + BusValue::Packed { + start_column: cols::IS_REGISTER, + packing: Packing::Direct, + }, + addr_i_lo.clone(), + base_addr_hi.clone(), + BusValue::Packed { + start_column: cols::OLD_TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD_TIMESTAMP_1, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD[i], + packing: Packing::Direct, + }, + ], + )); + + interactions.push(BusInteraction::receiver( + BusId::Memory, + Multiplicity::Column(cols::WRITE8), + vec![ + BusValue::Packed { + start_column: cols::IS_REGISTER, + packing: Packing::Direct, + }, + addr_i_lo, + base_addr_hi.clone(), + BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[i], + packing: Packing::Direct, + }, + ], + )); + } + + // ------------------------------------------------------------------------- + // CO24: Read receiver (from CPU) + // ------------------------------------------------------------------------- + // The MEMW output bus fingerprint uses base_address as [lo32, hi32]. + // Reconstruct: lo32 = 2^16*MID + 2^8*LOW[1] + LOW[0], hi32 = HIGH + interactions.push(BusInteraction::receiver( + BusId::Memw, + Multiplicity::Column(cols::MU_READ), + vec![ + // old[8] + BusValue::Packed { + start_column: cols::OLD[0], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD[1], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD[2], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD[3], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD[4], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD[5], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD[6], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OLD[7], + packing: Packing::Direct, + }, + // is_register + BusValue::Packed { + start_column: cols::IS_REGISTER, + packing: Packing::Direct, + }, + // base_address reconstructed as [lo32, hi32] + base_addr_lo.clone(), + base_addr_hi.clone(), + // value[8] + BusValue::Packed { + start_column: cols::VALUE[0], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[1], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[2], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[3], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[4], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[5], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[6], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[7], + packing: Packing::Direct, + }, + // timestamp + BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }, + // write flags + BusValue::Packed { + start_column: cols::WRITE2, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::WRITE4, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::WRITE8, + packing: Packing::Direct, + }, + ], + )); + + // ------------------------------------------------------------------------- + // CO25: Write receiver (from CPU) + // ------------------------------------------------------------------------- + interactions.push(BusInteraction::receiver( + BusId::Memw, + Multiplicity::Column(cols::MU_WRITE), + vec![ + // is_register + BusValue::Packed { + start_column: cols::IS_REGISTER, + packing: Packing::Direct, + }, + // base_address reconstructed + base_addr_lo, + base_addr_hi, + // value[8] + BusValue::Packed { + start_column: cols::VALUE[0], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[1], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[2], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[3], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[4], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[5], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[6], + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::VALUE[7], + packing: Packing::Direct, + }, + // timestamp + BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }, + // write flags + BusValue::Packed { + start_column: cols::WRITE2, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::WRITE4, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::WRITE8, + packing: Packing::Direct, + }, + ], + )); + + interactions +} + +// ========================================================================= +// Constraints (2 algebraic) +// ========================================================================= + +/// MEMW_A constraint kinds. +#[derive(Debug, Clone, Copy)] +pub enum MemwAlignedConstraintKind { + /// IS_BIT<μ_sum>: multiplicity sum is 0 or 1 + MuSumIsBit, + /// w2 => μ_sum: if accessing 2+ bytes, must be active row + W2ImpliesMuSum, +} + +pub struct MemwAlignedConstraint { + constraint_idx: usize, + kind: MemwAlignedConstraintKind, +} + +impl MemwAlignedConstraint { + pub fn new(kind: MemwAlignedConstraintKind, constraint_idx: usize) -> Self { + Self { + constraint_idx, + kind, + } + } + + fn compute(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + let one = FieldElement::::one(); + let mu_read = step.get_main_evaluation_element(0, cols::MU_READ).clone(); + let mu_write = step.get_main_evaluation_element(0, cols::MU_WRITE).clone(); + let mu_sum = &mu_read + &mu_write; + + match self.kind { + MemwAlignedConstraintKind::MuSumIsBit => &mu_sum * (&one - &mu_sum), + MemwAlignedConstraintKind::W2ImpliesMuSum => { + let write2 = step.get_main_evaluation_element(0, cols::WRITE2).clone(); + let write4 = step.get_main_evaluation_element(0, cols::WRITE4).clone(); + let write8 = step.get_main_evaluation_element(0, cols::WRITE8).clone(); + let w2 = write2 + write4 + write8; + &w2 * (&one - &mu_sum) + } + } + } +} + +impl TransitionConstraint for MemwAlignedConstraint { + fn degree(&self) -> usize { + 2 + } + + fn constraint_idx(&self) -> usize { + self.constraint_idx + } + + fn end_exemptions(&self) -> usize { + 0 + } + + fn evaluate( + &self, + evaluation_context: &TransitionEvaluationContext, + transition_evaluations: &mut [FieldElement], + ) { + match evaluation_context { + TransitionEvaluationContext::Prover { + frame, + periodic_values: _, + rap_challenges: _, + .. + } => { + let v = self.compute(frame.get_evaluation_step(0)); + transition_evaluations[self.constraint_idx] = v.to_extension(); + } + TransitionEvaluationContext::Verifier { + frame, + periodic_values: _, + rap_challenges: _, + .. + } => { + let v = self.compute(frame.get_evaluation_step(0)); + transition_evaluations[self.constraint_idx] = v; + } + } + } +} + +/// Creates all constraints for the MEMW_A table (2 total). +pub fn constraints() -> Vec>> { + vec![ + Box::new(MemwAlignedConstraint::new( + MemwAlignedConstraintKind::MuSumIsBit, + 0, + )), + Box::new(MemwAlignedConstraint::new( + MemwAlignedConstraintKind::W2ImpliesMuSum, + 1, + )), + ] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_memw_aligned_trace_generation() { + let ops = vec![ + MemwOperation::new(true, 4, [42, 0, 0, 0, 0, 0, 0, 0], 100, 2, true) + .with_old([42, 0, 0, 0, 0, 0, 0, 0], [50, 50, 0, 0, 0, 0, 0, 0]), + MemwOperation::new(false, 0x1000, [1, 2, 3, 4, 0, 0, 0, 0], 200, 4, false) + .with_old([0; 8], [100; 8]), + ]; + + let trace = generate_memw_aligned_trace(&ops); + assert_eq!(trace.num_cols(), cols::NUM_COLUMNS); + assert!(trace.num_rows() >= 2); + + // Check address decomposition for op[1]: addr = 0x1000 + // high = 0, mid = 0, low[1] = 0x10, low[0] = 0x00 + assert_eq!(*trace.get_main(1, cols::BASE_ADDRESS_HIGH), FE::from(0u64)); + assert_eq!(*trace.get_main(1, cols::BASE_ADDRESS_MID), FE::from(0u64)); + assert_eq!( + *trace.get_main(1, cols::BASE_ADDRESS_LOW[1]), + FE::from(0x10u64) + ); + assert_eq!( + *trace.get_main(1, cols::BASE_ADDRESS_LOW[0]), + FE::from(0x00u64) + ); + } +} diff --git a/prover/src/tables/mod.rs b/prover/src/tables/mod.rs index 90c0910f3..f5372cf06 100644 --- a/prover/src/tables/mod.rs +++ b/prover/src/tables/mod.rs @@ -13,7 +13,8 @@ //! //! ## Memory Tables //! -//! - **MEMW**: Memory word read/write table +//! - **MEMW**: Memory word read/write table (unaligned/split-timestamp path, 49 cols, 26 interactions) +//! - **MEMW_A**: Memory word read/write table (aligned fast path, 30 cols, 22 interactions) //! - **LOAD**: Memory load with extension table //! - **PAGE**: Paged memory init/final table (one per used page) //! - **REGISTER**: Register init/final table (32 registers × 8 bytes = 256 rows) @@ -30,6 +31,7 @@ pub mod halt; pub mod load; pub mod lt; pub mod memw; +pub mod memw_aligned; pub mod mul; pub mod page; pub mod register; @@ -44,19 +46,21 @@ pub use types::BusId; /// MEMW (effective width 127) at 2^19 is the baseline; other tables are scaled /// proportionally: max_rows = (127 × 2^19) / effective_width, rounded to 2^N. /// -/// | Table | Main | Bus | Eff.width | Max rows | -/// |--------|------|-----|-----------|----------| -/// | MEMW | 49 | 26 | 127 | 2^19 | -/// | CPU | 74 | 40 | 194 | 2^19 | -/// | DVRM | 34 | 34 | 136 | 2^19 | -/// | MUL | 26 | 16 | 74 | 2^20 | -/// | LT | 15 | 9 | 42 | 2^21 | -/// | SHIFT | 27 | 15 | 72 | 2^20 | -/// | LOAD | 18 | 5 | 33 | 2^21 | -/// | BRANCH | 14 | 6 | 32 | 2^21 | +/// | Table | Main | Bus | Eff.width | Max rows | +/// |---------|------|-----|-----------|----------| +/// | MEMW | 49 | 26 | 127 | 2^19 | +/// | MEMW_A | 30 | 22 | 96 | 2^20 | +/// | CPU | 74 | 40 | 194 | 2^19 | +/// | DVRM | 34 | 34 | 136 | 2^19 | +/// | MUL | 26 | 16 | 74 | 2^20 | +/// | LT | 15 | 9 | 42 | 2^21 | +/// | SHIFT | 27 | 15 | 72 | 2^20 | +/// | LOAD | 18 | 5 | 33 | 2^21 | +/// | BRANCH | 14 | 6 | 32 | 2^21 | pub mod max_rows { pub const CPU: usize = 1 << 19; // 524,288 — eff. width 194 pub const MEMW: usize = 1 << 19; // 524,288 — eff. width 127 (baseline) + pub const MEMW_A: usize = 1 << 20; // 1,048,576 — eff. width 96 pub const DVRM: usize = 1 << 19; // 524,288 — eff. width 136 pub const MUL: usize = 1 << 20; // 1,048,576 — eff. width 74 pub const LT: usize = 1 << 21; // 2,097,152 — eff. width 42 @@ -73,6 +77,7 @@ pub mod max_rows { pub struct MaxRowsConfig { pub cpu: usize, pub memw: usize, + pub memw_aligned: usize, pub dvrm: usize, pub mul: usize, pub lt: usize, @@ -86,6 +91,7 @@ impl Default for MaxRowsConfig { Self { cpu: max_rows::CPU, memw: max_rows::MEMW, + memw_aligned: max_rows::MEMW_A, dvrm: max_rows::DVRM, mul: max_rows::MUL, lt: max_rows::LT, @@ -103,6 +109,7 @@ impl MaxRowsConfig { Self { cpu: 1 << 5, memw: 1 << 5, + memw_aligned: 1 << 5, dvrm: 1 << 5, mul: 1 << 5, lt: 1 << 5, diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 3b22ff30b..5884aaf7c 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -10,9 +10,9 @@ //! ```text //! PHASE 0: ELF → DECODE, MEMORY_INIT (preprocessed tables) //! PHASE 1: Logs → CPU ops -//! PHASE 2: CPU ops → MEMW, LOAD, LT, Bitwise (with state tracking for MEMW/LOAD) -//! PHASE 3: MEMW → LT ops (timestamp ordering) -//! PHASE 4: LT → Bitwise lookups +//! PHASE 2: CPU ops → MEMW, MEMW_A, LOAD, LT, Bitwise (with state tracking for MEMW/LOAD) +//! PHASE 3: MEMW/MEMW_A → LT ops (timestamp ordering) +//! PHASE 4: LT, MEMW_A → Bitwise lookups //! PHASE 5: Generate all traces //! ``` //! @@ -43,6 +43,7 @@ use super::halt; use super::load::{self, LoadOperation}; use super::lt::{self, LtOperation}; use super::memw::{self, MemwOperation}; +use super::memw_aligned; use super::mul::{self, MulOperation}; use super::page::{self, FinalByteState, FinalStateMap, PageConfig}; use super::register::{self, FinalRegisterStateMap, FinalRegisterWordState}; @@ -827,6 +828,84 @@ fn collect_lt_from_memw(memw_ops: &[MemwOperation]) -> Vec { lt_ops } +/// Collects LT operations from MEMW_A for timestamp ordering. +/// +/// Each aligned operation has a single old_timestamp < timestamp check. +fn collect_lt_from_memw_aligned(memw_aligned_ops: &[MemwOperation]) -> Vec { + memw_aligned_ops + .iter() + .map(|op| LtOperation::new(op.old_timestamp[0], op.timestamp, false)) + .collect() +} + +/// Checks whether a MEMW operation qualifies for the aligned fast path (MEMW_A). +/// +/// An operation is aligned if: +/// 1. For width > 1: base_address is aligned to width (low bits are zero) +/// 2. All accessed bytes share the same old_timestamp +fn is_aligned_op(op: &MemwOperation) -> bool { + let low = (op.base_address & 0xFFFF_FFFF) as u32; + let width = op.width as u32; + + // Check alignment (trivially true for width=1) + if width > 1 && (low & (width - 1)) != 0 { + return false; + } + + // Check uniform old_timestamp + for i in 1..op.width as usize { + if op.old_timestamp[i] != op.old_timestamp[0] { + return false; + } + } + + true +} + +/// Collects bitwise lookups from MEMW_A operations. +/// +/// Per operation: +/// - 1 IS_HALFWORD for base_address_mid +/// - 1 IS_BYTE for base_address_low[1] +/// - 1 AND_BYTE for alignment check (low[0] & mask == 0) +fn collect_bitwise_from_memw_aligned(ops: &[MemwOperation]) -> Vec { + let mut bitwise_ops = Vec::with_capacity(ops.len() * 3); + + for op in ops { + let low_0 = (op.base_address & 0xFF) as u8; + let low_1 = ((op.base_address >> 8) & 0xFF) as u8; + let mid = ((op.base_address >> 16) & 0xFFFF) as u16; + let mask: u8 = match op.width { + 2 => 1, + 4 => 3, + 8 => 7, + _ => 0, + }; + + // IS_HALFWORD[mid] + bitwise_ops.push(BitwiseOperation::halfword( + BitwiseOperationType::IsHalf, + (mid & 0xFF) as u8, + (mid >> 8) as u8, + )); + + // IS_BYTE[low_1] + bitwise_ops.push(BitwiseOperation::single_byte( + BitwiseOperationType::IsByte, + low_1, + )); + + // AND_BYTE[low_0, mask] → expects result 0 + bitwise_ops.push(BitwiseOperation::byte_op( + BitwiseOperationType::AndByte, + low_0, + mask, + )); + } + + bitwise_ops +} + // ============================================================================= // Phase 4: All → Bitwise lookups // ============================================================================= @@ -1459,6 +1538,9 @@ pub struct Traces { /// MEMW memory/register read/write traces (split into chunks of max_rows::MEMW) pub memws: Vec>, + /// MEMW_A aligned memory/register read/write traces (split into chunks of max_rows::MEMW_A) + pub memw_aligneds: Vec>, + /// LOAD memory load with extension traces (split into chunks of max_rows::LOAD) pub loads: Vec>, @@ -1513,6 +1595,7 @@ impl Traces { cpu: self.cpus.len(), lt: self.lts.len(), memw: self.memws.len(), + memw_aligned: self.memw_aligneds.len(), load: self.loads.len(), mul: self.muls.len(), dvrm: self.dvrms.len(), @@ -1674,6 +1757,10 @@ impl Traces { let halt_memw_ops = collect_halt_ops(&mut register_state); memw_ops.extend(halt_memw_ops); + // Route MEMW operations: aligned ops → MEMW_A, rest → MEMW + let (memw_aligned_ops, memw_ops): (Vec<_>, Vec<_>) = + memw_ops.into_iter().partition(is_aligned_op); + // Collect BRANCH operations from CPU ops where branch_cond = true let branch_ops: Vec = cpu_ops .iter() @@ -1740,6 +1827,7 @@ impl Traces { // PHASE 3: MEMW → LT (timestamp ordering and overflow checks) // ===================================================================== lt_ops.extend(collect_lt_from_memw(&memw_ops)); + lt_ops.extend(collect_lt_from_memw_aligned(&memw_aligned_ops)); // ===================================================================== // PHASE 4: All → Bitwise lookups @@ -1749,6 +1837,7 @@ impl Traces { bitwise_ops.extend(collect_bitwise_from_dvrm(&dvrm_ops)); bitwise_ops.extend(collect_bitwise_from_branch(&branch_ops)); bitwise_ops.extend(shift::collect_bitwise_from_shift(&shift_ops)); + bitwise_ops.extend(collect_bitwise_from_memw_aligned(&memw_aligned_ops)); // PAGE tables do IS_BYTE lookups for init and fini values (C1, C2) bitwise_ops.extend(collect_bitwise_from_page(elf, &memory_state)); @@ -1782,6 +1871,11 @@ impl Traces { let cpus = chunk_and_generate(&cpu_ops, max_rows.cpu, cpu::generate_cpu_trace); let memws = chunk_and_generate(&memw_ops, max_rows.memw, memw::generate_memw_trace); + let memw_aligneds = chunk_and_generate( + &memw_aligned_ops, + max_rows.memw_aligned, + memw_aligned::generate_memw_aligned_trace, + ); let loads = chunk_and_generate(&load_ops, max_rows.load, load::generate_load_trace); let lts = chunk_and_generate(<_ops, max_rows.lt, lt::generate_lt_trace); let shifts = chunk_and_generate(&shift_ops, max_rows.shift, shift::generate_shift_trace); @@ -1852,6 +1946,7 @@ impl Traces { lts, shifts, memws, + memw_aligneds, loads, decode, muls, @@ -1898,6 +1993,10 @@ impl Traces { let halt_memw_ops = collect_halt_ops(&mut register_state); memw_ops.extend(halt_memw_ops); + // Route MEMW operations: aligned ops → MEMW_A, rest → MEMW + let (memw_aligned_ops, memw_ops): (Vec<_>, Vec<_>) = + memw_ops.into_iter().partition(is_aligned_op); + // Collect MUL operations from CPU ops where op_mul = true let mut mul_ops: Vec<(MulOperation, bool)> = cpu_ops .iter() @@ -1964,6 +2063,7 @@ impl Traces { // PHASE 3: MEMW → LT (timestamp ordering and overflow checks) // ===================================================================== lt_ops.extend(collect_lt_from_memw(&memw_ops)); + lt_ops.extend(collect_lt_from_memw_aligned(&memw_aligned_ops)); // ===================================================================== // PHASE 4: All → Bitwise lookups @@ -1973,6 +2073,7 @@ impl Traces { bitwise_ops.extend(collect_bitwise_from_dvrm(&dvrm_ops)); bitwise_ops.extend(collect_bitwise_from_branch(&branch_ops)); bitwise_ops.extend(shift::collect_bitwise_from_shift(&shift_ops)); + bitwise_ops.extend(collect_bitwise_from_memw_aligned(&memw_aligned_ops)); let public_output_bytes: Vec = commit_ops .iter() @@ -2003,6 +2104,11 @@ impl Traces { let cpus = chunk_and_generate(&cpu_ops, max_rows.cpu, cpu::generate_cpu_trace); let memws = chunk_and_generate(&memw_ops, max_rows.memw, memw::generate_memw_trace); + let memw_aligneds = chunk_and_generate( + &memw_aligned_ops, + max_rows.memw_aligned, + memw_aligned::generate_memw_aligned_trace, + ); let loads = chunk_and_generate(&load_ops, max_rows.load, load::generate_load_trace); let lts = chunk_and_generate(<_ops, max_rows.lt, lt::generate_lt_trace); let shifts = chunk_and_generate(&shift_ops, max_rows.shift, shift::generate_shift_trace); @@ -2059,6 +2165,7 @@ impl Traces { lts, shifts, memws, + memw_aligneds, loads, decode, muls, diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index 09ba51345..93d6d2971 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -50,6 +50,10 @@ use crate::tables::lt::{LtOperation, bus_interactions as lt_bus_interactions, co use crate::tables::memw::{ bus_interactions as memw_bus_interactions, cols as memw_cols, constraints as memw_constraints, }; +use crate::tables::memw_aligned::{ + bus_interactions as memw_aligned_bus_interactions, cols as memw_aligned_cols, + constraints as memw_aligned_constraints, +}; use crate::tables::mul::{bus_interactions as mul_bus_interactions, cols as mul_cols}; use crate::tables::page::{bus_interactions as page_bus_interactions, cols as page_cols}; use crate::tables::register::{ @@ -554,6 +558,24 @@ pub fn create_memw_air(proof_options: &ProofOptions) -> VmAir { .with_name("MEMW") } +/// Create MEMW_A (aligned) AIR with constraints and bus interactions. +pub fn create_memw_aligned_air(proof_options: &ProofOptions) -> VmAir { + let transition_constraints = memw_aligned_constraints(); + + let auxiliary_trace_build_data = AuxiliaryTraceBuildData { + interactions: memw_aligned_bus_interactions(), + }; + + AirWithBuses::new( + memw_aligned_cols::NUM_COLUMNS, + auxiliary_trace_build_data, + proof_options, + 1, + transition_constraints, + ) + .with_name("MEMW_A") +} + /// Create LOAD AIR with constraints and bus interactions. pub fn create_load_air(proof_options: &ProofOptions) -> VmAir { let transition_constraints = load_constraints(); diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 0730a21f8..6ff7c7c0a 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -1645,6 +1645,7 @@ fn test_verify_rejects_zero_table_counts() { cpu: 0, lt: 0, memw: 0, + memw_aligned: 0, load: 0, mul: 0, dvrm: 0, @@ -1711,6 +1712,7 @@ fn test_crafted_zero_count_proof_must_not_verify() { cpu: 0, lt: 0, memw: 0, + memw_aligned: 0, load: 0, mul: 0, dvrm: 0, diff --git a/prover/src/tests/trace_builder_tests.rs b/prover/src/tests/trace_builder_tests.rs index e4c6c5992..ba2de9c3b 100644 --- a/prover/src/tests/trace_builder_tests.rs +++ b/prover/src/tests/trace_builder_tests.rs @@ -3,7 +3,7 @@ use crate::tables::bitwise; use crate::tables::cpu::cols; use crate::tables::lt; -use crate::tables::memw; +use crate::tables::memw_aligned; use crate::tables::trace_builder::Traces; use crate::tables::types::FE; use executor::vm::instruction::decoding::{ArithOp, Comparison, Instruction}; @@ -393,29 +393,44 @@ fn test_memw_generated_from_register_ops() { let traces = Traces::from_logs(&logs, instructions, &Default::default()).unwrap(); - // MEMW table should have register operations + // MEMW_A table should have register operations (register ops are always aligned) // First instruction generates: M1 (read x2), M3 (read x3), M5 (write x1) assert!( - traces.memws[0].main_table.height >= 3, - "MEMW should have at least 3 rows for register ops" + !traces.memw_aligneds.is_empty(), + "MEMW_A should have at least one chunk for register ops" + ); + assert!( + traces.memw_aligneds[0].main_table.height >= 3, + "MEMW_A should have at least 3 rows for register ops" ); - // Find the register write to x1 (address = 2 * 1 = 2, is_register = 1) + // Find the register write to x1 in MEMW_A + // Register address for x1 = 2*1 = 2, decomposed: high=0, mid=0, low=[2,0] let mut found_write = false; - for row_idx in 0..traces.memws[0].main_table.height { - let row = traces.memws[0].main_table.get_row(row_idx); - // Check for register write: is_register=1, address=2 (x1), mu_write=1 - if row[memw::cols::IS_REGISTER] == FE::one() - && row[memw::cols::BASE_ADDRESS_0] == FE::from(2u64) - && row[memw::cols::MU_WRITE] == FE::one() - { - // Check value is 300 (lo32=300, hi32=0) - assert_eq!(row[memw::cols::VALUE[0]], FE::from(300u64)); - found_write = true; + for chunk in &traces.memw_aligneds { + for row_idx in 0..chunk.main_table.height { + let row = chunk.main_table.get_row(row_idx); + // Check for register write: is_register=1, base_address_low[0]=2, mu_write=1 + if row[memw_aligned::cols::IS_REGISTER] == FE::one() + && row[memw_aligned::cols::BASE_ADDRESS_LOW[0]] == FE::from(2u64) + && row[memw_aligned::cols::BASE_ADDRESS_MID] == FE::zero() + && row[memw_aligned::cols::BASE_ADDRESS_HIGH] == FE::zero() + && row[memw_aligned::cols::MU_WRITE] == FE::one() + { + // Check value is 300 (lo32 word for register DWordWL packing) + assert_eq!(row[memw_aligned::cols::VALUE[0]], FE::from(300u64)); + found_write = true; + break; + } + } + if found_write { break; } } - assert!(found_write, "Register write to x1 not found in MEMW table"); + assert!( + found_write, + "Register write to x1 not found in MEMW_A table" + ); } // ============================================================================= From 469f7b9ff4389d06299adc3ad88b9a8132b17ec4 Mon Sep 17 00:00:00 2001 From: Nicole Date: Wed, 18 Mar 2026 10:11:59 -0300 Subject: [PATCH 3/9] fix deviation from spec: is_half and is_byte assumptions --- prover/src/tables/memw_aligned.rs | 34 ++++++------------------------ prover/src/tables/mod.rs | 6 +++--- prover/src/tables/trace_builder.rs | 22 ++++--------------- 3 files changed, 13 insertions(+), 49 deletions(-) diff --git a/prover/src/tables/memw_aligned.rs b/prover/src/tables/memw_aligned.rs index 40ed2a4f4..6525cc2c0 100644 --- a/prover/src/tables/memw_aligned.rs +++ b/prover/src/tables/memw_aligned.rs @@ -17,13 +17,15 @@ //! - `old_timestamp`: DWordWL (2 cols — single, not 8!) //! - `mu_read`, `mu_write`: multiplicity columns //! -//! ## Bus Interactions (22) -//! - 1 IS_HALFWORD[base_address_mid] -//! - 1 IS_BYTE[base_address_low[1]] +//! ## Bus Interactions (20) //! - 1 AND_BYTE[base_address_low[0], mask] → 0 (alignment check) //! - 1 LT[old_timestamp, timestamp, 0] → 1 //! - 16 Memory bus tokens //! - 2 MEMW output interactions (read + write) +//! +//! ## Assumptions (caller's responsibility, not enforced here) +//! - MEMW_A-A2: IS_HALF[base_address_mid] +//! - MEMW_A-A3.i: IS_BYTE[base_address_low[i]] for i ∈ [0, 1] use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; @@ -138,34 +140,10 @@ pub fn generate_memw_aligned_trace( // ========================================================================= pub fn bus_interactions() -> Vec { - let mut interactions = Vec::with_capacity(22); + let mut interactions = Vec::with_capacity(20); let mu_sum = Multiplicity::Sum(cols::MU_READ, cols::MU_WRITE); - // ------------------------------------------------------------------------- - // IS_HALFWORD[base_address_mid] with μ_sum - // ------------------------------------------------------------------------- - interactions.push(BusInteraction::sender( - BusId::IsHalfword, - mu_sum.clone(), - vec![BusValue::Packed { - start_column: cols::BASE_ADDRESS_MID, - packing: Packing::Direct, - }], - )); - - // ------------------------------------------------------------------------- - // IS_BYTE[base_address_low[1]] with μ_sum - // ------------------------------------------------------------------------- - interactions.push(BusInteraction::sender( - BusId::IsByte, - mu_sum.clone(), - vec![BusValue::Packed { - start_column: cols::BASE_ADDRESS_LOW[1], - packing: Packing::Direct, - }], - )); - // ------------------------------------------------------------------------- // AND_BYTE[base_address_low[0], mask] → 0 with μ_sum // mask = write2*1 + write4*3 + write8*7 diff --git a/prover/src/tables/mod.rs b/prover/src/tables/mod.rs index f5372cf06..32fd4970b 100644 --- a/prover/src/tables/mod.rs +++ b/prover/src/tables/mod.rs @@ -14,7 +14,7 @@ //! ## Memory Tables //! //! - **MEMW**: Memory word read/write table (unaligned/split-timestamp path, 49 cols, 26 interactions) -//! - **MEMW_A**: Memory word read/write table (aligned fast path, 30 cols, 22 interactions) +//! - **MEMW_A**: Memory word read/write table (aligned fast path, 30 cols, 20 interactions) //! - **LOAD**: Memory load with extension table //! - **PAGE**: Paged memory init/final table (one per used page) //! - **REGISTER**: Register init/final table (32 registers × 8 bytes = 256 rows) @@ -49,7 +49,7 @@ pub use types::BusId; /// | Table | Main | Bus | Eff.width | Max rows | /// |---------|------|-----|-----------|----------| /// | MEMW | 49 | 26 | 127 | 2^19 | -/// | MEMW_A | 30 | 22 | 96 | 2^20 | +/// | MEMW_A | 30 | 20 | 90 | 2^20 | /// | CPU | 74 | 40 | 194 | 2^19 | /// | DVRM | 34 | 34 | 136 | 2^19 | /// | MUL | 26 | 16 | 74 | 2^20 | @@ -60,7 +60,7 @@ pub use types::BusId; pub mod max_rows { pub const CPU: usize = 1 << 19; // 524,288 — eff. width 194 pub const MEMW: usize = 1 << 19; // 524,288 — eff. width 127 (baseline) - pub const MEMW_A: usize = 1 << 20; // 1,048,576 — eff. width 96 + pub const MEMW_A: usize = 1 << 20; // 1,048,576 — eff. width 90 pub const DVRM: usize = 1 << 19; // 524,288 — eff. width 136 pub const MUL: usize = 1 << 20; // 1,048,576 — eff. width 74 pub const LT: usize = 1 << 21; // 2,097,152 — eff. width 42 diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 5884aaf7c..5eb7740cd 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -865,16 +865,15 @@ fn is_aligned_op(op: &MemwOperation) -> bool { /// Collects bitwise lookups from MEMW_A operations. /// /// Per operation: -/// - 1 IS_HALFWORD for base_address_mid -/// - 1 IS_BYTE for base_address_low[1] /// - 1 AND_BYTE for alignment check (low[0] & mask == 0) +/// +/// IS_HALFWORD[base_address_mid] and IS_BYTE[base_address_low[1]] are +/// assumptions (MEMW_A-A2, MEMW_A-A3.i) — the caller's (CPU's) responsibility. fn collect_bitwise_from_memw_aligned(ops: &[MemwOperation]) -> Vec { - let mut bitwise_ops = Vec::with_capacity(ops.len() * 3); + let mut bitwise_ops = Vec::with_capacity(ops.len()); for op in ops { let low_0 = (op.base_address & 0xFF) as u8; - let low_1 = ((op.base_address >> 8) & 0xFF) as u8; - let mid = ((op.base_address >> 16) & 0xFFFF) as u16; let mask: u8 = match op.width { 2 => 1, 4 => 3, @@ -882,19 +881,6 @@ fn collect_bitwise_from_memw_aligned(ops: &[MemwOperation]) -> Vec 0, }; - // IS_HALFWORD[mid] - bitwise_ops.push(BitwiseOperation::halfword( - BitwiseOperationType::IsHalf, - (mid & 0xFF) as u8, - (mid >> 8) as u8, - )); - - // IS_BYTE[low_1] - bitwise_ops.push(BitwiseOperation::single_byte( - BitwiseOperationType::IsByte, - low_1, - )); - // AND_BYTE[low_0, mask] → expects result 0 bitwise_ops.push(BitwiseOperation::byte_op( BitwiseOperationType::AndByte, From bc185a74937100d8e70508fa7a7d95b52f766e28 Mon Sep 17 00:00:00 2001 From: Nicole Date: Wed, 18 Mar 2026 10:24:10 -0300 Subject: [PATCH 4/9] add comment --- prover/src/tables/trace_builder.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 5eb7740cd..6ed876682 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -832,6 +832,9 @@ fn collect_lt_from_memw(memw_ops: &[MemwOperation]) -> Vec { /// /// Each aligned operation has a single old_timestamp < timestamp check. fn collect_lt_from_memw_aligned(memw_aligned_ops: &[MemwOperation]) -> Vec { + // Address overflow LT checks (R1-R3 in MEMW) are intentionally absent. + // Alignment guarantees addr + (width-1) never wraps: the largest width-N + // aligned address is 2^64-N, and 2^64-N+(N-1) = 2^64-1, so no u64 overflow. memw_aligned_ops .iter() .map(|op| LtOperation::new(op.old_timestamp[0], op.timestamp, false)) From c7c7733b45c8e0543ef9749d7f055a9f547bc514 Mon Sep 17 00:00:00 2001 From: Nicole Date: Wed, 18 Mar 2026 10:45:46 -0300 Subject: [PATCH 5/9] Add a prove_elf test --- prover/src/tests/prove_elfs_tests.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 6ff7c7c0a..e95661437 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -618,6 +618,23 @@ fn test_prove_elfs_test_sb_sh_8() { ); } +/// Exercises the MEMW_A (aligned fast path) table. +/// lw_sw stores a word (4 bytes) at aligned address 20 and loads it back, +/// routing both operations through MEMW_A instead of the unaligned MEMW table. +#[test] +fn test_prove_elfs_lw_sw() { + let (elf, logs, _instructions) = run_asm_elf("lw_sw"); + let mut traces = Traces::from_elf_and_logs(&elf, &logs, &Default::default()).unwrap(); + assert!( + !traces.memw_aligneds.is_empty(), + "lw_sw should produce MEMW_A rows for aligned word accesses" + ); + assert!( + prove_and_verify_vm_minimal(&elf, &mut traces), + "lw_sw failed" + ); +} + #[test] fn test_prove_elfs_all_branches_16() { // Initialize logger to see debug constraint validation output From 2d956a1585695ab08fc21f1f8152ca7b9bdc16f5 Mon Sep 17 00:00:00 2001 From: Nicole Date: Wed, 18 Mar 2026 12:02:31 -0300 Subject: [PATCH 6/9] add prove_elf test using both MEMW and MEMW_A chips --- executor/programs/asm/test_memw_split_ts.s | 24 ++++++++++++++++++++++ prover/src/tests/prove_elfs_tests.rs | 24 ++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 executor/programs/asm/test_memw_split_ts.s diff --git a/executor/programs/asm/test_memw_split_ts.s b/executor/programs/asm/test_memw_split_ts.s new file mode 100644 index 000000000..b6e4c8be3 --- /dev/null +++ b/executor/programs/asm/test_memw_split_ts.s @@ -0,0 +1,24 @@ + .attribute 5, "rv64i2p1" + .globl main +main: + # Exercise both MEMW (split old_timestamp) and MEMW_A (aligned fast path). + # + # MEMW path: sb+sb then lh — the two bytes at sp+0 and sp+1 are written + # by separate instructions (different timestamps), so the lh read has + # mismatched old_timestamps and routes to MEMW. + # + # MEMW_A path: sw then lw — all 4 bytes are written by one instruction + # (same timestamp), so the lw read routes to MEMW_A. + addi sp, sp, -16 # allocate stack + addi t0, zero, 0x41 # t0 = 'A' + addi t1, zero, 0x42 # t1 = 'B' + sb t0, 0(sp) # write byte 0 at sp+0 (timestamp T3) + sb t1, 1(sp) # write byte 1 at sp+1 (timestamp T4 ≠ T3) + lh a0, 0(sp) # read 2 bytes: old_ts[0]=T3, old_ts[1]=T4 → MEMW + addi t2, zero, 0x7FF # t2 = word value + sw t2, 4(sp) # write 4 bytes at sp+4, all timestamp T7 → MEMW_A + lw a1, 4(sp) # read 4 bytes: all old_ts=T7 → MEMW_A + addi sp, sp, 16 # deallocate stack + li a0, 0 + li a7, 93 + ecall diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index e95661437..dcc54d935 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -635,6 +635,30 @@ fn test_prove_elfs_lw_sw() { ); } +/// Exercises both MEMW and MEMW_A in the same program. +/// +/// Two separate `sb` instructions write to adjacent bytes (sp+0 and sp+1) at +/// different timestamps. The subsequent `lh` read spans both bytes and sees +/// mismatched old_timestamps, routing to MEMW. Register ops and the aligned +/// `sw`/`lw` pair route to MEMW_A. +#[test] +fn test_prove_elfs_test_memw_split_ts() { + let (elf, logs, _instructions) = run_asm_elf("test_memw_split_ts"); + let mut traces = Traces::from_elf_and_logs(&elf, &logs, &Default::default()).unwrap(); + assert!( + !traces.memws.is_empty(), + "test_memw_split_ts should produce MEMW rows (split old_timestamps from sb+sb+lh)" + ); + assert!( + !traces.memw_aligneds.is_empty(), + "test_memw_split_ts should produce MEMW_A rows (register ops and aligned sw/lw)" + ); + assert!( + prove_and_verify_vm_minimal(&elf, &mut traces), + "test_memw_split_ts failed" + ); +} + #[test] fn test_prove_elfs_all_branches_16() { // Initialize logger to see debug constraint validation output From 98f6f1583ee8eeb615a560925db3d009eb784c05 Mon Sep 17 00:00:00 2001 From: Nicole Date: Thu, 19 Mar 2026 09:54:13 -0300 Subject: [PATCH 7/9] fix comment: bus numbering --- prover/src/tables/memw_aligned.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prover/src/tables/memw_aligned.rs b/prover/src/tables/memw_aligned.rs index 6525cc2c0..ffd2f9dfe 100644 --- a/prover/src/tables/memw_aligned.rs +++ b/prover/src/tables/memw_aligned.rs @@ -136,7 +136,7 @@ pub fn generate_memw_aligned_trace( } // ========================================================================= -// Bus interactions (22 total) +// Bus interactions (20 total) // ========================================================================= pub fn bus_interactions() -> Vec { From c6cac656576a509956ae49e78afe16c117bff949 Mon Sep 17 00:00:00 2001 From: Nicole Date: Wed, 25 Mar 2026 10:53:49 -0300 Subject: [PATCH 8/9] Use Multiplicity::Sum3 for w2 in MEMW_A to avoid heap allocation on hot path --- prover/src/tables/memw_aligned.rs | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/prover/src/tables/memw_aligned.rs b/prover/src/tables/memw_aligned.rs index ffd2f9dfe..916491dde 100644 --- a/prover/src/tables/memw_aligned.rs +++ b/prover/src/tables/memw_aligned.rs @@ -280,21 +280,8 @@ pub fn bus_interactions() -> Vec { ], )); - // w2 multiplicity - let w2_mult = Multiplicity::Linear(vec![ - LinearTerm::Column { - coefficient: 1, - column: cols::WRITE2, - }, - LinearTerm::Column { - coefficient: 1, - column: cols::WRITE4, - }, - LinearTerm::Column { - coefficient: 1, - column: cols::WRITE8, - }, - ]); + // w2 multiplicity: write2 + write4 + write8 + let w2_mult = Multiplicity::Sum3(cols::WRITE2, cols::WRITE4, cols::WRITE8); // CM18/19: byte 1 with w2 // For aligned accesses, adding 1 to the low byte never overflows to hi word From e209c96d690ad45226b0b9cb4c5c353c3b36542e Mon Sep 17 00:00:00 2001 From: Nicole Date: Wed, 25 Mar 2026 14:43:16 -0300 Subject: [PATCH 9/9] change max_rows per chunk to 2^19 insted of 2^20 --- prover/src/tables/mod.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/prover/src/tables/mod.rs b/prover/src/tables/mod.rs index 32fd4970b..268d75dee 100644 --- a/prover/src/tables/mod.rs +++ b/prover/src/tables/mod.rs @@ -45,11 +45,13 @@ pub use types::BusId; /// Effective width = main_cols + 3 × bus_interactions (extension field = 3× cost). /// MEMW (effective width 127) at 2^19 is the baseline; other tables are scaled /// proportionally: max_rows = (127 × 2^19) / effective_width, rounded to 2^N. +/// (* MEMW_A formula gives 2^20, but set to 2^19 to match MEMW chunk geometry; +/// benchmarks show better parallel throughput with smaller chunks.) /// /// | Table | Main | Bus | Eff.width | Max rows | /// |---------|------|-----|-----------|----------| /// | MEMW | 49 | 26 | 127 | 2^19 | -/// | MEMW_A | 30 | 20 | 90 | 2^20 | +/// | MEMW_A | 30 | 20 | 90 | 2^19 * | /// | CPU | 74 | 40 | 194 | 2^19 | /// | DVRM | 34 | 34 | 136 | 2^19 | /// | MUL | 26 | 16 | 74 | 2^20 | @@ -60,7 +62,7 @@ pub use types::BusId; pub mod max_rows { pub const CPU: usize = 1 << 19; // 524,288 — eff. width 194 pub const MEMW: usize = 1 << 19; // 524,288 — eff. width 127 (baseline) - pub const MEMW_A: usize = 1 << 20; // 1,048,576 — eff. width 90 + pub const MEMW_A: usize = 1 << 19; // 524,288 — eff. width 90 pub const DVRM: usize = 1 << 19; // 524,288 — eff. width 136 pub const MUL: usize = 1 << 20; // 1,048,576 — eff. width 74 pub const LT: usize = 1 << 21; // 2,097,152 — eff. width 42