From 40e0c0cddfa68177e0accfa02f485d600b164d6f Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Mon, 13 Apr 2026 12:17:41 -0300 Subject: [PATCH 1/8] phase 1 --- prover/src/tables/cpu.rs | 52 +++++++++++++++++++++++++++- prover/src/tables/trace_builder.rs | 55 ++++++++++++++++++++++++++++++ prover/src/tests/cpu_tests.rs | 3 +- 3 files changed, 108 insertions(+), 2 deletions(-) diff --git a/prover/src/tables/cpu.rs b/prover/src/tables/cpu.rs index e29ae1d57..3c45af3ad 100644 --- a/prover/src/tables/cpu.rs +++ b/prover/src/tables/cpu.rs @@ -232,8 +232,27 @@ pub mod cols { /// branch_cond: Whether branch is taken pub const BRANCH_COND: usize = 73; + // ------------------------------------------------------------------------- + // Registers on-Main: prev_ts columns (Phase 1) + // ------------------------------------------------------------------------- + // These columns track the timestamp of the previous access to each register. + // Used to eliminate the MEMW_R chip by handling register state inline in CPU. + + /// rs1_prev_ts: Previous mem_step when rs1 was last accessed + pub const RS1_PREV_TS: usize = 74; + /// rs2_prev_ts: Previous mem_step when rs2 was last accessed + pub const RS2_PREV_TS: usize = 75; + /// rd_prev_ts: Previous mem_step when rd was last written + pub const RD_PREV_TS: usize = 76; + /// rd_prev_val_lo: Previous value of rd (low 32 bits) — for proving old state + pub const RD_PREV_VAL_LO: usize = 77; + /// rd_prev_val_hi: Previous value of rd (high 32 bits) + pub const RD_PREV_VAL_HI: usize = 78; + /// pc_prev_ts: Previous mem_step when x255 (PC) was last updated + pub const PC_PREV_TS: usize = 79; + /// Total number of columns - pub const NUM_COLUMNS: usize = 74; + pub const NUM_COLUMNS: usize = 80; // ------------------------------------------------------------------------- // Helper ranges for iteration @@ -298,6 +317,22 @@ pub struct CpuOperation { /// For Commit ECALLs: byte count from x12 pub commit_count: u64, + + // ------------------------------------------------------------------------- + // Registers on-Main: prev_ts tracking (Phase 1) + // ------------------------------------------------------------------------- + /// Previous mem_step when rs1 was last accessed (0 if never) + pub rs1_prev_ts: u64, + /// Previous mem_step when rs2 was last accessed (0 if never) + pub rs2_prev_ts: u64, + /// Previous mem_step when rd was last written (0 if never) + pub rd_prev_ts: u64, + /// Previous value of rd (low 32 bits) before this write + pub rd_prev_val_lo: u32, + /// Previous value of rd (high 32 bits) before this write + pub rd_prev_val_hi: u32, + /// Previous mem_step when x255 (PC) was last updated + pub pc_prev_ts: u64, } impl CpuOperation { @@ -660,6 +695,13 @@ impl CpuOperation { ecall_commit, commit_buf_addr, commit_count, + // Registers on-Main (Phase 1): populated later by populate_register_prev_ts + rs1_prev_ts: 0, + rs2_prev_ts: 0, + rd_prev_ts: 0, + rd_prev_val_lo: 0, + rd_prev_val_hi: 0, + pc_prev_ts: 0, }; // Compute runtime-specific values based on instruction type @@ -856,6 +898,14 @@ pub fn generate_cpu_trace( // Branch columns data[base + cols::IS_EQUAL] = FE::from(op.is_equal as u64); data[base + cols::BRANCH_COND] = FE::from(op.branch_cond as u64); + + // Registers on-Main: prev_ts columns (Phase 1, populated only) + data[base + cols::RS1_PREV_TS] = FE::from(op.rs1_prev_ts); + data[base + cols::RS2_PREV_TS] = FE::from(op.rs2_prev_ts); + data[base + cols::RD_PREV_TS] = FE::from(op.rd_prev_ts); + data[base + cols::RD_PREV_VAL_LO] = FE::from(op.rd_prev_val_lo as u64); + data[base + cols::RD_PREV_VAL_HI] = FE::from(op.rd_prev_val_hi as u64); + data[base + cols::PC_PREV_TS] = FE::from(op.pc_prev_ts); } // Padding rows: per spec, padding uses pc=1 (odd address, unreachable during diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 6cbcd0735..332c8922b 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -301,9 +301,64 @@ fn collect_cpu_ops( let op = CpuOperation::from_log_and_instruction(log, timestamp, instruction); cpu_ops.push(op); } + + // Phase 1 of "Registers on-Main": populate prev_ts columns for each CPU operation. + // This tracks the timestamp of the previous access to each register. + populate_register_prev_ts(&mut cpu_ops); + Ok(cpu_ops) } +/// Populates prev_ts and prev_val fields on CpuOperations by tracking +/// register access history. Used by the Registers-on-Main optimization. +/// +/// CPU instruction timing within one cycle (timestamp = ts): +/// - M1 (read rs1) at ts +/// - M3 (read rs2) at ts+1 +/// - M5 (write rd) at ts+2 +/// - CM54 (write x255 PC) at ts+3 +fn populate_register_prev_ts(cpu_ops: &mut [CpuOperation]) { + // last_ts[reg_idx] = mem_step of last access (read or write) to that register. + // Indices 0..32 for general registers, 255 for PC (x255). + // Initialized to 0 (matches REGISTER table init token at ts=0). + let mut last_ts: [u64; 256] = [0; 256]; + // last_val[reg_idx] = last value written to that register (0 initially). + let mut last_val: [u64; 256] = [0; 256]; + + for op in cpu_ops.iter_mut() { + let ts = op.timestamp; + + // M1: read rs1 at ts (only if read_register1) + if op.decode.read_register1 { + let reg = op.decode.rs1 as usize; + op.rs1_prev_ts = last_ts[reg]; + last_ts[reg] = ts; + } + + // M3: read rs2 at ts+1 (only if read_register2) + if op.decode.read_register2 { + let reg = op.decode.rs2 as usize; + op.rs2_prev_ts = last_ts[reg]; + last_ts[reg] = ts + 1; + } + + // M5: write rd at ts+2 (only if write_register) + if op.decode.write_register { + let reg = op.decode.rd as usize; + op.rd_prev_ts = last_ts[reg]; + op.rd_prev_val_lo = (last_val[reg] & 0xFFFF_FFFF) as u32; + op.rd_prev_val_hi = (last_val[reg] >> 32) as u32; + last_ts[reg] = ts + 2; + last_val[reg] = op.rvd; + } + + // CM54: write x255 (PC) at ts+3 — fires for every non-padding row. + op.pc_prev_ts = last_ts[255]; + last_ts[255] = ts + 3; + last_val[255] = op.next_pc; + } +} + // ============================================================================= // Phase 2: CPU ops → MEMW, LOAD, LT, Bitwise // ============================================================================= diff --git a/prover/src/tests/cpu_tests.rs b/prover/src/tests/cpu_tests.rs index d2c240293..08bf24871 100644 --- a/prover/src/tests/cpu_tests.rs +++ b/prover/src/tests/cpu_tests.rs @@ -336,7 +336,8 @@ fn test_bus_interactions_count() { #[test] fn test_column_count() { - assert_eq!(cols::NUM_COLUMNS, 74); + // 74 base + 6 prev_ts cols (Phase 1 of Registers on-Main) + assert_eq!(cols::NUM_COLUMNS, 80); } #[test] From 2f2b2c2632edada1674a86c267554bc3c33310b7 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Mon, 13 Apr 2026 14:57:47 -0300 Subject: [PATCH 2/8] Replace MEMW_R with CPU-direct Memory bus interactions for register accesses (rs1, rs2, rd, PC), eliminating the register fast-path table and its ~3-4 rows-per-instruction overhead --- prover/src/lib.rs | 23 +- prover/src/tables/cpu.rs | 656 +++++++++++++++--------- prover/src/tables/memw_register.rs | 520 ------------------- prover/src/tables/mod.rs | 6 - prover/src/tables/trace_builder.rs | 394 ++++++-------- prover/src/test_utils.rs | 22 - prover/src/tests/cpu_tests.rs | 10 +- prover/src/tests/prove_elfs_tests.rs | 17 +- prover/src/tests/trace_builder_tests.rs | 104 ++-- 9 files changed, 630 insertions(+), 1122 deletions(-) delete mode 100644 prover/src/tables/memw_register.rs diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 7cb947f6f..b64dc766e 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -41,8 +41,8 @@ use crate::tables::types::BusId; use crate::test_utils::{ E, F, VmAir, create_bitwise_air, create_branch_air, create_commit_air, create_cpu_air, create_decode_air, create_dvrm_air, create_halt_air, create_load_air, create_lt_air, - create_memw_air, create_memw_aligned_air, create_memw_register_air, create_mul_air, - create_page_air, create_register_air, create_shift_air, + create_memw_air, create_memw_aligned_air, create_mul_air, create_page_air, + create_register_air, create_shift_air, }; use stark::proof::options::{GoldilocksCubicProofOptions, ProofOptions}; @@ -73,7 +73,6 @@ pub struct TableCounts { pub dvrm: usize, pub shift: usize, pub branch: usize, - pub memw_register: usize, } impl TableCounts { @@ -92,7 +91,6 @@ impl TableCounts { + self.dvrm + self.shift + self.branch - + self.memw_register } /// Validate that all required tables have at least one chunk. @@ -110,7 +108,6 @@ impl TableCounts { ("dvrm", self.dvrm), ("shift", self.shift), ("branch", self.branch), - ("memw_register", self.memw_register), ]; for (name, count) in checks { if count == 0 { @@ -198,7 +195,6 @@ pub(crate) struct VmAirs { pub commit: VmAir, pub register: VmAir, pub pages: Vec, - pub memw_registers: Vec, } impl VmAirs { @@ -246,13 +242,6 @@ impl VmAirs { for (air, trace) in self.pages.iter().zip(traces.pages.iter_mut()) { pairs.push((air, trace, &())); } - for (air, trace) in self - .memw_registers - .iter() - .zip(traces.memw_registers.iter_mut()) - { - pairs.push((air, trace, &())); - } pairs } @@ -297,9 +286,6 @@ impl VmAirs { for air in &self.pages { refs.push(air); } - for air in &self.memw_registers { - refs.push(air); - } refs } @@ -372,10 +358,6 @@ impl VmAirs { ) }) .collect(); - let memw_registers: Vec<_> = (0..table_counts.memw_register) - .map(|i| create_memw_register_air(proof_options).with_name(&format!("MEMW_R[{}]", i))) - .collect(); - #[cfg(feature = "debug-checks")] debug_report::print_bus_legend(); @@ -395,7 +377,6 @@ impl VmAirs { commit, register, pages, - memw_registers, } } } diff --git a/prover/src/tables/cpu.rs b/prover/src/tables/cpu.rs index 3c45af3ad..6b1c147e5 100644 --- a/prover/src/tables/cpu.rs +++ b/prover/src/tables/cpu.rs @@ -1429,219 +1429,357 @@ pub fn bus_interactions() -> Vec { )); // ========================================================================= - // MEMW and LOAD bus interactions (M1, M3, M5, M6, M7) + // Register on-Main: Memory bus interactions (replaces M1, M3, M5 sends to MEMW_R) // ========================================================================= - // M1 and M3: Register read interactions (CPU → MEMW μ_read) + // + // Each non-padding CPU row directly handles up to 4 register accesses on the + // Memory bus (instead of routing them through the MEMW_R chip): + // + // - rs1 read at ts + 0 (multiplicity = READ_REGISTER1) + // - rs2 read at ts + 1 (multiplicity = READ_REGISTER2) + // - rd write at ts + 2 (multiplicity = WRITE_REGISTER) + // - PC read+write at ts + 1 (multiplicity = non_padding_sum, below) + // + // Per access, 5 bus interactions are emitted (mirroring MEMW_R): + // + // 1 × IsHalfword[current_ts - prev_ts - 1] (range-checks delta ∈ [1, 2^16]) + // 2 × Memory (sender, read-old word 0, 1) (prove-old at prev_ts, prev_val) + // 2 × Memory (receiver, write-new word 0, 1) (assume-new at current_ts, new_val) + // + // Memory bus signature: [is_register, addr_lo, addr_hi, ts_lo, ts_hi, value] + // Registers are always 2 words: word 0 at addr=2*reg, word 1 at addr=2*reg+1. + // Timestamps fit in u32 for CPU-originated ops, so ts_hi = 0 always. + // ------------------------------------------------------------------------- - // M1: MEMW[rv1; 1, 2*rs1, rv1, timestamp+0, 1, 0, 0] | read_register1 + // rs1 read @ timestamp + 0 (multiplicity = READ_REGISTER1) // ------------------------------------------------------------------------- - // Read from rs1 register via MEMW. Format: 24 elements - // [old[8], is_register, base_addr[2], value[8], timestamp[2], write2, write4, write8] - // - // Registers are stored as WL (2 words), remaining 6 values are unconstrained (zeros). - // rv1 is DWordWHH (3 cols: Half, Half, Word) -> pack as WL: lo32 = rv1[0] + 2^16*rv1[1], hi32 = rv1[2] - interactions.push(BusInteraction::sender( - BusId::Memw, - Multiplicity::Column(cols::READ_REGISTER1), - vec![ - // old[0] = lo32 = RV1_0 + 2^16 * RV1_1 - BusValue::linear(vec![ + // rv1 is DWordWHH (3 cols): lo32 = RV1_0 + 2^16*RV1_1, hi32 = RV1_2. + // For a read, old_value == new_value == rv1. + { + let mult = Multiplicity::Column(cols::READ_REGISTER1); + // IsHalfword[TIMESTAMP - RS1_PREV_TS - 1] + interactions.push(BusInteraction::sender( + BusId::IsHalfword, + mult.clone(), + vec![BusValue::linear(vec![ LinearTerm::Column { coefficient: 1, - column: cols::RV1_0, + column: cols::TIMESTAMP, }, LinearTerm::Column { - coefficient: 65536, - column: cols::RV1_1, + coefficient: -1, + column: cols::RS1_PREV_TS, }, - ]), - // old[1] = hi32 = RV1_2 - BusValue::Packed { - start_column: cols::RV1_2, - packing: Packing::Direct, + LinearTerm::Constant(-1), + ])], + )); + let rv1_lo = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::RV1_0, }, - // old[2..7] = 0 (unconstrained for registers) - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - // is_register = 1 - BusValue::constant(1), - // base_address[0] = 2 * rs1 - BusValue::linear(vec![LinearTerm::Column { + LinearTerm::Column { + coefficient: 65536, + column: cols::RV1_1, + }, + ]); + let rv1_hi = BusValue::Packed { + start_column: cols::RV1_2, + packing: Packing::Direct, + }; + let addr_w0 = BusValue::linear(vec![LinearTerm::Column { + coefficient: 2, + column: cols::RS1, + }]); + let addr_w1 = BusValue::linear(vec![ + LinearTerm::Column { coefficient: 2, column: cols::RS1, - }]), - // base_address[1] = 0 - BusValue::constant(0), - // value[0..7] = same as old (rv1 as WL + 6 zeros) - BusValue::linear(vec![ - LinearTerm::Column { - coefficient: 1, - column: cols::RV1_0, - }, - LinearTerm::Column { - coefficient: 65536, - column: cols::RV1_1, - }, - ]), - BusValue::Packed { - start_column: cols::RV1_2, - packing: Packing::Direct, }, - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - // timestamp[0] = timestamp, timestamp[1] = 0 - BusValue::Packed { - start_column: cols::TIMESTAMP, - packing: Packing::Direct, - }, - BusValue::constant(0), - // write2=1, write4=0, write8=0 (register access = 2 Words / 64 bits) - BusValue::constant(1), - BusValue::constant(0), - BusValue::constant(0), - ], - )); + LinearTerm::Constant(1), + ]); + let prev_ts = BusValue::Packed { + start_column: cols::RS1_PREV_TS, + packing: Packing::Direct, + }; + let curr_ts = BusValue::Packed { + start_column: cols::TIMESTAMP, + packing: Packing::Direct, + }; + // Memory read-old word 0 (sender) + interactions.push(BusInteraction::sender( + BusId::Memory, + mult.clone(), + vec![ + BusValue::constant(1), + addr_w0.clone(), + BusValue::constant(0), + prev_ts.clone(), + BusValue::constant(0), + rv1_lo.clone(), + ], + )); + // Memory read-old word 1 (sender) + interactions.push(BusInteraction::sender( + BusId::Memory, + mult.clone(), + vec![ + BusValue::constant(1), + addr_w1.clone(), + BusValue::constant(0), + prev_ts, + BusValue::constant(0), + rv1_hi.clone(), + ], + )); + // Memory write-new word 0 (receiver) + interactions.push(BusInteraction::receiver( + BusId::Memory, + mult.clone(), + vec![ + BusValue::constant(1), + addr_w0, + BusValue::constant(0), + curr_ts.clone(), + BusValue::constant(0), + rv1_lo, + ], + )); + // Memory write-new word 1 (receiver) + interactions.push(BusInteraction::receiver( + BusId::Memory, + mult, + vec![ + BusValue::constant(1), + addr_w1, + BusValue::constant(0), + curr_ts, + BusValue::constant(0), + rv1_hi, + ], + )); + } // ------------------------------------------------------------------------- - // M3: MEMW[rv2; 1, 2*rs2, rv2, timestamp+1, 0, 0, 1] | read_register2 + // rs2 read @ timestamp + 1 (multiplicity = READ_REGISTER2) // ------------------------------------------------------------------------- - // Same pattern as M1 but with RV2 and timestamp+1 - interactions.push(BusInteraction::sender( - BusId::Memw, - Multiplicity::Column(cols::READ_REGISTER2), - vec![ - // old[0] = lo32 = RV2_0 + 2^16 * RV2_1 - BusValue::linear(vec![ + { + let mult = Multiplicity::Column(cols::READ_REGISTER2); + // IsHalfword[(TIMESTAMP + 1) - RS2_PREV_TS - 1] = [TIMESTAMP - RS2_PREV_TS] + interactions.push(BusInteraction::sender( + BusId::IsHalfword, + mult.clone(), + vec![BusValue::linear(vec![ LinearTerm::Column { coefficient: 1, - column: cols::RV2_0, + column: cols::TIMESTAMP, }, LinearTerm::Column { - coefficient: 65536, - column: cols::RV2_1, + coefficient: -1, + column: cols::RS2_PREV_TS, }, - ]), - // old[1] = hi32 = RV2_2 - BusValue::Packed { - start_column: cols::RV2_2, - packing: Packing::Direct, + ])], + )); + let rv2_lo = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::RV2_0, }, - // old[2..7] = 0 - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - // is_register = 1 - BusValue::constant(1), - // base_address[0] = 2 * rs2 - BusValue::linear(vec![LinearTerm::Column { + LinearTerm::Column { + coefficient: 65536, + column: cols::RV2_1, + }, + ]); + let rv2_hi = BusValue::Packed { + start_column: cols::RV2_2, + packing: Packing::Direct, + }; + let addr_w0 = BusValue::linear(vec![LinearTerm::Column { + coefficient: 2, + column: cols::RS2, + }]); + let addr_w1 = BusValue::linear(vec![ + LinearTerm::Column { coefficient: 2, column: cols::RS2, - }]), - // base_address[1] = 0 - BusValue::constant(0), - // value[0..7] = rv2 as WL + 6 zeros - BusValue::linear(vec![ - LinearTerm::Column { - coefficient: 1, - column: cols::RV2_0, - }, - LinearTerm::Column { - coefficient: 65536, - column: cols::RV2_1, - }, - ]), - BusValue::Packed { - start_column: cols::RV2_2, - packing: Packing::Direct, }, - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - // timestamp[0] = timestamp + 1, timestamp[1] = 0 - BusValue::linear(vec![ + LinearTerm::Constant(1), + ]); + let prev_ts = BusValue::Packed { + start_column: cols::RS2_PREV_TS, + packing: Packing::Direct, + }; + let curr_ts = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::TIMESTAMP, + }, + LinearTerm::Constant(1), + ]); + interactions.push(BusInteraction::sender( + BusId::Memory, + mult.clone(), + vec![ + BusValue::constant(1), + addr_w0.clone(), + BusValue::constant(0), + prev_ts.clone(), + BusValue::constant(0), + rv2_lo.clone(), + ], + )); + interactions.push(BusInteraction::sender( + BusId::Memory, + mult.clone(), + vec![ + BusValue::constant(1), + addr_w1.clone(), + BusValue::constant(0), + prev_ts, + BusValue::constant(0), + rv2_hi.clone(), + ], + )); + interactions.push(BusInteraction::receiver( + BusId::Memory, + mult.clone(), + vec![ + BusValue::constant(1), + addr_w0, + BusValue::constant(0), + curr_ts.clone(), + BusValue::constant(0), + rv2_lo, + ], + )); + interactions.push(BusInteraction::receiver( + BusId::Memory, + mult, + vec![ + BusValue::constant(1), + addr_w1, + BusValue::constant(0), + curr_ts, + BusValue::constant(0), + rv2_hi, + ], + )); + } + + // ------------------------------------------------------------------------- + // rd write @ timestamp + 2 (multiplicity = WRITE_REGISTER) + // ------------------------------------------------------------------------- + // old_value = (RD_PREV_VAL_LO, RD_PREV_VAL_HI) tracked in trace builder. + // new_value = rvd (RVD_0, RVD_1, DWordWL). + { + let mult = Multiplicity::Column(cols::WRITE_REGISTER); + // IsHalfword[(TIMESTAMP + 2) - RD_PREV_TS - 1] = [TIMESTAMP - RD_PREV_TS + 1] + interactions.push(BusInteraction::sender( + BusId::IsHalfword, + mult.clone(), + vec![BusValue::linear(vec![ LinearTerm::Column { coefficient: 1, column: cols::TIMESTAMP, }, + LinearTerm::Column { + coefficient: -1, + column: cols::RD_PREV_TS, + }, LinearTerm::Constant(1), - ]), - BusValue::constant(0), - // write2=1, write4=0, write8=0 (register access = 2 Words / 64 bits) - BusValue::constant(1), - BusValue::constant(0), - BusValue::constant(0), - ], - )); - - // ------------------------------------------------------------------------- - // M5: MEMW[1, 2*rd, rvd, timestamp+2, 0, 0, 1] | write_register - // ------------------------------------------------------------------------- - // Write to rd register via MEMW. Format: 16 elements (write, no old) - // [is_register, base_addr[2], value[8], timestamp[2], write2, write4, write8] - // - // rvd is DWordWL (2 cols: Word, Word) - // MEMW uses EXCLUSIVE encoding for write flags: (0, 0, 1) for 8-byte access - // ("exactly N bytes" semantics, not "at least N bytes") - interactions.push(BusInteraction::sender( - BusId::Memw, - Multiplicity::Column(cols::WRITE_REGISTER), - vec![ - // is_register = 1 - BusValue::constant(1), - // base_address[0] = 2 * rd - BusValue::linear(vec![LinearTerm::Column { + ])], + )); + let old_lo = BusValue::Packed { + start_column: cols::RD_PREV_VAL_LO, + packing: Packing::Direct, + }; + let old_hi = BusValue::Packed { + start_column: cols::RD_PREV_VAL_HI, + packing: Packing::Direct, + }; + let new_lo = BusValue::Packed { + start_column: cols::RVD_0, + packing: Packing::Direct, + }; + let new_hi = BusValue::Packed { + start_column: cols::RVD_1, + packing: Packing::Direct, + }; + let addr_w0 = BusValue::linear(vec![LinearTerm::Column { + coefficient: 2, + column: cols::RD, + }]); + let addr_w1 = BusValue::linear(vec![ + LinearTerm::Column { coefficient: 2, column: cols::RD, - }]), - // base_address[1] = 0 - BusValue::constant(0), - // value[0] = rvd_lo = RVD_0 - BusValue::Packed { - start_column: cols::RVD_0, - packing: Packing::Direct, }, - // value[1] = rvd_hi = RVD_1 - BusValue::Packed { - start_column: cols::RVD_1, - packing: Packing::Direct, + LinearTerm::Constant(1), + ]); + let prev_ts = BusValue::Packed { + start_column: cols::RD_PREV_TS, + packing: Packing::Direct, + }; + let curr_ts = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::TIMESTAMP, }, - // value[2..7] = 0 - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - // timestamp[0] = timestamp + 2, timestamp[1] = 0 - BusValue::linear(vec![ - LinearTerm::Column { - coefficient: 1, - column: cols::TIMESTAMP, - }, - LinearTerm::Constant(2), - ]), - BusValue::constant(0), - // write2=1, write4=0, write8=0 (EXCLUSIVE encoding for 2-Word register access) - BusValue::constant(1), - BusValue::constant(0), - BusValue::constant(0), - ], - )); + LinearTerm::Constant(2), + ]); + interactions.push(BusInteraction::sender( + BusId::Memory, + mult.clone(), + vec![ + BusValue::constant(1), + addr_w0.clone(), + BusValue::constant(0), + prev_ts.clone(), + BusValue::constant(0), + old_lo, + ], + )); + interactions.push(BusInteraction::sender( + BusId::Memory, + mult.clone(), + vec![ + BusValue::constant(1), + addr_w1.clone(), + BusValue::constant(0), + prev_ts, + BusValue::constant(0), + old_hi, + ], + )); + interactions.push(BusInteraction::receiver( + BusId::Memory, + mult.clone(), + vec![ + BusValue::constant(1), + addr_w0, + BusValue::constant(0), + curr_ts.clone(), + BusValue::constant(0), + new_lo, + ], + )); + interactions.push(BusInteraction::receiver( + BusId::Memory, + mult, + vec![ + BusValue::constant(1), + addr_w1, + BusValue::constant(0), + curr_ts, + BusValue::constant(0), + new_hi, + ], + )); + } + // ========================================================================= + // LOAD and STORE bus interactions (M6, M7) + // ========================================================================= // ------------------------------------------------------------------------- // M6: LOAD[rvd; base_address, timestamp, read2, read4, read8, signed] | LOAD // ------------------------------------------------------------------------- @@ -1773,16 +1911,16 @@ pub fn bus_interactions() -> Vec { )); // ------------------------------------------------------------------------- - // CM54: MEMW[pc; 1, 510, next_pc, timestamp+1, 1, 0, 0] | 1 - pad + // PC register read-write @ timestamp + 1 (register on-Main, replaces CM54) // ------------------------------------------------------------------------- - // PC register read-write via MEMW. Format: 24 elements (with old) - // [old[8], is_register, base_addr[2], value[8], timestamp[2], write2, write4, write8] + // Every non-padding CPU row reads the current PC and writes next_pc to x255 + // (Word addresses 510, 511). Multiplicity = sum of 16 ALU flags (= 1 for + // non-padding rows, 0 for padding). // - // Every non-padding CPU row reads pc and writes next_pc to register x255 (address 510). - // Multiplicity = sum of all ALU flags = 1 for non-padding rows, 0 for padding. - interactions.push(BusInteraction::sender( - BusId::Memw, - Multiplicity::Linear(vec![ + // old_value = (PC_0, PC_1) — the PC entering this instruction + // new_value = (NEXT_PC_0, NEXT_PC_1) — the PC of the next instruction + { + let non_padding = Multiplicity::Linear(vec![ LinearTerm::Column { coefficient: 1, column: cols::ADD, @@ -1847,62 +1985,102 @@ pub fn bus_interactions() -> Vec { coefficient: 1, column: cols::EBREAK, }, - ]), - vec![ - // old[0] = PC_0 (low word of current pc) - BusValue::Packed { - start_column: cols::PC_0, - packing: Packing::Direct, - }, - // old[1] = PC_1 (high word of current pc) - BusValue::Packed { - start_column: cols::PC_1, - packing: Packing::Direct, - }, - // old[2..7] = 0 (unconstrained for registers) - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - // is_register = 1 - BusValue::constant(1), - // base_address = [510, 0] (register x255) - BusValue::constant(510), - BusValue::constant(0), - // value[0] = NEXT_PC_0 (low word of next_pc) - BusValue::Packed { - start_column: cols::NEXT_PC_0, - packing: Packing::Direct, - }, - // value[1] = NEXT_PC_1 (high word of next_pc) - BusValue::Packed { - start_column: cols::NEXT_PC_1, - packing: Packing::Direct, - }, - // value[2..7] = 0 (unconstrained for registers) - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - // timestamp[0] = timestamp + 1, timestamp[1] = 0 - BusValue::linear(vec![ + ]); + // IsHalfword[(TIMESTAMP + 1) - PC_PREV_TS - 1] = [TIMESTAMP - PC_PREV_TS] + interactions.push(BusInteraction::sender( + BusId::IsHalfword, + non_padding.clone(), + vec![BusValue::linear(vec![ LinearTerm::Column { coefficient: 1, column: cols::TIMESTAMP, }, - LinearTerm::Constant(1), - ]), - BusValue::constant(0), - // write2=1, write4=0, write8=0 (register access = 2 Words / 64 bits) - BusValue::constant(1), - BusValue::constant(0), - BusValue::constant(0), - ], - )); + LinearTerm::Column { + coefficient: -1, + column: cols::PC_PREV_TS, + }, + ])], + )); + let pc_lo = BusValue::Packed { + start_column: cols::PC_0, + packing: Packing::Direct, + }; + let pc_hi = BusValue::Packed { + start_column: cols::PC_1, + packing: Packing::Direct, + }; + let next_pc_lo = BusValue::Packed { + start_column: cols::NEXT_PC_0, + packing: Packing::Direct, + }; + let next_pc_hi = BusValue::Packed { + start_column: cols::NEXT_PC_1, + packing: Packing::Direct, + }; + let prev_ts = BusValue::Packed { + start_column: cols::PC_PREV_TS, + packing: Packing::Direct, + }; + let curr_ts = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::TIMESTAMP, + }, + LinearTerm::Constant(1), + ]); + // Memory read-old word 0 (sender): [1, 510, 0, PC_PREV_TS, 0, PC_0] + interactions.push(BusInteraction::sender( + BusId::Memory, + non_padding.clone(), + vec![ + BusValue::constant(1), + BusValue::constant(510), + BusValue::constant(0), + prev_ts.clone(), + BusValue::constant(0), + pc_lo, + ], + )); + // Memory read-old word 1 (sender): [1, 511, 0, PC_PREV_TS, 0, PC_1] + interactions.push(BusInteraction::sender( + BusId::Memory, + non_padding.clone(), + vec![ + BusValue::constant(1), + BusValue::constant(511), + BusValue::constant(0), + prev_ts, + BusValue::constant(0), + pc_hi, + ], + )); + // Memory write-new word 0 (receiver): [1, 510, 0, TIMESTAMP+1, 0, NEXT_PC_0] + interactions.push(BusInteraction::receiver( + BusId::Memory, + non_padding.clone(), + vec![ + BusValue::constant(1), + BusValue::constant(510), + BusValue::constant(0), + curr_ts.clone(), + BusValue::constant(0), + next_pc_lo, + ], + )); + // Memory write-new word 1 (receiver): [1, 511, 0, TIMESTAMP+1, 0, NEXT_PC_1] + interactions.push(BusInteraction::receiver( + BusId::Memory, + non_padding, + vec![ + BusValue::constant(1), + BusValue::constant(511), + BusValue::constant(0), + curr_ts, + BusValue::constant(0), + next_pc_hi, + ], + )); + } // ------------------------------------------------------------------------- // BRANCH interaction (for branch/jump target calculation) diff --git a/prover/src/tables/memw_register.rs b/prover/src/tables/memw_register.rs deleted file mode 100644 index 206a9c746..000000000 --- a/prover/src/tables/memw_register.rs +++ /dev/null @@ -1,520 +0,0 @@ -//! MEMW_R (Memory Write/Read -- Register) table. -//! -//! Ultra-slim fast path for register accesses. Registers are always 2 words -//! (DWordWL), always aligned, and `is_register=1`, so this table strips out -//! all memory-specific columns (address decomposition, alignment mask, width -//! flags, per-byte old_timestamps). -//! -//! ## Timestamp ordering: IS_HALF instead of LT -//! -//! The general MEMW table proves `old_timestamp < timestamp` by routing through -//! the LT table, which requires extra LT trace rows and bus interactions. -//! MEMW_R instead checks `IS_HALF[timestamp[0] - old_timestamp[0] - 1]`, -//! which proves the delta is in `[1, 2^16]` in a single lookup. This is safe -//! because registers are accessed very frequently — their timestamp deltas are -//! almost always small — and the routing predicate (`is_register_op`) enforces -//! the delta fits before admitting an op into this table. -//! -//! ## Column layout (10 columns) -//! -//! - `ADDRESS`: Byte (register index 0-31) -//! - `TIMESTAMP_0`: Word (low 32 bits) -//! - `TIMESTAMP_1`: Word (high 32 bits) -//! - `VAL_0`: Word (low 32 bits of register value) -//! - `VAL_1`: Word (high 32 bits of register value) -//! - `OLD_0`: Word (low 32 bits of previous value) -//! - `OLD_1`: Word (high 32 bits of previous value) -//! - `OLD_TIMESTAMP_LO`: Word (low 32 bits of old timestamp; upper limb = TIMESTAMP_1) -//! - `MU_READ`: Bit -//! - `MU_WRITE`: Bit -//! -//! ## Virtual -//! -//! - `old_timestamp = [OLD_TIMESTAMP_LO, TIMESTAMP_1]` (shares upper limb!) -//! - `mu_sum = MU_READ + MU_WRITE` -//! -//! ## Bus Interactions (7) -//! - 1 IS_HALFWORD[timestamp_0 - old_timestamp_lo - 1] -//! - 4 Memory bus tokens (read-old + write-new, per word) -//! - 2 MEMW output interactions (read + write, from CPU) - -use math::field::element::FieldElement; -use math::field::traits::{IsField, IsSubFieldOf}; -use stark::constraints::transition::TransitionConstraint; -use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; -use stark::table::TableView; -use stark::trace::TraceTable; -use stark::traits::TransitionEvaluationContext; - -use super::memw::MemwOperation; -use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; - -// ========================================================================= -// Column indices (10 columns) -// ========================================================================= - -pub mod cols { - /// Register index (0-31). CPU sends base_address = 2*reg_index. - pub const ADDRESS: usize = 0; - - /// Timestamp low 32 bits - pub const TIMESTAMP_0: usize = 1; - /// Timestamp high 32 bits - pub const TIMESTAMP_1: usize = 2; - - /// Register value low 32 bits - pub const VAL_0: usize = 3; - /// Register value high 32 bits - pub const VAL_1: usize = 4; - - /// Previous value low 32 bits - pub const OLD_0: usize = 5; - /// Previous value high 32 bits - pub const OLD_1: usize = 6; - - /// Old timestamp low 32 bits (upper limb shared with TIMESTAMP_1) - pub const OLD_TIMESTAMP_LO: usize = 7; - - /// Read multiplicity - pub const MU_READ: usize = 8; - /// Write multiplicity - pub const MU_WRITE: usize = 9; - - pub const NUM_COLUMNS: usize = 10; -} - -// ========================================================================= -// Trace generation -// ========================================================================= - -/// Generates the MEMW_R trace table from register operations. -/// -/// Reuses `MemwOperation` -- the trace generator divides `base_address` by 2 -/// to recover the register index (CPU sends `2 * register_index`). -pub fn generate_memw_register_trace( - operations: &[MemwOperation], -) -> TraceTable { - let num_rows = operations.len().next_power_of_two().max(4); - let mut data = vec![FE::zero(); num_rows * cols::NUM_COLUMNS]; - - for (row_idx, op) in operations.iter().enumerate() { - let base = row_idx * cols::NUM_COLUMNS; - - debug_assert_eq!( - op.base_address % 2, - 0, - "register base_address must be even (got {})", - op.base_address - ); - // Both register words must have been last accessed at the same timestamp. - // MEMW_R stores a single old_timestamp_lo and shares TIMESTAMP_1 as the - // upper limb, so if the two words differ, the wrong token would be sent - // to the memory bus. The routing predicate enforces this before dispatch. - debug_assert_eq!( - op.old_timestamp[0], op.old_timestamp[1], - "register words must share old_timestamp ({} != {})", - op.old_timestamp[0], op.old_timestamp[1] - ); - - // ADDRESS = base_address / 2 (CPU sends 2 * register_index) - data[base + cols::ADDRESS] = FE::from(op.base_address / 2); - - // Timestamp split into lo/hi 32-bit words - data[base + cols::TIMESTAMP_0] = FE::from(op.timestamp & 0xFFFF_FFFF); - data[base + cols::TIMESTAMP_1] = FE::from(op.timestamp >> 32); - - // Value: registers are DWordWL = 2 words - data[base + cols::VAL_0] = FE::from(op.value[0]); - data[base + cols::VAL_1] = FE::from(op.value[1]); - - // Old value - data[base + cols::OLD_0] = FE::from(op.old[0]); - data[base + cols::OLD_1] = FE::from(op.old[1]); - - // Old timestamp low (upper limb shared with TIMESTAMP_1) - data[base + cols::OLD_TIMESTAMP_LO] = FE::from(op.old_timestamp[0] & 0xFFFF_FFFF); - - // Multiplicity - data[base + cols::MU_READ] = FE::from(op.is_read as u64); - data[base + cols::MU_WRITE] = FE::from(!op.is_read as u64); - } - - TraceTable::new_main(data, cols::NUM_COLUMNS, 1) -} - -// ========================================================================= -// Bus interactions (7 total) -// ========================================================================= - -pub fn bus_interactions() -> Vec { - let mut interactions = Vec::with_capacity(7); - - let mu_sum = Multiplicity::Sum(cols::MU_READ, cols::MU_WRITE); - - // ------------------------------------------------------------------------- - // IS_HALFWORD[timestamp_0 - old_timestamp_lo - 1] with mu_sum - // ------------------------------------------------------------------------- - interactions.push(BusInteraction::sender( - BusId::IsHalfword, - mu_sum.clone(), - vec![BusValue::linear(vec![ - LinearTerm::Column { - coefficient: 1, - column: cols::TIMESTAMP_0, - }, - LinearTerm::Column { - coefficient: -1, - column: cols::OLD_TIMESTAMP_LO, - }, - LinearTerm::Constant(-1), - ])], - )); - - // ------------------------------------------------------------------------- - // Memory bus read-old (sender, for i=0,1) - // memory[is_register=1, addr_lo=2*ADDRESS+i, addr_hi=0, - // OLD_TIMESTAMP_LO, TIMESTAMP_1, OLD[i]] - // ------------------------------------------------------------------------- - for i in 0..2 { - let addr_lo = BusValue::linear(vec![ - LinearTerm::Column { - coefficient: 2, - column: cols::ADDRESS, - }, - LinearTerm::Constant(i as i64), - ]); - - interactions.push(BusInteraction::sender( - BusId::Memory, - mu_sum.clone(), - vec![ - BusValue::constant(1), - addr_lo, - BusValue::constant(0), - BusValue::Packed { - start_column: cols::OLD_TIMESTAMP_LO, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::TIMESTAMP_1, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: if i == 0 { cols::OLD_0 } else { cols::OLD_1 }, - packing: Packing::Direct, - }, - ], - )); - } - - // ------------------------------------------------------------------------- - // Memory bus write-new (receiver, for i=0,1) - // memory[is_register=1, addr_lo=2*ADDRESS+i, addr_hi=0, - // TIMESTAMP_0, TIMESTAMP_1, VAL[i]] - // ------------------------------------------------------------------------- - for i in 0..2 { - let addr_lo = BusValue::linear(vec![ - LinearTerm::Column { - coefficient: 2, - column: cols::ADDRESS, - }, - LinearTerm::Constant(i as i64), - ]); - - interactions.push(BusInteraction::receiver( - BusId::Memory, - mu_sum.clone(), - vec![ - BusValue::constant(1), - addr_lo, - BusValue::constant(0), - BusValue::Packed { - start_column: cols::TIMESTAMP_0, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::TIMESTAMP_1, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: if i == 0 { cols::VAL_0 } else { cols::VAL_1 }, - packing: Packing::Direct, - }, - ], - )); - } - - // ------------------------------------------------------------------------- - // CO24: MEMW read receiver (from CPU M1/M3 sender) - // ------------------------------------------------------------------------- - let addr_lo_linear = BusValue::linear(vec![LinearTerm::Column { - coefficient: 2, - column: cols::ADDRESS, - }]); - - interactions.push(BusInteraction::receiver( - BusId::Memw, - Multiplicity::Column(cols::MU_READ), - vec![ - // old[0..8] - BusValue::Packed { - start_column: cols::OLD_0, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::OLD_1, - packing: Packing::Direct, - }, - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - // is_register = 1 - BusValue::constant(1), - // base_address = [2*ADDRESS, 0] - addr_lo_linear.clone(), - BusValue::constant(0), - // value[0..8] - BusValue::Packed { - start_column: cols::VAL_0, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::VAL_1, - packing: Packing::Direct, - }, - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - // timestamp - BusValue::Packed { - start_column: cols::TIMESTAMP_0, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::TIMESTAMP_1, - packing: Packing::Direct, - }, - // write flags: write2=1, write4=0, write8=0 (registers are always 2 words) - BusValue::constant(1), - BusValue::constant(0), - BusValue::constant(0), - ], - )); - - // ------------------------------------------------------------------------- - // CO25: MEMW write receiver (from CPU M5 sender — register write to rd) - // ------------------------------------------------------------------------- - interactions.push(BusInteraction::receiver( - BusId::Memw, - Multiplicity::Column(cols::MU_WRITE), - vec![ - // is_register = 1 - BusValue::constant(1), - // base_address = [2*ADDRESS, 0] - addr_lo_linear, - BusValue::constant(0), - // value[0..8] - BusValue::Packed { - start_column: cols::VAL_0, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::VAL_1, - packing: Packing::Direct, - }, - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - BusValue::constant(0), - // timestamp - BusValue::Packed { - start_column: cols::TIMESTAMP_0, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::TIMESTAMP_1, - packing: Packing::Direct, - }, - // write flags: write2=1, write4=0, write8=0 - BusValue::constant(1), - BusValue::constant(0), - BusValue::constant(0), - ], - )); - - interactions -} - -// ========================================================================= -// Constraints (3 algebraic) -// ========================================================================= - -/// MEMW_R constraint: IS_BIT(mu_sum) = (mu_read + mu_write) * (1 - mu_read - mu_write) = 0 -pub struct MemwRegisterMuSumIsBit { - constraint_idx: usize, -} - -impl MemwRegisterMuSumIsBit { - pub fn new(constraint_idx: usize) -> Self { - Self { constraint_idx } - } - - fn compute(&self, step: &TableView) -> FieldElement - where - F: IsSubFieldOf, - E: IsField, - { - let one = FieldElement::::one(); - let mu_read = step.get_main_evaluation_element(0, cols::MU_READ).clone(); - let mu_write = step.get_main_evaluation_element(0, cols::MU_WRITE).clone(); - let mu_sum = &mu_read + &mu_write; - &mu_sum * (&one - &mu_sum) - } -} - -impl TransitionConstraint for MemwRegisterMuSumIsBit { - fn degree(&self) -> usize { - 2 - } - - fn constraint_idx(&self) -> usize { - self.constraint_idx - } - - fn end_exemptions(&self) -> usize { - 0 - } - - fn evaluate( - &self, - evaluation_context: &TransitionEvaluationContext, - transition_evaluations: &mut [FieldElement], - ) { - match evaluation_context { - TransitionEvaluationContext::Prover { - frame, - periodic_values: _, - rap_challenges: _, - .. - } => { - let v = self.compute(frame.get_evaluation_step(0)); - transition_evaluations[self.constraint_idx] = v.to_extension(); - } - TransitionEvaluationContext::Verifier { - frame, - periodic_values: _, - rap_challenges: _, - .. - } => { - let v = self.compute(frame.get_evaluation_step(0)); - transition_evaluations[self.constraint_idx] = v; - } - } - } -} - -/// Creates all constraints for the MEMW_R table (3 total). -/// -/// - IS_BIT(MU_READ) -- unconditional -/// - IS_BIT(MU_WRITE) -- unconditional -/// - IS_BIT(mu_sum) = (mu_read + mu_write) * (1 - mu_read - mu_write) = 0 -pub fn constraints() -> Vec>> { - use crate::constraints::templates::IsBitConstraint; - - vec![ - Box::new(IsBitConstraint::unconditional(cols::MU_READ, 0)), - Box::new(IsBitConstraint::unconditional(cols::MU_WRITE, 1)), - Box::new(MemwRegisterMuSumIsBit::new(2)), - ] -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_memw_register_trace_generation() { - // Create a simple register op (reg x1 = address 1, so base_address = 2) - let ops = vec![ - MemwOperation::new( - true, // is_register - 2, // base_address = 2 * register_index (reg x1) - [42, 7, 0, 0, 0, 0, 0, 0], - 100, - 2, // width = 2 words (registers are DWordWL) - true, - ) - .with_old([10, 3, 0, 0, 0, 0, 0, 0], [50, 50, 0, 0, 0, 0, 0, 0]), - ]; - - let trace = generate_memw_register_trace(&ops); - assert_eq!(trace.num_cols(), cols::NUM_COLUMNS); - assert!(trace.num_rows() >= 4); // minimum 4 rows - - // ADDRESS = base_address / 2 = 2 / 2 = 1 - assert_eq!(*trace.get_main(0, cols::ADDRESS), FE::from(1u64)); - - // TIMESTAMP split - assert_eq!(*trace.get_main(0, cols::TIMESTAMP_0), FE::from(100u64)); - assert_eq!(*trace.get_main(0, cols::TIMESTAMP_1), FE::from(0u64)); - - // Values - assert_eq!(*trace.get_main(0, cols::VAL_0), FE::from(42u64)); - assert_eq!(*trace.get_main(0, cols::VAL_1), FE::from(7u64)); - - // Old values - assert_eq!(*trace.get_main(0, cols::OLD_0), FE::from(10u64)); - assert_eq!(*trace.get_main(0, cols::OLD_1), FE::from(3u64)); - - // Old timestamp lo - assert_eq!(*trace.get_main(0, cols::OLD_TIMESTAMP_LO), FE::from(50u64)); - - // Multiplicity: is_read = true => MU_READ=1, MU_WRITE=0 - assert_eq!(*trace.get_main(0, cols::MU_READ), FE::from(1u64)); - assert_eq!(*trace.get_main(0, cols::MU_WRITE), FE::from(0u64)); - } - - #[test] - fn test_memw_register_trace_generation_write_op() { - // Write op: is_read = false => MU_WRITE=1, MU_READ=0 - let ops = vec![ - MemwOperation::new( - true, // is_register - 4, // base_address = 2 * register_index (reg x2) - [99, 55, 0, 0, 0, 0, 0, 0], - 200, - 2, // width = 2 words - false, // is_read = false (write) - ) - .with_old([11, 22, 0, 0, 0, 0, 0, 0], [180, 180, 0, 0, 0, 0, 0, 0]), - ]; - - let trace = generate_memw_register_trace(&ops); - - // ADDRESS = base_address / 2 = 4 / 2 = 2 - assert_eq!(*trace.get_main(0, cols::ADDRESS), FE::from(2u64)); - - // Values - assert_eq!(*trace.get_main(0, cols::VAL_0), FE::from(99u64)); - assert_eq!(*trace.get_main(0, cols::VAL_1), FE::from(55u64)); - - // Old values - assert_eq!(*trace.get_main(0, cols::OLD_0), FE::from(11u64)); - assert_eq!(*trace.get_main(0, cols::OLD_1), FE::from(22u64)); - - // Old timestamp lo - assert_eq!(*trace.get_main(0, cols::OLD_TIMESTAMP_LO), FE::from(180u64)); - - // Multiplicity: is_read = false => MU_WRITE=1, MU_READ=0 - assert_eq!(*trace.get_main(0, cols::MU_READ), FE::from(0u64)); - assert_eq!(*trace.get_main(0, cols::MU_WRITE), FE::from(1u64)); - } -} diff --git a/prover/src/tables/mod.rs b/prover/src/tables/mod.rs index 551dc4aa3..1bb351583 100644 --- a/prover/src/tables/mod.rs +++ b/prover/src/tables/mod.rs @@ -32,7 +32,6 @@ pub mod load; pub mod lt; pub mod memw; pub mod memw_aligned; -pub mod memw_register; pub mod mul; pub mod page; pub mod register; @@ -60,7 +59,6 @@ pub use types::BusId; /// | SHIFT | 27 | 15 | 72 | 2^20 | /// | LOAD | 18 | 5 | 33 | 2^21 | /// | BRANCH | 14 | 6 | 32 | 2^21 | -/// | MEMW_R | 10 | 7 | 31 | 2^21 | pub mod max_rows { pub const CPU: usize = 1 << 19; // 524,288 — eff. width 194 pub const MEMW: usize = 1 << 19; // 524,288 — eff. width 127 (baseline) @@ -71,7 +69,6 @@ pub mod max_rows { pub const SHIFT: usize = 1 << 20; // 1,048,576 — eff. width 72 pub const LOAD: usize = 1 << 21; // 2,097,152 — eff. width 33 pub const BRANCH: usize = 1 << 21; // 2,097,152 — eff. width 32 - pub const MEMW_R: usize = 1 << 21; // 2,097,152 — eff. width 31 } /// Per-table maximum row limits, configurable for different environments. @@ -89,7 +86,6 @@ pub struct MaxRowsConfig { pub shift: usize, pub load: usize, pub branch: usize, - pub memw_register: usize, } impl Default for MaxRowsConfig { @@ -104,7 +100,6 @@ impl Default for MaxRowsConfig { shift: max_rows::SHIFT, load: max_rows::LOAD, branch: max_rows::BRANCH, - memw_register: max_rows::MEMW_R, } } } @@ -123,7 +118,6 @@ impl MaxRowsConfig { shift: 1 << 5, load: 1 << 5, branch: 1 << 5, - memw_register: 1 << 5, } } } diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 332c8922b..7663f27c4 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -10,9 +10,11 @@ //! ```text //! PHASE 0: ELF → DECODE, MEMORY_INIT (preprocessed tables) //! PHASE 1: Logs → CPU ops -//! PHASE 2: CPU ops → MEMW, MEMW_A, MEMW_R, LOAD, LT, Bitwise (with state tracking for MEMW/LOAD) -//! PHASE 3: MEMW/MEMW_A → LT ops (timestamp ordering); MEMW_R uses IS_HALFWORD instead -//! PHASE 4: LT, MEMW_A, MEMW_R → Bitwise lookups +//! PHASE 2: CPU ops → MEMW, MEMW_A, LOAD, LT, Bitwise (with state tracking for MEMW/LOAD) +//! Register accesses (rs1/rs2/rd/PC) are handled inline on the Memory bus +//! by the CPU chip (Registers on-Main) and do not produce MEMW ops here. +//! PHASE 3: MEMW/MEMW_A → LT ops (timestamp ordering) +//! PHASE 4: LT, MEMW_A → Bitwise lookups //! PHASE 5: Generate all traces //! ``` //! @@ -44,7 +46,6 @@ use super::load::{self, LoadOperation}; use super::lt::{self, LtOperation}; use super::memw::{self, MemwOperation}; use super::memw_aligned; -use super::memw_register; use super::mul::{self, MulOperation}; use super::page::{self, FinalByteState, FinalStateMap, PageConfig}; use super::register::{self, FinalRegisterStateMap, FinalRegisterWordState}; @@ -302,63 +303,14 @@ fn collect_cpu_ops( cpu_ops.push(op); } - // Phase 1 of "Registers on-Main": populate prev_ts columns for each CPU operation. - // This tracks the timestamp of the previous access to each register. - populate_register_prev_ts(&mut cpu_ops); + // Note: register prev_ts / prev_val columns are populated later in + // `collect_ops_from_cpu`, where they are read from `register_state` + // (the unified source of truth that also accounts for COMMIT ECALL + // register accesses). Ok(cpu_ops) } -/// Populates prev_ts and prev_val fields on CpuOperations by tracking -/// register access history. Used by the Registers-on-Main optimization. -/// -/// CPU instruction timing within one cycle (timestamp = ts): -/// - M1 (read rs1) at ts -/// - M3 (read rs2) at ts+1 -/// - M5 (write rd) at ts+2 -/// - CM54 (write x255 PC) at ts+3 -fn populate_register_prev_ts(cpu_ops: &mut [CpuOperation]) { - // last_ts[reg_idx] = mem_step of last access (read or write) to that register. - // Indices 0..32 for general registers, 255 for PC (x255). - // Initialized to 0 (matches REGISTER table init token at ts=0). - let mut last_ts: [u64; 256] = [0; 256]; - // last_val[reg_idx] = last value written to that register (0 initially). - let mut last_val: [u64; 256] = [0; 256]; - - for op in cpu_ops.iter_mut() { - let ts = op.timestamp; - - // M1: read rs1 at ts (only if read_register1) - if op.decode.read_register1 { - let reg = op.decode.rs1 as usize; - op.rs1_prev_ts = last_ts[reg]; - last_ts[reg] = ts; - } - - // M3: read rs2 at ts+1 (only if read_register2) - if op.decode.read_register2 { - let reg = op.decode.rs2 as usize; - op.rs2_prev_ts = last_ts[reg]; - last_ts[reg] = ts + 1; - } - - // M5: write rd at ts+2 (only if write_register) - if op.decode.write_register { - let reg = op.decode.rd as usize; - op.rd_prev_ts = last_ts[reg]; - op.rd_prev_val_lo = (last_val[reg] & 0xFFFF_FFFF) as u32; - op.rd_prev_val_hi = (last_val[reg] >> 32) as u32; - last_ts[reg] = ts + 2; - last_val[reg] = op.rvd; - } - - // CM54: write x255 (PC) at ts+3 — fires for every non-padding row. - op.pc_prev_ts = last_ts[255]; - last_ts[255] = ts + 3; - last_val[255] = op.next_pc; - } -} - // ============================================================================= // Phase 2: CPU ops → MEMW, LOAD, LT, Bitwise // ============================================================================= @@ -374,9 +326,12 @@ fn populate_register_prev_ts(cpu_ops: &mut [CpuOperation]) { /// MEMW and LOAD collection requires sequential processing with state tracking. /// /// Returns: (memw_ops, load_ops, lt_ops, shift_ops, bitwise_ops, commit_ops) +/// +/// `cpu_ops` is mutated in place to populate the Registers-on-Main prev_ts +/// and prev_val columns from `register_state` before each op is processed. #[allow(clippy::type_complexity)] fn collect_ops_from_cpu( - cpu_ops: &[CpuOperation], + cpu_ops: &mut [CpuOperation], memory_state: &mut MemoryState, register_state: &mut RegisterState, ) -> ( @@ -396,7 +351,7 @@ fn collect_ops_from_cpu( let mut current_commit_index = 0u32; let mut commit_ecall_count = 0u32; - for op in cpu_ops { + for op in cpu_ops.iter_mut() { // --- MEMW and LOAD (require state tracking, order matters) --- // Collect memory operations for Load/Store instructions @@ -410,9 +365,10 @@ fn collect_ops_from_cpu( memw_ops.push(memw_op); } - // Collect register operations (M1, M3, M5) - let reg_memw_ops = collect_register_ops_from_cpu(op, register_state); - memw_ops.extend(reg_memw_ops); + // Register accesses (rs1, rs2, rd, PC): populate on-Main prev_ts/prev_val + // columns AND update register_state. No MEMW operations are produced — + // the CPU chip emits the Memory bus interactions directly. + track_register_ops_from_cpu(op, register_state); // Collect COMMIT ECALL memory operations (register reads/writes + byte reads) if op.ecall_commit { @@ -583,33 +539,36 @@ fn collect_store_op_from_cpu(op: &CpuOperation, memory_state: &mut MemoryState) memw_op } -/// Collects register read/write operations (M1, M3, M5) from CpuOperation. +/// Tracks register read/write operations (rs1, rs2, rd, PC) for the Registers +/// on-Main optimization. /// -/// Returns: Vec of MEMW operations for register accesses -fn collect_register_ops_from_cpu( - op: &CpuOperation, - register_state: &mut RegisterState, -) -> Vec { - let mut memw_ops = Vec::with_capacity(4); +/// Instead of producing MEMW operations for register accesses (previously routed +/// through the MEMW_R chip), this function: +/// 1. Reads the previous (value, timestamp) from `register_state` before the +/// access happens and stores them in the CpuOperation's prev_ts / prev_val +/// columns. These columns feed the CPU chip's Memory bus interactions. +/// 2. Updates `register_state` so subsequent ops (including this op's own +/// dependent accesses, and COMMIT / HALT paths that still run later) see +/// the post-access state. +/// +/// Per-instruction memory timestamps match the CPU chip's sends: +/// - rs1 read at timestamp + 0 (if READ_REGISTER1 && rs1 != 0) +/// - rs2 read at timestamp + 1 (if READ_REGISTER2 && rs2 != 0) +/// - rd write at timestamp + 2 (if WRITE_REGISTER && rd != 0) +/// - PC read-write at timestamp + 1 (every non-padding row) +fn track_register_ops_from_cpu(op: &mut CpuOperation, register_state: &mut RegisterState) { let d = &op.decode; - // M1: Read rs1 register at timestamp+0 - // Skip x0 (hardwired zero). x255 (the register where the pc is stored) is handled - // via read_pc/write_pc since regs[] only covers indices 0..31. + // rs1 read @ ts + 0 + // Guards match CPU column: READ_REGISTER1 = d.read_register1 && rs1 != 0. + // x255 (PC) is stored in register_state.pc_register, not regs[]. if d.read_register1 && d.rs1 != 0 { - let reg_value = pack_register_value(op.rv1); - let reg_addr = 2 * d.rs1 as u64; let (_old_val, old_ts) = if d.rs1 == 255 { register_state.read_pc() } else { register_state.read(d.rs1) }; - // old_timestamps array is 8 elements but only first 2 are used for registers - let old_timestamps = [old_ts, old_ts, 0, 0, 0, 0, 0, 0]; - - let memw_op = MemwOperation::new(true, reg_addr, reg_value, op.timestamp, 2, true) - .with_old(reg_value, old_timestamps); - memw_ops.push(memw_op); + op.rs1_prev_ts = old_ts; if d.rs1 == 255 { register_state.write_pc(op.rv1, op.timestamp); } else { @@ -617,51 +576,29 @@ fn collect_register_ops_from_cpu( } } - // M3: Read rs2 register at timestamp+1 + // rs2 read @ ts + 1 if d.read_register2 && d.rs2 != 0 { - let reg_value = pack_register_value(op.rv2); - let reg_addr = 2 * d.rs2 as u64; let (_old_val, old_ts) = register_state.read(d.rs2); - // old_timestamps array is 8 elements but only first 2 are used for registers - let old_timestamps = [old_ts, old_ts, 0, 0, 0, 0, 0, 0]; - - let memw_op = MemwOperation::new(true, reg_addr, reg_value, op.timestamp + 1, 2, true) - .with_old(reg_value, old_timestamps); - memw_ops.push(memw_op); + op.rs2_prev_ts = old_ts; register_state.write(d.rs2, op.rv2, op.timestamp + 1); } - // M5: Write rd register at timestamp+2 + // rd write @ ts + 2 + // prev_val is needed here to prove the old state on the Memory bus. if d.write_register && d.rd != 0 { - let reg_value = pack_register_value(op.rvd); - let reg_addr = 2 * d.rd as u64; let (old_val, old_ts) = register_state.read(d.rd); - let old_value = pack_register_value(old_val); - // old_timestamps array is 8 elements but only first 2 are used for registers - let old_timestamps = [old_ts, old_ts, 0, 0, 0, 0, 0, 0]; - - let memw_op = MemwOperation::new(true, reg_addr, reg_value, op.timestamp + 2, 2, false) - .with_old(old_value, old_timestamps); - memw_ops.push(memw_op); + op.rd_prev_ts = old_ts; + op.rd_prev_val_lo = (old_val & 0xFFFF_FFFF) as u32; + op.rd_prev_val_hi = (old_val >> 32) as u32; register_state.write(d.rd, op.rvd, op.timestamp + 2); } - // CM54: PC register read-write at timestamp+1 - // Every non-padding CPU row sends a MEMW for x255 (address 510). - // old = pc (current), value = next_pc (new). + // PC (x255) read-write @ ts + 1 — always, for every non-padding row. { - let pc_value = pack_register_value(op.decode.pc); - let next_pc_value = pack_register_value(op.next_pc); - let (_old_val, old_ts) = register_state.read_pc(); - let old_timestamps = [old_ts, old_ts, 0, 0, 0, 0, 0, 0]; - - let memw_op = MemwOperation::new(true, 510, next_pc_value, op.timestamp + 1, 2, true) - .with_old(pc_value, old_timestamps); - memw_ops.push(memw_op); + let (_pc_val, pc_ts) = register_state.read_pc(); + op.pc_prev_ts = pc_ts; register_state.write_pc(op.next_pc, op.timestamp + 1); } - - memw_ops } /// Collects MEMW operations for a COMMIT ECALL from CpuOperation. @@ -922,6 +859,89 @@ fn is_aligned_op(op: &MemwOperation) -> bool { } /// Collects bitwise lookups from MEMW_A operations. +/// Collects IS_HALFWORD bitwise lookups for CPU register-on-Main range checks. +/// +/// For each non-padding CPU instruction, the CPU chip sends IS_HALFWORD queries +/// for each register access. These entries populate the Bitwise table so that +/// those queries are received (balancing the IS_HALFWORD bus). +/// +/// Deltas match the CPU chip bus interactions exactly: +/// - rs1: IS_HALF[TIMESTAMP - RS1_PREV_TS - 1] (if READ_REGISTER1) +/// - rs2: IS_HALF[TIMESTAMP - RS2_PREV_TS] (if READ_REGISTER2) +/// - rd: IS_HALF[TIMESTAMP - RD_PREV_TS + 1] (if WRITE_REGISTER) +/// - PC: IS_HALF[TIMESTAMP - PC_PREV_TS] (if non-padding row) +fn collect_bitwise_from_cpu_registers(cpu_ops: &[CpuOperation]) -> Vec { + let mut ops = Vec::with_capacity(cpu_ops.len() * 4); + + for op in cpu_ops { + let d = &op.decode; + // CPU timestamps always fit in u32; use low 32 bits. + let ts = (op.timestamp & 0xFFFF_FFFF) as u32; + + // rs1: IS_HALF[ts - rs1_prev_ts - 1] — fires when READ_REGISTER1=1 + if d.read_register1 && d.rs1 != 0 { + let prev = (op.rs1_prev_ts & 0xFFFF_FFFF) as u32; + let delta = ts.wrapping_sub(prev).wrapping_sub(1) as u16; + ops.push(BitwiseOperation::halfword( + BitwiseOperationType::IsHalf, + (delta & 0xFF) as u8, + (delta >> 8) as u8, + )); + } + + // rs2: IS_HALF[ts - rs2_prev_ts] — fires when READ_REGISTER2=1 + if d.read_register2 && d.rs2 != 0 { + let prev = (op.rs2_prev_ts & 0xFFFF_FFFF) as u32; + let delta = ts.wrapping_sub(prev) as u16; + ops.push(BitwiseOperation::halfword( + BitwiseOperationType::IsHalf, + (delta & 0xFF) as u8, + (delta >> 8) as u8, + )); + } + + // rd: IS_HALF[ts - rd_prev_ts + 1] — fires when WRITE_REGISTER=1 + if d.write_register && d.rd != 0 { + let prev = (op.rd_prev_ts & 0xFFFF_FFFF) as u32; + let delta = ts.wrapping_sub(prev).wrapping_add(1) as u16; + ops.push(BitwiseOperation::halfword( + BitwiseOperationType::IsHalf, + (delta & 0xFF) as u8, + (delta >> 8) as u8, + )); + } + + // PC: IS_HALF[ts - pc_prev_ts] — fires for every non-padding row + let non_padding = d.op_add + || d.op_sub + || d.op_slt + || d.op_and + || d.op_or + || d.op_xor + || d.op_shift + || d.op_jalr + || d.op_beq + || d.op_blt + || d.op_load + || d.op_store + || d.op_mul + || d.op_divrem + || d.op_ecall + || d.op_ebreak; + if non_padding { + let prev = (op.pc_prev_ts & 0xFFFF_FFFF) as u32; + let delta = ts.wrapping_sub(prev) as u16; + ops.push(BitwiseOperation::halfword( + BitwiseOperationType::IsHalf, + (delta & 0xFF) as u8, + (delta >> 8) as u8, + )); + } + } + + ops +} + /// /// Per operation: /// - 1 IS_HALF for alignment check: IS_HALF[base_address[0] + mask] @@ -958,59 +978,6 @@ fn collect_bitwise_from_memw_aligned(ops: &[MemwOperation]) -> Vec old_timestamp[0] (lower limb ordering) -/// 5. timestamp[0] - old_timestamp[0] <= 0x10000 (delta fits in IS_HALF range [1, 2^16]) -/// -/// Width-1 register ops (e.g. COMMIT x254) stay in MEMW, which has -/// dynamic write flags. MEMW_R hardcodes write2=1. -fn is_register_op(op: &MemwOperation) -> bool { - if !op.is_register || op.width != 2 { - return false; - } - // Both words must share old_timestamp (atomic register write assumption) - if op.old_timestamp[0] != op.old_timestamp[1] { - return false; - } - let ts = op.timestamp; - let old_ts = op.old_timestamp[0]; - let ts_lo = ts & 0xFFFF_FFFF; - let old_ts_lo = old_ts & 0xFFFF_FFFF; - let ts_hi = ts >> 32; - let old_ts_hi = old_ts >> 32; - ts_hi == old_ts_hi && ts_lo > old_ts_lo && (ts_lo - old_ts_lo) <= 0x10000 -} - -/// Collects IS_HALFWORD bitwise lookups for MEMW_R operations. -/// -/// For each register op: checks that `timestamp[0] - old_timestamp_lo - 1` fits -/// in a halfword (proving the timestamp delta is in range [1, 2^16]). -fn collect_bitwise_from_memw_register(ops: &[MemwOperation]) -> Vec { - ops.iter() - .map(|op| { - let ts_lo = op.timestamp & 0xFFFF_FFFF; - let old_ts_lo = op.old_timestamp[0] & 0xFFFF_FFFF; - debug_assert!( - ts_lo > old_ts_lo, - "ts_lo must exceed old_ts_lo (enforced by is_register_op)" - ); - let diff_minus_1 = (ts_lo - old_ts_lo - 1) as u16; - BitwiseOperation::halfword( - BitwiseOperationType::IsHalf, - (diff_minus_1 & 0xFF) as u8, - (diff_minus_1 >> 8) as u8, - ) - }) - .collect() -} - // ============================================================================= // Phase 4: All → Bitwise lookups // ============================================================================= @@ -1678,9 +1645,6 @@ pub struct Traces { /// COMMIT table for write syscall (byte-by-byte commit with recursive bus) pub commit: TraceTable, - - /// MEMW_R register-only fast-path traces (split into chunks of max_rows::MEMW_R) - pub memw_registers: Vec>, } /// Chunk raw ops and generate one trace table per chunk. @@ -1709,7 +1673,6 @@ impl Traces { dvrm: self.dvrms.len(), shift: self.shifts.len(), branch: self.branches.len(), - memw_register: self.memw_registers.len(), } } @@ -1849,7 +1812,7 @@ impl Traces { // ===================================================================== // PHASE 1: Logs → CPU operations // ===================================================================== - let cpu_ops = collect_cpu_ops(logs, &instructions)?; + let mut cpu_ops = collect_cpu_ops(logs, &instructions)?; // ===================================================================== // PHASE 2: CPU ops → MEMW, LOAD, LT, Bitwise, Branch @@ -1859,17 +1822,17 @@ impl Traces { let mut memory_state = MemoryState::from_elf(elf); let mut register_state = RegisterState::new(elf.entry_point); let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops, commit_ops) = - collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state); + collect_ops_from_cpu(&mut cpu_ops, &mut memory_state, &mut register_state); // HALT finalization: 33 register MEMW operations at timestamp u64::MAX. // Must come before Phase 3 (LT from MEMW) so HALT ops get timestamp checks. let halt_memw_ops = collect_halt_ops(&mut register_state); memw_ops.extend(halt_memw_ops); - // Route MEMW_R (register fast-path) first, then MEMW_A (aligned), rest → MEMW. - // Order matters: register ops would also pass is_aligned_op, so check first. - let (memw_register_ops, memw_ops): (Vec<_>, Vec<_>) = - memw_ops.into_iter().partition(is_register_op); + // Route aligned memory ops to MEMW_A, everything else to MEMW. + // (Register accesses from CPU now flow directly on the Memory bus via the + // CPU chip — see Registers on-Main. Any residual register ops here come + // from COMMIT / HALT and go through the general MEMW path.) let (memw_aligned_ops, memw_ops): (Vec<_>, Vec<_>) = memw_ops.into_iter().partition(is_aligned_op); @@ -1950,8 +1913,8 @@ impl Traces { bitwise_ops.extend(collect_bitwise_from_branch(&branch_ops)); bitwise_ops.extend(shift::collect_bitwise_from_shift(&shift_ops)); bitwise_ops.extend(collect_bitwise_from_memw_aligned(&memw_aligned_ops)); - // MEMW_R sends IS_HALFWORD[timestamp_0 - old_timestamp_lo - 1] - bitwise_ops.extend(collect_bitwise_from_memw_register(&memw_register_ops)); + // CPU register-on-Main: IS_HALFWORD range checks for rs1/rs2/rd/PC + bitwise_ops.extend(collect_bitwise_from_cpu_registers(&cpu_ops)); // PAGE tables do IS_BYTE lookups for init and fini values (C1, C2) bitwise_ops.extend(collect_bitwise_from_page(elf, &memory_state)); @@ -1990,11 +1953,6 @@ impl Traces { max_rows.memw_aligned, memw_aligned::generate_memw_aligned_trace, ); - let memw_registers = chunk_and_generate( - &memw_register_ops, - max_rows.memw_register, - memw_register::generate_memw_register_trace, - ); let loads = chunk_and_generate(&load_ops, max_rows.load, load::generate_load_trace); let lts = chunk_and_generate(<_ops, max_rows.lt, lt::generate_lt_trace); let shifts = chunk_and_generate(&shift_ops, max_rows.shift, shift::generate_shift_trace); @@ -2077,7 +2035,6 @@ impl Traces { branches, halt: halt_trace, commit: commit_trace, - memw_registers, }) } @@ -2095,7 +2052,7 @@ impl Traces { // ===================================================================== // PHASE 1: Logs → CPU operations // ===================================================================== - let cpu_ops = collect_cpu_ops(logs, &instructions)?; + let mut cpu_ops = collect_cpu_ops(logs, &instructions)?; // ===================================================================== // PHASE 2: CPU ops → MEMW, LOAD, LT, Bitwise, Branch @@ -2106,16 +2063,17 @@ impl Traces { let entry_point = cpu_ops.first().map_or(0, |op| op.decode.pc); let mut register_state = RegisterState::new(entry_point); let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops, commit_ops) = - collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state); + collect_ops_from_cpu(&mut cpu_ops, &mut memory_state, &mut register_state); // HALT finalization: 33 register MEMW operations at timestamp u64::MAX. // Must come before Phase 3 (LT from MEMW) so HALT ops get timestamp checks. let halt_memw_ops = collect_halt_ops(&mut register_state); memw_ops.extend(halt_memw_ops); - // Route MEMW_R (register fast-path) first, then MEMW_A (aligned), rest → MEMW. - let (memw_register_ops, memw_ops): (Vec<_>, Vec<_>) = - memw_ops.into_iter().partition(is_register_op); + // Route aligned memory ops to MEMW_A, everything else to MEMW. + // Register accesses from CPU flow directly on the Memory bus via the CPU + // chip (Registers on-Main); any residual register ops come from COMMIT / + // HALT and go through the general MEMW path. let (memw_aligned_ops, memw_ops): (Vec<_>, Vec<_>) = memw_ops.into_iter().partition(is_aligned_op); @@ -2196,8 +2154,8 @@ impl Traces { bitwise_ops.extend(collect_bitwise_from_branch(&branch_ops)); bitwise_ops.extend(shift::collect_bitwise_from_shift(&shift_ops)); bitwise_ops.extend(collect_bitwise_from_memw_aligned(&memw_aligned_ops)); - // MEMW_R sends IS_HALFWORD[timestamp_0 - old_timestamp_lo - 1] - bitwise_ops.extend(collect_bitwise_from_memw_register(&memw_register_ops)); + // CPU register-on-Main: IS_HALFWORD range checks for rs1/rs2/rd/PC + bitwise_ops.extend(collect_bitwise_from_cpu_registers(&cpu_ops)); let public_output_bytes: Vec = commit_ops .iter() @@ -2233,11 +2191,6 @@ impl Traces { max_rows.memw_aligned, memw_aligned::generate_memw_aligned_trace, ); - let memw_registers = chunk_and_generate( - &memw_register_ops, - max_rows.memw_register, - memw_register::generate_memw_register_trace, - ); let loads = chunk_and_generate(&load_ops, max_rows.load, load::generate_load_trace); let lts = chunk_and_generate(<_ops, max_rows.lt, lt::generate_lt_trace); let shifts = chunk_and_generate(&shift_ops, max_rows.shift, shift::generate_shift_trace); @@ -2306,7 +2259,6 @@ impl Traces { branches, halt: halt_trace, commit: commit_trace, - memw_registers, }) } @@ -2359,53 +2311,3 @@ impl Traces { } } -#[cfg(test)] -mod routing_tests { - use super::*; - - fn make_register_op(timestamp: u64, old_timestamp: u64) -> MemwOperation { - MemwOperation::new(true, 2, [1, 0, 0, 0, 0, 0, 0, 0], timestamp, 2, false) - .with_old([0; 8], [old_timestamp, old_timestamp, 0, 0, 0, 0, 0, 0]) - } - - #[test] - fn test_is_register_op_delta_at_boundary_routes_in() { - // delta = 0x10000 = 2^16: spec allows this (IS_HALF[0xFFFF] is valid) - let op = make_register_op(0x10000, 0); - assert!(is_register_op(&op), "delta = 2^16 should route to MEMW_R"); - } - - #[test] - fn test_is_register_op_delta_above_boundary_falls_back() { - // delta = 0x10001: one above the IS_HALF range, must fall back to MEMW_A - let op = make_register_op(0x10001, 0); - assert!( - !is_register_op(&op), - "delta = 2^16 + 1 should fall back to MEMW_A" - ); - } - - #[test] - fn test_is_register_op_delta_one_routes_in() { - // delta = 1: minimum allowed value - let op = make_register_op(1, 0); - assert!(is_register_op(&op), "delta = 1 should route to MEMW_R"); - } - - #[test] - fn test_is_register_op_delta_zero_falls_back() { - // delta = 0: ts[0] not strictly greater than old_ts[0] - let op = make_register_op(5, 5); - assert!(!is_register_op(&op), "delta = 0 should not route to MEMW_R"); - } - - #[test] - fn test_is_register_op_upper_limb_mismatch_falls_back() { - // ts_hi != old_ts_hi: shared upper limb assumption violated - let op = make_register_op(0x1_0000_0001, 0x0_0000_0000); - assert!( - !is_register_op(&op), - "different upper limbs should fall back to MEMW_A" - ); - } -} diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index 155af86cb..93d6d2971 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -54,10 +54,6 @@ use crate::tables::memw_aligned::{ bus_interactions as memw_aligned_bus_interactions, cols as memw_aligned_cols, constraints as memw_aligned_constraints, }; -use crate::tables::memw_register::{ - bus_interactions as memw_register_bus_interactions, cols as memw_register_cols, - constraints as memw_register_constraints, -}; use crate::tables::mul::{bus_interactions as mul_bus_interactions, cols as mul_cols}; use crate::tables::page::{bus_interactions as page_bus_interactions, cols as page_cols}; use crate::tables::register::{ @@ -580,24 +576,6 @@ pub fn create_memw_aligned_air(proof_options: &ProofOptions) -> VmAir { .with_name("MEMW_A") } -/// Create MEMW_R (register) AIR with constraints and bus interactions. -pub fn create_memw_register_air(proof_options: &ProofOptions) -> VmAir { - let transition_constraints = memw_register_constraints(); - - let auxiliary_trace_build_data = AuxiliaryTraceBuildData { - interactions: memw_register_bus_interactions(), - }; - - AirWithBuses::new( - memw_register_cols::NUM_COLUMNS, - auxiliary_trace_build_data, - proof_options, - 1, - transition_constraints, - ) - .with_name("MEMW_R") -} - /// Create LOAD AIR with constraints and bus interactions. pub fn create_load_air(proof_options: &ProofOptions) -> VmAir { let transition_constraints = load_constraints(); diff --git a/prover/src/tests/cpu_tests.rs b/prover/src/tests/cpu_tests.rs index 08bf24871..b058775bf 100644 --- a/prover/src/tests/cpu_tests.rs +++ b/prover/src/tests/cpu_tests.rs @@ -317,12 +317,10 @@ fn test_bus_interactions_count() { // - 1 MSB8 (res_sign_bit) // - 1 ZERO (is_equal for BEQ) // - 1 LT (less-than comparison) - // - 1 M1 (MEMW read rs1 register) - // - 1 M3 (MEMW read rs2 register) - // - 1 M5 (MEMW write rd register) + // - 20 Registers on-Main: 4 accesses × (1 IsHalfword + 2 Memory sender + 2 Memory receiver) + // (replaces M1/M3/M5/CM54 — CPU handles register accesses directly on the Memory bus) // - 1 M6 (LOAD from memory) // - 1 M7 (STORE to memory) - // - 1 CM54 (MEMW PC register read-write) // - 1 DECODE (instruction fetch) // - 1 MUL (multiplication) // - 1 DVRM (division/remainder) @@ -330,8 +328,8 @@ fn test_bus_interactions_count() { // - 1 BRANCH (branch/jump target calculation) // - 1 ECALL (single shared bus for HALT and COMMIT, mult = ECALL) // - 27 IS_BYTE (byte range checks: RS1, RS2, RD, ARG1[0..7], ARG2[0..7], RES[0..7]) - // Total: 8 + 8 + 8 + 2 + 1 + 1 + 1 + 1 + 5 + 1 + 1 + 1 + 1 + 1 + 1 + 27 = 68 - assert_eq!(interactions.len(), 68); + // Total: 8 + 8 + 8 + 2 + 1 + 1 + 1 + 20 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 27 = 84 + assert_eq!(interactions.len(), 84); } #[test] diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 9a1b8a6d0..87a743892 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -1663,20 +1663,15 @@ fn test_heap_alloc_runtime_pages_roundtrip() { ); } -/// Verify that register ops route to MEMW_R and a full prove/verify roundtrip -/// succeeds. Uses `test_add_8` which exercises register reads and writes. +/// Verify a full prove/verify roundtrip with register accesses handled inline +/// by the CPU chip (Registers on-Main). Uses `test_add_8` which exercises +/// register reads and writes. #[test] -fn test_prove_verify_with_memw_register() { +fn test_prove_verify_with_register_accesses() { let (elf, logs, instructions) = run_asm_elf("test_add_8"); let mut traces = Traces::from_logs_minimal(&logs, instructions.clone(), &Default::default()).unwrap(); - // Register ops must go to MEMW_R, not to MEMW_A. - assert!( - !traces.memw_registers.is_empty(), - "register ops should route to MEMW_R: memw_registers must be non-empty" - ); - // MEMW_A should still have non-register aligned ops (e.g. stack stores). assert!( !traces.memw_aligneds.is_empty(), @@ -1686,7 +1681,7 @@ fn test_prove_verify_with_memw_register() { // Full prove + verify roundtrip. assert!( prove_and_verify_vm_minimal(&elf, &mut traces), - "prove/verify should succeed when MEMW_R handles register ops" + "prove/verify should succeed when CPU handles register accesses inline" ); } @@ -1716,7 +1711,6 @@ fn test_verify_rejects_zero_table_counts() { dvrm: 0, shift: 0, branch: 0, - memw_register: 0, }, ..vm_proof }; @@ -1784,7 +1778,6 @@ fn test_crafted_zero_count_proof_must_not_verify() { dvrm: 0, shift: 0, branch: 0, - memw_register: 0, }; let airs = VmAirs::new(&elf, &proof_options, true, &[], &zero_counts); diff --git a/prover/src/tests/trace_builder_tests.rs b/prover/src/tests/trace_builder_tests.rs index a21f2c273..03cbc541d 100644 --- a/prover/src/tests/trace_builder_tests.rs +++ b/prover/src/tests/trace_builder_tests.rs @@ -3,7 +3,6 @@ use crate::tables::bitwise; use crate::tables::cpu::cols; use crate::tables::lt; -use crate::tables::memw_register; use crate::tables::trace_builder::Traces; use crate::tables::types::FE; use executor::vm::instruction::decoding::{ArithOp, Comparison, Instruction}; @@ -353,12 +352,17 @@ fn test_mixed_instructions() { // ============================================================================= #[test] -fn test_memw_generated_from_register_ops() { - // Test that MEMW operations are generated for register reads/writes - // ADD x1, x2, x3 reads x2 (M1), x3 (M3), writes x1 (M5) +fn test_cpu_populates_register_prev_ts_columns() { + // Registers-on-Main: register accesses no longer produce MEMW operations. + // Instead, the CPU chip emits the Memory bus interactions directly, using + // the RS1_PREV_TS / RS2_PREV_TS / RD_PREV_TS / PC_PREV_TS / RD_PREV_VAL_* + // columns populated by the trace builder from register_state. + // + // This test verifies those columns track register history correctly across + // two consecutive instructions that read/write overlapping registers. let mut logs = vec![ - make_add_log(0x1000, 100, 200, 300), // x2=100, x3=200, x1=300 - make_add_log(0x1004, 0, 0, 0), + make_add_log(0x1000, 100, 200, 300), // ADD x1, x2, x3: x2=100, x3=200, x1=300 + make_add_log(0x1004, 300, 200, 500), // ADD x4, x1, x3: re-reads x1 (just written) and x3 make_add_log(0x1008, 0, 0, 0), make_add_log(0x100c, 0, 0, 0), ]; @@ -370,9 +374,9 @@ fn test_memw_generated_from_register_ops() { op: ArithOp::Add, }, Instruction::Arith { - dst: 0, - src1: 0, - src2: 0, + dst: 4, // x4 + src1: 1, // x1 (was just written by op 0) + src2: 3, // x3 (read again) op: ArithOp::Add, }, Instruction::Arith { @@ -393,39 +397,44 @@ fn test_memw_generated_from_register_ops() { let traces = Traces::from_logs(&logs, instructions, &Default::default()).unwrap(); - // Register ops should route to MEMW_R (memw_registers), not MEMW_A. - // First instruction generates: M1 (read x2), M3 (read x3), M5 (write x1). - assert!( - !traces.memw_registers.is_empty(), - "MEMW_R should have at least one chunk for register ops" + // CPU timestamps: op 0 at ts=4, op 1 at ts=8 (stride = 4). + // Op 0: rs1=x2 @ ts=4, rs2=x3 @ ts=5, rd=x1 @ ts=6. + // Op 1: rs1=x1, rs2=x3, rd=x4. prev_ts for: + // rs1=x1 (just written by op 0 at ts=6) should be 6. + // rs2=x3 (read by op 0 at ts=5) should be 5. + // rd=x4 (never touched) should be 0. + // rd_prev_val for x4 should be 0. + let cpu_row = traces.cpus[0].main_table.get_row(1); + assert_eq!( + cpu_row[cols::RS1_PREV_TS], + FE::from(6u64), + "op 1 rs1_prev_ts should be ts of op 0's rd write (6)" ); - assert!( - traces.memw_registers[0].main_table.height >= 3, - "MEMW_R should have at least 3 rows for register ops (reads x2, x3 + write x1)" + assert_eq!( + cpu_row[cols::RS2_PREV_TS], + FE::from(5u64), + "op 1 rs2_prev_ts should be ts of op 0's rs2 read (5)" ); - - // Find the register write to x1 in MEMW_R. - // MEMW_R columns: ADDRESS = register_index (x1 → index 1), - // MU_WRITE = 1 for writes, VAL_0 = value low 32 bits. - let mut found_write = false; - for row_idx in 0..traces.memw_registers[0].main_table.height { - let row = traces.memw_registers[0].main_table.get_row(row_idx); - // ADDRESS = 1 (x1), MU_WRITE = 1, VAL_0 = 300 - if row[memw_register::cols::ADDRESS] == FE::from(1u64) - && row[memw_register::cols::MU_WRITE] == FE::one() - { - assert_eq!( - row[memw_register::cols::VAL_0], - FE::from(300u64), - "Write value for x1 should be 300" - ); - found_write = true; - break; - } - } - assert!( - found_write, - "Register write to x1 (ADDRESS=1, MU_WRITE=1, VAL_0=300) not found in MEMW_R" + assert_eq!( + cpu_row[cols::RD_PREV_TS], + FE::from(0u64), + "op 1 rd_prev_ts for unused x4 should be 0 (REGISTER init token)" + ); + assert_eq!( + cpu_row[cols::RD_PREV_VAL_LO], + FE::from(0u64), + "op 1 rd_prev_val for unused x4 should be 0" + ); + assert_eq!( + cpu_row[cols::RD_PREV_VAL_HI], + FE::from(0u64), + "op 1 rd_prev_val_hi for unused x4 should be 0" + ); + // PC_PREV_TS for op 1 should be ts+1 of op 0 = 5 (op 0's CM54 write). + assert_eq!( + cpu_row[cols::PC_PREV_TS], + FE::from(5u64), + "op 1 pc_prev_ts should be op 0's PC write ts (5)" ); } @@ -474,24 +483,19 @@ fn test_memw_generates_lt_for_timestamp_ordering() { let traces = Traces::from_logs(&logs, instructions, &Default::default()).unwrap(); - // Register ops route to MEMW_R (IS_HALFWORD, not LT). - assert!( - !traces.memw_registers.is_empty(), - "Register ops should route to MEMW_R" - ); - - // Register ops use IS_HALF for timestamp ordering instead of LT. + // Register ops are handled inline on the Memory bus via the CPU chip. + // They use IS_HALFWORD for timestamp delta range checks (not LT). // Verify the bitwise table has at least one IS_HALF entry with non-zero - // multiplicity, proving that MEMW_R's IS_HALF lookups were emitted. + // multiplicity, proving that CPU's on-Main IS_HALFWORD lookups were emitted. let has_is_half_entry = (0..traces.bitwise.main_table.height) .any(|i| traces.bitwise.main_table.get_row(i)[bitwise::cols::MU_IS_HALF] != FE::zero()); assert!( has_is_half_entry, - "MEMW_R register ops should produce IS_HALF bitwise entries" + "CPU register accesses should produce IS_HALF bitwise entries" ); // The LT table should still have ops from non-register MEMW accesses - // (e.g. PC next-pc write is a non-register memory op that needs LT). + // (e.g. HALT finalization writes that generate LT timestamp checks). let total_lt_rows: usize = traces.lts.iter().map(|t| t.main_table.height).sum(); assert!( total_lt_rows > 0, From 369a91512a6671bd8a8040c36778134a7c88a64c Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Mon, 13 Apr 2026 16:35:55 -0300 Subject: [PATCH 3/8] Add REGISTER_RELOAD table to bridge timestamp gaps >65534 in register accesses, preventing IS_HALF overflow when a register goes unaccessed for many instructions --- prover/src/lib.rs | 13 +- prover/src/tables/mod.rs | 1 + prover/src/tables/register_reload.rs | 214 +++++++++++++++++++++++++++ prover/src/tables/trace_builder.rs | 97 ++++++++++-- prover/src/test_utils.rs | 26 ++++ prover/src/tests/prove_elfs_tests.rs | 2 +- 6 files changed, 337 insertions(+), 16 deletions(-) create mode 100644 prover/src/tables/register_reload.rs diff --git a/prover/src/lib.rs b/prover/src/lib.rs index b64dc766e..b6ce0b18b 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -42,7 +42,7 @@ use crate::test_utils::{ E, F, VmAir, create_bitwise_air, create_branch_air, create_commit_air, create_cpu_air, create_decode_air, create_dvrm_air, create_halt_air, create_load_air, create_lt_air, create_memw_air, create_memw_aligned_air, create_mul_air, create_page_air, - create_register_air, create_shift_air, + create_register_air, create_register_reload_air, create_shift_air, }; use stark::proof::options::{GoldilocksCubicProofOptions, ProofOptions}; @@ -194,6 +194,7 @@ pub(crate) struct VmAirs { pub halt: VmAir, pub commit: VmAir, pub register: VmAir, + pub register_reload: VmAir, pub pages: Vec, } @@ -206,6 +207,7 @@ impl VmAirs { (&self.halt, &mut traces.halt, &()), (&self.commit, &mut traces.commit, &()), (&self.register, &mut traces.register, &()), + (&self.register_reload, &mut traces.register_reload, &()), ]; for (air, trace) in self.cpus.iter().zip(traces.cpus.iter_mut()) { @@ -254,6 +256,7 @@ impl VmAirs { &self.halt, &self.commit, &self.register, + &self.register_reload, ]; for air in &self.cpus { @@ -349,6 +352,7 @@ impl VmAirs { register::preprocessed_commitment(proof_options, elf.entry_point), register::NUM_PREPROCESSED_COLS, ); + let register_reload = create_register_reload_air(proof_options); let pages: Vec<_> = page_configs .iter() .map(|config| { @@ -376,6 +380,7 @@ impl VmAirs { halt, commit, register, + register_reload, pages, } } @@ -575,11 +580,11 @@ pub fn verify_with_options( Traces::page_configs_from_elf_and_runtime(&program, &vm_proof.runtime_page_ranges); // Cross-check: table_counts must match the number of sub-proofs. - // Fixed tables (bitwise, decode, halt, commit, register) = 5, plus page tables. - let expected_proof_count = vm_proof.table_counts.total() + 5 + page_configs.len(); + // Fixed tables (bitwise, decode, halt, commit, register, register_reload) = 6, plus page tables. + let expected_proof_count = vm_proof.table_counts.total() + 6 + page_configs.len(); if expected_proof_count != vm_proof.proof.proofs.len() { return Err(Error::InvalidTableCounts(format!( - "table_counts total ({}) + 5 fixed + {} pages = {}, but proof contains {} sub-proofs", + "table_counts total ({}) + 6 fixed + {} pages = {}, but proof contains {} sub-proofs", vm_proof.table_counts.total(), page_configs.len(), expected_proof_count, diff --git a/prover/src/tables/mod.rs b/prover/src/tables/mod.rs index 1bb351583..0d32f7ade 100644 --- a/prover/src/tables/mod.rs +++ b/prover/src/tables/mod.rs @@ -35,6 +35,7 @@ pub mod memw_aligned; pub mod mul; pub mod page; pub mod register; +pub mod register_reload; pub mod shift; pub mod trace_builder; diff --git a/prover/src/tables/register_reload.rs b/prover/src/tables/register_reload.rs new file mode 100644 index 000000000..008f4b014 --- /dev/null +++ b/prover/src/tables/register_reload.rs @@ -0,0 +1,214 @@ +//! REGISTER_RELOAD table — bridges large timestamp gaps for register accesses. +//! +//! When a register goes unaccessed for more than [`MAX_REG_GAP`] timestamps, +//! the CPU chip's IS_HALF range check `IS_HALF[TIMESTAMP - PREV_TS ± k]` would +//! overflow. This table inserts intermediate Memory bus prove-old/assume-new +//! pairs that chain the gap in steps of at most [`MAX_REG_GAP`] timestamps, +//! so the final CPU IS_HALF delta always fits in [0, 65535]. +//! +//! ## Token model +//! +//! Each row bridges a single step from `old_ts` to `new_ts` for one register, +//! keeping the value unchanged: +//! +//! - **Sender** (prove-old word 0): `(1, 2*reg_idx, 0, old_ts, 0, val_lo)` +//! - **Sender** (prove-old word 1): `(1, 2*reg_idx+1, 0, old_ts, 0, val_hi)` +//! - **Receiver** (assume-new word 0): `(1, 2*reg_idx, 0, new_ts, 0, val_lo)` +//! - **Receiver** (assume-new word 1): `(1, 2*reg_idx+1, 0, new_ts, 0, val_hi)` +//! +//! ## Padding +//! +//! Padding rows use `old_ts = new_ts = 0`, so prove-old and assume-new have +//! identical tokens and cancel in the LogUp sum (net contribution = 0). +//! +//! ## Columns (5 total) +//! +//! | Index | Name | Description | +//! |-------|---------|--------------------------------------------| +//! | 0 | reg_idx | Register index (0–63 for x0–x63, 255=PC) | +//! | 1 | old_ts | Previous timestamp (prove-old) | +//! | 2 | new_ts | Intermediate timestamp (assume-new) | +//! | 3 | val_lo | Register value word 0 (low 32 bits) | +//! | 4 | val_hi | Register value word 1 (high 32 bits) | + +use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; +use stark::trace::TraceTable; + +use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; + +// ========================================================================= +// Column indices +// ========================================================================= + +pub mod cols { + /// reg_idx: register index (Byte; 0–63 for x0–x63, 255 for PC x255) + pub const REG_IDX: usize = 0; + /// old_ts: previous timestamp (Word; 32-bit value) + pub const OLD_TS: usize = 1; + /// new_ts: intermediate/new timestamp (Word; 32-bit value) + pub const NEW_TS: usize = 2; + /// val_lo: register value word 0 (low 32 bits) + pub const VAL_LO: usize = 3; + /// val_hi: register value word 1 (high 32 bits) + pub const VAL_HI: usize = 4; + + /// Total number of columns. + pub const NUM_COLUMNS: usize = 5; +} + +// ========================================================================= +// Operation type +// ========================================================================= + +/// A single register reload step. +/// +/// Each step contributes 4 Memory bus tokens: prove-old and assume-new for +/// word 0 and word 1 of the register, all with the same value. +#[derive(Debug, Clone, Copy)] +pub struct RegisterReloadOp { + /// Register index (0–63 for x0–x63, 255 for PC x255). + pub reg_idx: u8, + /// Previous timestamp (prove-old). + pub old_ts: u64, + /// Intermediate/new timestamp (assume-new). Must be > old_ts. + pub new_ts: u64, + /// Register value word 0 (low 32 bits). Unchanged across the step. + pub val_lo: u32, + /// Register value word 1 (high 32 bits). Unchanged across the step. + pub val_hi: u32, +} + +// ========================================================================= +// Trace generation +// ========================================================================= + +/// Generates the REGISTER_RELOAD trace table from reload operations. +/// +/// Active rows encode (reg_idx, old_ts, new_ts, val_lo, val_hi). +/// Padding rows are all-zero (old_ts = new_ts = 0 → tokens cancel in LogUp). +pub fn generate_register_reload_trace( + ops: &[RegisterReloadOp], +) -> TraceTable { + let num_active = ops.len(); + let num_rows = num_active.next_power_of_two().max(4); + + let mut data = vec![FE::from(0u64); num_rows * cols::NUM_COLUMNS]; + + for (i, op) in ops.iter().enumerate() { + let base = i * cols::NUM_COLUMNS; + data[base + cols::REG_IDX] = FE::from(op.reg_idx as u64); + data[base + cols::OLD_TS] = FE::from(op.old_ts); + data[base + cols::NEW_TS] = FE::from(op.new_ts); + data[base + cols::VAL_LO] = FE::from(op.val_lo as u64); + data[base + cols::VAL_HI] = FE::from(op.val_hi as u64); + } + // Padding rows remain zero-initialized: old_ts = new_ts = 0 → self-canceling tokens. + + TraceTable::new_main(data, cols::NUM_COLUMNS, num_rows) +} + +// ========================================================================= +// Bus interactions +// ========================================================================= + +/// Returns the 4 Memory bus interactions for the REGISTER_RELOAD table. +/// +/// Each row emits: +/// - 2 senders (prove-old for word 0 and word 1 at old_ts) +/// - 2 receivers (assume-new for word 0 and word 1 at new_ts) +/// +/// With `Multiplicity::One` for all rows. Padding rows have old_ts = new_ts, +/// so their sender and receiver tokens are identical and cancel to zero. +pub fn bus_interactions() -> Vec { + let mut interactions = Vec::with_capacity(4); + + // Compute word addresses from reg_idx: + // word 0 address = 2 * reg_idx + // word 1 address = 2 * reg_idx + 1 + let addr_w0 = BusValue::linear(vec![LinearTerm::Column { + coefficient: 2, + column: cols::REG_IDX, + }]); + let addr_w1 = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 2, + column: cols::REG_IDX, + }, + LinearTerm::Constant(1), + ]); + + let old_ts = BusValue::Packed { + start_column: cols::OLD_TS, + packing: Packing::Direct, + }; + let new_ts = BusValue::Packed { + start_column: cols::NEW_TS, + packing: Packing::Direct, + }; + let val_lo = BusValue::Packed { + start_column: cols::VAL_LO, + packing: Packing::Direct, + }; + let val_hi = BusValue::Packed { + start_column: cols::VAL_HI, + packing: Packing::Direct, + }; + + // Sender (prove-old) word 0: (1, 2*reg_idx, 0, old_ts, 0, val_lo) + interactions.push(BusInteraction::sender( + BusId::Memory, + Multiplicity::One, + vec![ + BusValue::constant(1), // is_register = 1 + addr_w0.clone(), // addr_lo = 2 * reg_idx + BusValue::constant(0), // addr_hi = 0 + old_ts.clone(), // ts_lo = old_ts + BusValue::constant(0), // ts_hi = 0 + val_lo.clone(), // value = val_lo + ], + )); + + // Sender (prove-old) word 1: (1, 2*reg_idx+1, 0, old_ts, 0, val_hi) + interactions.push(BusInteraction::sender( + BusId::Memory, + Multiplicity::One, + vec![ + BusValue::constant(1), // is_register = 1 + addr_w1.clone(), // addr_lo = 2 * reg_idx + 1 + BusValue::constant(0), // addr_hi = 0 + old_ts, // ts_lo = old_ts + BusValue::constant(0), // ts_hi = 0 + val_hi.clone(), // value = val_hi + ], + )); + + // Receiver (assume-new) word 0: (1, 2*reg_idx, 0, new_ts, 0, val_lo) + interactions.push(BusInteraction::receiver( + BusId::Memory, + Multiplicity::One, + vec![ + BusValue::constant(1), // is_register = 1 + addr_w0, // addr_lo = 2 * reg_idx + BusValue::constant(0), // addr_hi = 0 + new_ts.clone(), // ts_lo = new_ts + BusValue::constant(0), // ts_hi = 0 + val_lo, // value = val_lo + ], + )); + + // Receiver (assume-new) word 1: (1, 2*reg_idx+1, 0, new_ts, 0, val_hi) + interactions.push(BusInteraction::receiver( + BusId::Memory, + Multiplicity::One, + vec![ + BusValue::constant(1), // is_register = 1 + addr_w1, // addr_lo = 2 * reg_idx + 1 + BusValue::constant(0), // addr_hi = 0 + new_ts, // ts_lo = new_ts + BusValue::constant(0), // ts_hi = 0 + val_hi, // value = val_hi + ], + )); + + interactions +} diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 7663f27c4..eaa411b8f 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -49,6 +49,7 @@ use super::memw_aligned; use super::mul::{self, MulOperation}; use super::page::{self, FinalByteState, FinalStateMap, PageConfig}; use super::register::{self, FinalRegisterStateMap, FinalRegisterWordState}; +use super::register_reload::{self, RegisterReloadOp}; use super::shift::{self, ShiftOperation}; use super::types::{GoldilocksExtension, GoldilocksField}; use crate::Error; @@ -341,6 +342,7 @@ fn collect_ops_from_cpu( Vec, Vec, Vec, + Vec, ) { let mut memw_ops = Vec::with_capacity(cpu_ops.len() * 3); let mut load_ops = Vec::with_capacity(cpu_ops.len() / 8 + 1); @@ -348,6 +350,7 @@ fn collect_ops_from_cpu( let mut shift_ops = Vec::with_capacity(cpu_ops.len() / 10 + 1); let mut bitwise_ops = Vec::with_capacity(cpu_ops.len() * 4); let mut commit_ops = Vec::new(); + let mut reload_ops: Vec = Vec::new(); let mut current_commit_index = 0u32; let mut commit_ecall_count = 0u32; @@ -368,7 +371,8 @@ fn collect_ops_from_cpu( // Register accesses (rs1, rs2, rd, PC): populate on-Main prev_ts/prev_val // columns AND update register_state. No MEMW operations are produced — // the CPU chip emits the Memory bus interactions directly. - track_register_ops_from_cpu(op, register_state); + // Phase 4: reload ops bridge large timestamp gaps (>MAX_REG_GAP). + track_register_ops_from_cpu(op, register_state, &mut reload_ops); // Collect COMMIT ECALL memory operations (register reads/writes + byte reads) if op.ecall_commit { @@ -434,6 +438,7 @@ fn collect_ops_from_cpu( shift_ops, bitwise_ops, commit_ops, + reload_ops, ) } @@ -539,6 +544,52 @@ fn collect_store_op_from_cpu(op: &CpuOperation, memory_state: &mut MemoryState) memw_op } +/// Maximum timestamp gap allowed before inserting a register reload step. +/// +/// The CPU chip's IS_HALF range checks require deltas that fit in 16 bits: +/// - rs1: IS_HALF[TIMESTAMP - RS1_PREV_TS - 1] ≤ 65535 → max gap = 65536 +/// - rs2: IS_HALF[TIMESTAMP - RS2_PREV_TS] ≤ 65535 → max gap = 65535 +/// - rd: IS_HALF[TIMESTAMP - RD_PREV_TS + 1] ≤ 65535 → max gap = 65534 +/// - PC: IS_HALF[TIMESTAMP - PC_PREV_TS] ≤ 65535 → max gap = 65535 +/// +/// We use 65534 (the most restrictive, for rd) uniformly across all register +/// types. This may add one extra reload step for rs1 at the boundary, but +/// keeps the implementation uniform and correct for all cases. +const MAX_REG_GAP: u64 = 65534; + +/// Generates register reload ops to bridge a large timestamp gap. +/// +/// If `curr_ts - prev_ts > MAX_REG_GAP`, inserts intermediate reload steps +/// (each at most MAX_REG_GAP timestamps apart) and returns the effective +/// previous timestamp to use in the CPU column. +/// +/// For each reload step: prove-old at `old_ts`, assume-new at `new_ts`, +/// keeping the register value unchanged. +fn bridge_timestamp_gap( + reg_idx: u8, + prev_ts: u64, + curr_ts: u64, + value: u64, + reload_ops: &mut Vec, +) -> u64 { + let val_lo = (value & 0xFFFF_FFFF) as u32; + let val_hi = (value >> 32) as u32; + + let mut ts = prev_ts; + while curr_ts.saturating_sub(ts) > MAX_REG_GAP { + let next_ts = ts + MAX_REG_GAP; + reload_ops.push(RegisterReloadOp { + reg_idx, + old_ts: ts, + new_ts: next_ts, + val_lo, + val_hi, + }); + ts = next_ts; + } + ts // effective prev_ts for the CPU column +} + /// Tracks register read/write operations (rs1, rs2, rd, PC) for the Registers /// on-Main optimization. /// @@ -550,25 +601,34 @@ fn collect_store_op_from_cpu(op: &CpuOperation, memory_state: &mut MemoryState) /// 2. Updates `register_state` so subsequent ops (including this op's own /// dependent accesses, and COMMIT / HALT paths that still run later) see /// the post-access state. +/// 3. Phase 4: for gaps > MAX_REG_GAP, inserts reload ops into `reload_ops` +/// and sets prev_ts to the last intermediate timestamp so the CPU IS_HALF +/// check stays within [0, 65535]. /// /// Per-instruction memory timestamps match the CPU chip's sends: /// - rs1 read at timestamp + 0 (if READ_REGISTER1 && rs1 != 0) /// - rs2 read at timestamp + 1 (if READ_REGISTER2 && rs2 != 0) /// - rd write at timestamp + 2 (if WRITE_REGISTER && rd != 0) /// - PC read-write at timestamp + 1 (every non-padding row) -fn track_register_ops_from_cpu(op: &mut CpuOperation, register_state: &mut RegisterState) { +fn track_register_ops_from_cpu( + op: &mut CpuOperation, + register_state: &mut RegisterState, + reload_ops: &mut Vec, +) { let d = &op.decode; // rs1 read @ ts + 0 // Guards match CPU column: READ_REGISTER1 = d.read_register1 && rs1 != 0. // x255 (PC) is stored in register_state.pc_register, not regs[]. if d.read_register1 && d.rs1 != 0 { - let (_old_val, old_ts) = if d.rs1 == 255 { + let (old_val, old_ts) = if d.rs1 == 255 { register_state.read_pc() } else { register_state.read(d.rs1) }; - op.rs1_prev_ts = old_ts; + let effective_prev_ts = + bridge_timestamp_gap(d.rs1, old_ts, op.timestamp, old_val, reload_ops); + op.rs1_prev_ts = effective_prev_ts; if d.rs1 == 255 { register_state.write_pc(op.rv1, op.timestamp); } else { @@ -578,8 +638,10 @@ fn track_register_ops_from_cpu(op: &mut CpuOperation, register_state: &mut Regis // rs2 read @ ts + 1 if d.read_register2 && d.rs2 != 0 { - let (_old_val, old_ts) = register_state.read(d.rs2); - op.rs2_prev_ts = old_ts; + let (old_val, old_ts) = register_state.read(d.rs2); + let effective_prev_ts = + bridge_timestamp_gap(d.rs2, old_ts, op.timestamp, old_val, reload_ops); + op.rs2_prev_ts = effective_prev_ts; register_state.write(d.rs2, op.rv2, op.timestamp + 1); } @@ -587,7 +649,9 @@ fn track_register_ops_from_cpu(op: &mut CpuOperation, register_state: &mut Regis // prev_val is needed here to prove the old state on the Memory bus. if d.write_register && d.rd != 0 { let (old_val, old_ts) = register_state.read(d.rd); - op.rd_prev_ts = old_ts; + let effective_prev_ts = + bridge_timestamp_gap(d.rd, old_ts, op.timestamp, old_val, reload_ops); + op.rd_prev_ts = effective_prev_ts; op.rd_prev_val_lo = (old_val & 0xFFFF_FFFF) as u32; op.rd_prev_val_hi = (old_val >> 32) as u32; register_state.write(d.rd, op.rvd, op.timestamp + 2); @@ -595,8 +659,10 @@ fn track_register_ops_from_cpu(op: &mut CpuOperation, register_state: &mut Regis // PC (x255) read-write @ ts + 1 — always, for every non-padding row. { - let (_pc_val, pc_ts) = register_state.read_pc(); - op.pc_prev_ts = pc_ts; + let (pc_val, pc_ts) = register_state.read_pc(); + let effective_pc_prev_ts = + bridge_timestamp_gap(255, pc_ts, op.timestamp, pc_val, reload_ops); + op.pc_prev_ts = effective_pc_prev_ts; register_state.write_pc(op.next_pc, op.timestamp + 1); } } @@ -1645,6 +1711,10 @@ pub struct Traces { /// COMMIT table for write syscall (byte-by-byte commit with recursive bus) pub commit: TraceTable, + + /// REGISTER_RELOAD table for bridging large timestamp gaps in register accesses. + /// Empty (padding-only) for programs where all register gaps fit in 16 bits. + pub register_reload: TraceTable, } /// Chunk raw ops and generate one trace table per chunk. @@ -1821,7 +1891,7 @@ impl Traces { // Initialize memory state from ELF so first accesses get correct old_value. let mut memory_state = MemoryState::from_elf(elf); let mut register_state = RegisterState::new(elf.entry_point); - let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops, commit_ops) = + let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops, commit_ops, reload_ops) = collect_ops_from_cpu(&mut cpu_ops, &mut memory_state, &mut register_state); // HALT finalization: 33 register MEMW operations at timestamp u64::MAX. @@ -1984,6 +2054,7 @@ impl Traces { // Generate remaining traces in parallel (page, register, halt, commit). // chunk_and_generate already handled cpu, lt, memw, load, mul, dvrm, branch above. let commit_trace = commit::generate_commit_trace(&commit_ops); + let register_reload_trace = register_reload::generate_register_reload_trace(&reload_ops); let (pages, page_configs, register_trace, halt_trace); #[cfg(feature = "parallel")] { @@ -2035,6 +2106,7 @@ impl Traces { branches, halt: halt_trace, commit: commit_trace, + register_reload: register_reload_trace, }) } @@ -2062,7 +2134,7 @@ impl Traces { // Entry point = first instruction's PC (start of execution) let entry_point = cpu_ops.first().map_or(0, |op| op.decode.pc); let mut register_state = RegisterState::new(entry_point); - let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops, commit_ops) = + let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops, commit_ops, reload_ops) = collect_ops_from_cpu(&mut cpu_ops, &mut memory_state, &mut register_state); // HALT finalization: 33 register MEMW operations at timestamp u64::MAX. @@ -2198,6 +2270,8 @@ impl Traces { let dvrms = chunk_and_generate(&dvrm_ops, max_rows.dvrm, dvrm::generate_dvrm_trace); let branches = chunk_and_generate(&branch_ops, max_rows.branch, branch::generate_branch_trace); + let register_reload_trace = + register_reload::generate_register_reload_trace(&reload_ops); let mut bitwise = bitwise::generate_bitwise_trace(); bitwise::update_multiplicities(&mut bitwise, &bitwise_ops); @@ -2259,6 +2333,7 @@ impl Traces { branches, halt: halt_trace, commit: commit_trace, + register_reload: register_reload_trace, }) } diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index 93d6d2971..3e82f5550 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -59,6 +59,9 @@ use crate::tables::page::{bus_interactions as page_bus_interactions, cols as pag use crate::tables::register::{ bus_interactions as register_bus_interactions, cols as register_cols, }; +use crate::tables::register_reload::{ + bus_interactions as register_reload_bus_interactions, cols as register_reload_cols, +}; use crate::tables::shift::{ bus_interactions as shift_bus_interactions, cols as shift_cols, shift_constraints, }; @@ -769,3 +772,26 @@ pub fn create_register_air(proof_options: &ProofOptions) -> VmAir { ) .with_name("REGISTER") } + +/// Create REGISTER_RELOAD AIR with bus interactions. +/// +/// The REGISTER_RELOAD table bridges large timestamp gaps for register accesses +/// by providing intermediate Memory bus prove-old/assume-new token pairs. +/// This prevents IS_HALF overflow when a register goes unaccessed for more +/// than MAX_REG_GAP (65534) timestamps. +pub fn create_register_reload_air(proof_options: &ProofOptions) -> VmAir { + let transition_constraints: Vec>> = vec![]; + + let auxiliary_trace_build_data = AuxiliaryTraceBuildData { + interactions: register_reload_bus_interactions(), + }; + + AirWithBuses::new( + register_reload_cols::NUM_COLUMNS, + auxiliary_trace_build_data, + proof_options, + 1, + transition_constraints, + ) + .with_name("REGISTER_RELOAD") +} diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 87a743892..bd93cfb47 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -1782,7 +1782,7 @@ fn test_crafted_zero_count_proof_must_not_verify() { let airs = VmAirs::new(&elf, &proof_options, true, &[], &zero_counts); let verifier_air_refs = airs.air_refs(); - assert_eq!(verifier_air_refs.len(), 5); + assert_eq!(verifier_air_refs.len(), 6); let mut bitwise_trace = crate::tables::bitwise::generate_bitwise_trace(); From 41a6ba17ba08d540fdf6776e38e12c4142f3dd97 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Mon, 13 Apr 2026 17:01:11 -0300 Subject: [PATCH 4/8] fix lint --- prover/src/lib.rs | 4 +-- prover/src/tables/register_reload.rs | 48 ++++++++++++++-------------- prover/src/tables/trace_builder.rs | 26 +++++++++++---- 3 files changed, 45 insertions(+), 33 deletions(-) diff --git a/prover/src/lib.rs b/prover/src/lib.rs index b6ce0b18b..240d4273d 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -41,8 +41,8 @@ use crate::tables::types::BusId; use crate::test_utils::{ E, F, VmAir, create_bitwise_air, create_branch_air, create_commit_air, create_cpu_air, create_decode_air, create_dvrm_air, create_halt_air, create_load_air, create_lt_air, - create_memw_air, create_memw_aligned_air, create_mul_air, create_page_air, - create_register_air, create_register_reload_air, create_shift_air, + create_memw_air, create_memw_aligned_air, create_mul_air, create_page_air, create_register_air, + create_register_reload_air, create_shift_air, }; use stark::proof::options::{GoldilocksCubicProofOptions, ProofOptions}; diff --git a/prover/src/tables/register_reload.rs b/prover/src/tables/register_reload.rs index 008f4b014..69e1124b9 100644 --- a/prover/src/tables/register_reload.rs +++ b/prover/src/tables/register_reload.rs @@ -159,12 +159,12 @@ pub fn bus_interactions() -> Vec { BusId::Memory, Multiplicity::One, vec![ - BusValue::constant(1), // is_register = 1 - addr_w0.clone(), // addr_lo = 2 * reg_idx - BusValue::constant(0), // addr_hi = 0 - old_ts.clone(), // ts_lo = old_ts - BusValue::constant(0), // ts_hi = 0 - val_lo.clone(), // value = val_lo + BusValue::constant(1), // is_register = 1 + addr_w0.clone(), // addr_lo = 2 * reg_idx + BusValue::constant(0), // addr_hi = 0 + old_ts.clone(), // ts_lo = old_ts + BusValue::constant(0), // ts_hi = 0 + val_lo.clone(), // value = val_lo ], )); @@ -173,12 +173,12 @@ pub fn bus_interactions() -> Vec { BusId::Memory, Multiplicity::One, vec![ - BusValue::constant(1), // is_register = 1 - addr_w1.clone(), // addr_lo = 2 * reg_idx + 1 - BusValue::constant(0), // addr_hi = 0 - old_ts, // ts_lo = old_ts - BusValue::constant(0), // ts_hi = 0 - val_hi.clone(), // value = val_hi + BusValue::constant(1), // is_register = 1 + addr_w1.clone(), // addr_lo = 2 * reg_idx + 1 + BusValue::constant(0), // addr_hi = 0 + old_ts, // ts_lo = old_ts + BusValue::constant(0), // ts_hi = 0 + val_hi.clone(), // value = val_hi ], )); @@ -187,12 +187,12 @@ pub fn bus_interactions() -> Vec { BusId::Memory, Multiplicity::One, vec![ - BusValue::constant(1), // is_register = 1 - addr_w0, // addr_lo = 2 * reg_idx - BusValue::constant(0), // addr_hi = 0 - new_ts.clone(), // ts_lo = new_ts - BusValue::constant(0), // ts_hi = 0 - val_lo, // value = val_lo + BusValue::constant(1), // is_register = 1 + addr_w0, // addr_lo = 2 * reg_idx + BusValue::constant(0), // addr_hi = 0 + new_ts.clone(), // ts_lo = new_ts + BusValue::constant(0), // ts_hi = 0 + val_lo, // value = val_lo ], )); @@ -201,12 +201,12 @@ pub fn bus_interactions() -> Vec { BusId::Memory, Multiplicity::One, vec![ - BusValue::constant(1), // is_register = 1 - addr_w1, // addr_lo = 2 * reg_idx + 1 - BusValue::constant(0), // addr_hi = 0 - new_ts, // ts_lo = new_ts - BusValue::constant(0), // ts_hi = 0 - val_hi, // value = val_hi + BusValue::constant(1), // is_register = 1 + addr_w1, // addr_lo = 2 * reg_idx + 1 + BusValue::constant(0), // addr_hi = 0 + new_ts, // ts_lo = new_ts + BusValue::constant(0), // ts_hi = 0 + val_hi, // value = val_hi ], )); diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index eaa411b8f..9914b4f8f 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -1891,8 +1891,15 @@ impl Traces { // Initialize memory state from ELF so first accesses get correct old_value. let mut memory_state = MemoryState::from_elf(elf); let mut register_state = RegisterState::new(elf.entry_point); - let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops, commit_ops, reload_ops) = - collect_ops_from_cpu(&mut cpu_ops, &mut memory_state, &mut register_state); + let ( + mut memw_ops, + load_ops, + mut lt_ops, + shift_ops, + mut bitwise_ops, + commit_ops, + reload_ops, + ) = collect_ops_from_cpu(&mut cpu_ops, &mut memory_state, &mut register_state); // HALT finalization: 33 register MEMW operations at timestamp u64::MAX. // Must come before Phase 3 (LT from MEMW) so HALT ops get timestamp checks. @@ -2134,8 +2141,15 @@ impl Traces { // Entry point = first instruction's PC (start of execution) let entry_point = cpu_ops.first().map_or(0, |op| op.decode.pc); let mut register_state = RegisterState::new(entry_point); - let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops, commit_ops, reload_ops) = - collect_ops_from_cpu(&mut cpu_ops, &mut memory_state, &mut register_state); + let ( + mut memw_ops, + load_ops, + mut lt_ops, + shift_ops, + mut bitwise_ops, + commit_ops, + reload_ops, + ) = collect_ops_from_cpu(&mut cpu_ops, &mut memory_state, &mut register_state); // HALT finalization: 33 register MEMW operations at timestamp u64::MAX. // Must come before Phase 3 (LT from MEMW) so HALT ops get timestamp checks. @@ -2270,8 +2284,7 @@ impl Traces { let dvrms = chunk_and_generate(&dvrm_ops, max_rows.dvrm, dvrm::generate_dvrm_trace); let branches = chunk_and_generate(&branch_ops, max_rows.branch, branch::generate_branch_trace); - let register_reload_trace = - register_reload::generate_register_reload_trace(&reload_ops); + let register_reload_trace = register_reload::generate_register_reload_trace(&reload_ops); let mut bitwise = bitwise::generate_bitwise_trace(); bitwise::update_multiplicities(&mut bitwise, &bitwise_ops); @@ -2385,4 +2398,3 @@ impl Traces { Self::from_logs_trimmed(logs, instructions, max_rows) } } - From b635f22ed6ffb92b3f22783bedd8e78f6226f9fa Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Mon, 13 Apr 2026 17:28:23 -0300 Subject: [PATCH 5/8] addres coments --- prover/src/tables/cpu.rs | 43 ++++++++++++++++++++++------ prover/src/tables/register_reload.rs | 2 +- prover/src/tables/trace_builder.rs | 21 ++++++++++++++ 3 files changed, 56 insertions(+), 10 deletions(-) diff --git a/prover/src/tables/cpu.rs b/prover/src/tables/cpu.rs index 6b1c147e5..22f18d8a1 100644 --- a/prover/src/tables/cpu.rs +++ b/prover/src/tables/cpu.rs @@ -235,20 +235,37 @@ pub mod cols { // ------------------------------------------------------------------------- // Registers on-Main: prev_ts columns (Phase 1) // ------------------------------------------------------------------------- - // These columns track the timestamp of the previous access to each register. - // Used to eliminate the MEMW_R chip by handling register state inline in CPU. + // ------------------------------------------------------------------------- + // Registers-on-Main columns (cols 74–79) + // + // These columns are WITNESS-ONLY — they have no polynomial transition or + // boundary constraints. Their correctness is enforced exclusively by the + // Memory bus (LogUp multiset argument): + // + // - Each PREV_TS / PREV_VAL column appears in a Memory bus prove-old sender + // token. For that token to be matched, there must be an identical receiver + // token (assume-new) emitted by a prior CPU row or REGISTER_RELOAD row. + // Any fake value breaks the global multiset balance and fails verification. + // + // - The IS_HALF range check (IS_HALF[TIMESTAMP - PREV_TS ± k]) prevents + // timestamp deltas from wrapping the Goldilocks field order, bounding each + // gap to [0, 65534]. Gaps larger than this are bridged by REGISTER_RELOAD. + // + // This is intentional: the Memory bus argument is the sole enforcement + // mechanism, analogous to how MEMW_R used to enforce register ordering. + // ------------------------------------------------------------------------- - /// rs1_prev_ts: Previous mem_step when rs1 was last accessed + /// rs1_prev_ts: timestamp of the previous access to rs1 (bus-enforced witness) pub const RS1_PREV_TS: usize = 74; - /// rs2_prev_ts: Previous mem_step when rs2 was last accessed + /// rs2_prev_ts: timestamp of the previous access to rs2 (bus-enforced witness) pub const RS2_PREV_TS: usize = 75; - /// rd_prev_ts: Previous mem_step when rd was last written + /// rd_prev_ts: timestamp of the previous access to rd (bus-enforced witness) pub const RD_PREV_TS: usize = 76; - /// rd_prev_val_lo: Previous value of rd (low 32 bits) — for proving old state + /// rd_prev_val_lo: previous value of rd, low 32 bits (bus-enforced witness) pub const RD_PREV_VAL_LO: usize = 77; - /// rd_prev_val_hi: Previous value of rd (high 32 bits) + /// rd_prev_val_hi: previous value of rd, high 32 bits (bus-enforced witness) pub const RD_PREV_VAL_HI: usize = 78; - /// pc_prev_ts: Previous mem_step when x255 (PC) was last updated + /// pc_prev_ts: timestamp of the previous access to x255/PC (bus-enforced witness) pub const PC_PREV_TS: usize = 79; /// Total number of columns @@ -1442,13 +1459,21 @@ pub fn bus_interactions() -> Vec { // // Per access, 5 bus interactions are emitted (mirroring MEMW_R): // - // 1 × IsHalfword[current_ts - prev_ts - 1] (range-checks delta ∈ [1, 2^16]) + // 1 × IsHalfword[current_ts - prev_ts ± k] (range-checks delta ∈ [0, 65535]) // 2 × Memory (sender, read-old word 0, 1) (prove-old at prev_ts, prev_val) // 2 × Memory (receiver, write-new word 0, 1) (assume-new at current_ts, new_val) // // Memory bus signature: [is_register, addr_lo, addr_hi, ts_lo, ts_hi, value] // Registers are always 2 words: word 0 at addr=2*reg, word 1 at addr=2*reg+1. // Timestamps fit in u32 for CPU-originated ops, so ts_hi = 0 always. + // + // Soundness note: RS1_PREV_TS, RS2_PREV_TS, RD_PREV_TS, RD_PREV_VAL_LO/HI, + // and PC_PREV_TS (cols 74–79) are witness-only columns with no polynomial + // constraints. Their correctness is enforced by the Memory bus multiset + // argument (LogUp): a prove-old sender at (addr, prev_ts, prev_val) must be + // matched by a prior assume-new receiver with the same tuple. Any fake value + // leaves an unmatched token, failing verification. The IS_HALF checks bound + // each gap to [0, 65535]; larger gaps are bridged by the REGISTER_RELOAD table. // ------------------------------------------------------------------------- // rs1 read @ timestamp + 0 (multiplicity = READ_REGISTER1) diff --git a/prover/src/tables/register_reload.rs b/prover/src/tables/register_reload.rs index 69e1124b9..1c81db854 100644 --- a/prover/src/tables/register_reload.rs +++ b/prover/src/tables/register_reload.rs @@ -104,7 +104,7 @@ pub fn generate_register_reload_trace( } // Padding rows remain zero-initialized: old_ts = new_ts = 0 → self-canceling tokens. - TraceTable::new_main(data, cols::NUM_COLUMNS, num_rows) + TraceTable::new_main(data, cols::NUM_COLUMNS, 1) } // ========================================================================= diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 9914b4f8f..e4ece333d 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -572,6 +572,11 @@ fn bridge_timestamp_gap( value: u64, reload_ops: &mut Vec, ) -> u64 { + debug_assert!( + curr_ts >= prev_ts, + "bridge_timestamp_gap: curr_ts ({curr_ts}) < prev_ts ({prev_ts}) for reg_idx={reg_idx}" + ); + let val_lo = (value & 0xFFFF_FFFF) as u32; let val_hi = (value >> 32) as u32; @@ -947,6 +952,10 @@ fn collect_bitwise_from_cpu_registers(cpu_ops: &[CpuOperation]) -> Vec prev, + "rs1 IS_HALF delta underflow: ts={ts} prev={prev}" + ); let delta = ts.wrapping_sub(prev).wrapping_sub(1) as u16; ops.push(BitwiseOperation::halfword( BitwiseOperationType::IsHalf, @@ -958,6 +967,10 @@ fn collect_bitwise_from_cpu_registers(cpu_ops: &[CpuOperation]) -> Vec= prev, + "rs2 IS_HALF delta underflow: ts={ts} prev={prev}" + ); let delta = ts.wrapping_sub(prev) as u16; ops.push(BitwiseOperation::halfword( BitwiseOperationType::IsHalf, @@ -969,6 +982,10 @@ fn collect_bitwise_from_cpu_registers(cpu_ops: &[CpuOperation]) -> Vec= prev, + "rd IS_HALF delta underflow: ts={ts} prev={prev}" + ); let delta = ts.wrapping_sub(prev).wrapping_add(1) as u16; ops.push(BitwiseOperation::halfword( BitwiseOperationType::IsHalf, @@ -996,6 +1013,10 @@ fn collect_bitwise_from_cpu_registers(cpu_ops: &[CpuOperation]) -> Vec= prev, + "PC IS_HALF delta underflow: ts={ts} prev={prev}" + ); let delta = ts.wrapping_sub(prev) as u16; ops.push(BitwiseOperation::halfword( BitwiseOperationType::IsHalf, From 8bef74acff8d205d95edd7e35deaebbd06598043 Mon Sep 17 00:00:00 2001 From: Nicole Date: Tue, 14 Apr 2026 16:38:06 -0300 Subject: [PATCH 6/8] Add unit and integration tests for bridge_timestamp_gap and REGISTER_RELOA --- prover/src/tables/trace_builder.rs | 60 ++++++++++++++++++ prover/src/tests/trace_builder_tests.rs | 81 +++++++++++++++++++++++++ 2 files changed, 141 insertions(+) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index e4ece333d..a47f32ac0 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2419,3 +2419,63 @@ impl Traces { Self::from_logs_trimmed(logs, instructions, max_rows) } } + +#[cfg(test)] +mod tests { + use super::register_reload::RegisterReloadOp; + use super::{MAX_REG_GAP, bridge_timestamp_gap}; + + #[test] + fn test_bridge_no_reload_at_exact_gap() { + // gap == MAX_REG_GAP: condition `gap > MAX_REG_GAP` is false → no ops + let mut ops: Vec = Vec::new(); + let eff = bridge_timestamp_gap(1, 0, MAX_REG_GAP, 0, &mut ops); + assert!(ops.is_empty()); + assert_eq!(eff, 0); + } + + #[test] + fn test_bridge_one_reload_step() { + // gap == MAX_REG_GAP + 1: exactly one reload op is inserted + let mut ops: Vec = Vec::new(); + let prev_ts = 10; + let curr_ts = prev_ts + MAX_REG_GAP + 1; + let value: u64 = 0xDEAD_BEEF_1234_5678; + let eff = bridge_timestamp_gap(3, prev_ts, curr_ts, value, &mut ops); + assert_eq!(ops.len(), 1); + assert_eq!(ops[0].reg_idx, 3); + assert_eq!(ops[0].old_ts, prev_ts); + assert_eq!(ops[0].new_ts, prev_ts + MAX_REG_GAP); + assert_eq!(ops[0].val_lo, 0x1234_5678_u32); + assert_eq!(ops[0].val_hi, 0xDEAD_BEEF_u32); + // Effective prev_ts must leave the remaining delta ≤ MAX_REG_GAP + assert_eq!(eff, prev_ts + MAX_REG_GAP); + assert!(curr_ts - eff <= MAX_REG_GAP); + } + + #[test] + fn test_bridge_three_reload_steps() { + // gap == 3 * MAX_REG_GAP + 1: three chained reload ops + let mut ops: Vec = Vec::new(); + let curr_ts = 3 * MAX_REG_GAP + 1; + let eff = bridge_timestamp_gap(7, 0, curr_ts, 99, &mut ops); + assert_eq!(ops.len(), 3); + assert_eq!(ops[0].old_ts, 0); + assert_eq!(ops[0].new_ts, MAX_REG_GAP); + assert_eq!(ops[1].old_ts, MAX_REG_GAP); + assert_eq!(ops[1].new_ts, 2 * MAX_REG_GAP); + assert_eq!(ops[2].old_ts, 2 * MAX_REG_GAP); + assert_eq!(ops[2].new_ts, 3 * MAX_REG_GAP); + assert_eq!(eff, 3 * MAX_REG_GAP); + assert!(curr_ts - eff <= MAX_REG_GAP); + } + + #[test] + fn test_bridge_zero_gap() { + // prev_ts == curr_ts: no ops, effective ts unchanged + let mut ops: Vec = Vec::new(); + let eff = bridge_timestamp_gap(0, 42, 42, 0, &mut ops); + assert!(ops.is_empty()); + assert_eq!(eff, 42); + } +} diff --git a/prover/src/tests/trace_builder_tests.rs b/prover/src/tests/trace_builder_tests.rs index 03cbc541d..e75ce8b82 100644 --- a/prover/src/tests/trace_builder_tests.rs +++ b/prover/src/tests/trace_builder_tests.rs @@ -3,6 +3,7 @@ use crate::tables::bitwise; use crate::tables::cpu::cols; use crate::tables::lt; +use crate::tables::register_reload; use crate::tables::trace_builder::Traces; use crate::tables::types::FE; use executor::vm::instruction::decoding::{ArithOp, Comparison, Instruction}; @@ -565,3 +566,83 @@ fn test_lt_generates_bitwise_lookups() { "IS_HALF lookup for lhs_sub_rhs[0] should have non-zero multiplicity" ); } + +/// Test that REGISTER_RELOAD rows are generated when a register goes unaccessed +/// for more than MAX_REG_GAP (65534) timestamps. +/// +/// Timeline: +/// - x5 initialized at ts=0 (value=0, via REGISTER table init) +/// - 16383 ADD x0, x0, x0 instructions (ts=4..65532), none touch x5 +/// - Instruction 16383 (0-indexed): ADD x6, x5, x0 — first CPU access to x5 +/// at ts = 4 * 16384 = 65536. Gap = 65536 > 65534 → one reload op for x5. +/// +/// Row 0 of REGISTER_RELOAD must contain: +/// reg_idx=5, old_ts=0, new_ts=65534, val_lo=0, val_hi=0 +/// +/// (x6 and x17 also each generate one reload row; total active rows ≥ 1.) +#[test] +fn test_register_reload_triggered_for_large_timestamp_gap() { + const N_NOPS: usize = 16383; + + let mut logs: Vec = Vec::with_capacity(N_NOPS + 2); + let mut instrs: Vec = Vec::with_capacity(N_NOPS + 2); + + // N_NOPS instructions that don't touch x5: ADD x0, x0, x0 + for i in 0..N_NOPS as u64 { + logs.push(make_add_log(0x1000 + i * 4, 0, 0, 0)); + instrs.push(Instruction::Arith { + dst: 0, + src1: 0, + src2: 0, + op: ArithOp::Add, + }); + } + + // Instruction N_NOPS: ADD x6, x5, x0 — first access to x5 (value=0). + // rs1=x5 is processed before rd=x6, so the x5 reload appears first. + logs.push(make_add_log(0x1000 + N_NOPS as u64 * 4, 0, 0, 0)); + instrs.push(Instruction::Arith { + dst: 6, + src1: 5, + src2: 0, + op: ArithOp::Add, + }); + + append_ecall(&mut logs, &mut instrs); + let instructions = make_instructions(&logs, &instrs); + let traces = Traces::from_logs(&logs, instructions, &Default::default()).unwrap(); + + // Row 0: the first reload is for x5 (rs1 processed before rd=x6) + let reload_row = traces.register_reload.main_table.get_row(0); + assert_eq!( + reload_row[register_reload::cols::REG_IDX], + FE::from(5u64), + "reg_idx should be 5" + ); + assert_eq!( + reload_row[register_reload::cols::OLD_TS], + FE::from(0u64), + "old_ts should be 0" + ); + assert_eq!( + reload_row[register_reload::cols::NEW_TS], + FE::from(65534u64), + "new_ts should be MAX_REG_GAP = 65534" + ); + assert_eq!( + reload_row[register_reload::cols::VAL_LO], + FE::from(0u64), + "val_lo should be 0" + ); + assert_eq!( + reload_row[register_reload::cols::VAL_HI], + FE::from(0u64), + "val_hi should be 0" + ); + + // The table must have at least 2 rows (x5 reload + at least one more) + assert!( + traces.register_reload.main_table.height >= 2, + "register_reload table should have at least 2 rows" + ); +} From 8ead119fc19c0ad012b140951e712f18fb0191d3 Mon Sep 17 00:00:00 2001 From: Nicole Date: Tue, 14 Apr 2026 17:00:12 -0300 Subject: [PATCH 7/8] Fix stale doc comment, update mod.rs table listing, document AUIPC/JAL IS_HALF[0] --- prover/src/tables/cpu.rs | 5 +++++ prover/src/tables/mod.rs | 23 ++++++++++++----------- prover/src/tables/trace_builder.rs | 6 +++++- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/prover/src/tables/cpu.rs b/prover/src/tables/cpu.rs index 22f18d8a1..fb488551e 100644 --- a/prover/src/tables/cpu.rs +++ b/prover/src/tables/cpu.rs @@ -2012,6 +2012,11 @@ pub fn bus_interactions() -> Vec { }, ]); // IsHalfword[(TIMESTAMP + 1) - PC_PREV_TS - 1] = [TIMESTAMP - PC_PREV_TS] + // + // AUIPC/JAL special case (rs1 = 255): the rs1 block runs first and + // consumes the old PC token, then produces a new token at TIMESTAMP. + // The PC block then reads PC_PREV_TS = TIMESTAMP, so the IS_HALF delta + // is TIMESTAMP - TIMESTAMP = 0. IS_HALF[0] is valid (0 ∈ [0, 65535]). interactions.push(BusInteraction::sender( BusId::IsHalfword, non_padding.clone(), diff --git a/prover/src/tables/mod.rs b/prover/src/tables/mod.rs index 0d32f7ade..d89b38259 100644 --- a/prover/src/tables/mod.rs +++ b/prover/src/tables/mod.rs @@ -49,17 +49,18 @@ pub use types::BusId; /// (* MEMW_A formula gives 2^20, but set to 2^19 to match MEMW chunk geometry; /// benchmarks show better parallel throughput with smaller chunks.) /// -/// | Table | Main | Bus | Eff.width | Max rows | -/// |---------|------|-----|-----------|----------| -/// | MEMW | 49 | 26 | 127 | 2^19 | -/// | MEMW_A | 29 | 20 | 89 | 2^19 * | -/// | CPU | 74 | 40 | 194 | 2^19 | -/// | DVRM | 34 | 34 | 136 | 2^19 | -/// | MUL | 26 | 16 | 74 | 2^20 | -/// | LT | 15 | 9 | 42 | 2^21 | -/// | SHIFT | 27 | 15 | 72 | 2^20 | -/// | LOAD | 18 | 5 | 33 | 2^21 | -/// | BRANCH | 14 | 6 | 32 | 2^21 | +/// | Table | Main | Bus | Eff.width | Max rows | +/// |-----------------|------|-----|-----------|----------| +/// | MEMW | 49 | 26 | 127 | 2^19 | +/// | MEMW_A | 29 | 20 | 89 | 2^19 * | +/// | CPU | 80 | 38 | 194 | 2^19 | +/// | REGISTER_RELOAD | 5 | 4 | 17 | (none) | +/// | DVRM | 34 | 34 | 136 | 2^19 | +/// | MUL | 26 | 16 | 74 | 2^20 | +/// | LT | 15 | 9 | 42 | 2^21 | +/// | SHIFT | 27 | 15 | 72 | 2^20 | +/// | LOAD | 18 | 5 | 33 | 2^21 | +/// | BRANCH | 14 | 6 | 32 | 2^21 | pub mod max_rows { pub const CPU: usize = 1 << 19; // 524,288 — eff. width 194 pub const MEMW: usize = 1 << 19; // 524,288 — eff. width 127 (baseline) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index a47f32ac0..f034f2acb 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -663,6 +663,11 @@ fn track_register_ops_from_cpu( } // PC (x255) read-write @ ts + 1 — always, for every non-padding row. + // + // AUIPC/JAL (rs1 = 255): the rs1 block above already called write_pc with + // timestamp = op.timestamp, so read_pc() returns pc_ts = op.timestamp here. + // This produces pc_prev_ts = op.timestamp and IS_HALF[TIMESTAMP - PC_PREV_TS] + // = IS_HALF[0] in the AIR — a valid one-step ordering check by design. { let (pc_val, pc_ts) = register_state.read_pc(); let effective_pc_prev_ts = @@ -929,7 +934,6 @@ fn is_aligned_op(op: &MemwOperation) -> bool { true } -/// Collects bitwise lookups from MEMW_A operations. /// Collects IS_HALFWORD bitwise lookups for CPU register-on-Main range checks. /// /// For each non-padding CPU instruction, the CPU chip sends IS_HALFWORD queries From 1e33ca5eec34b161e20a40962013b4a0c2b904b4 Mon Sep 17 00:00:00 2001 From: Nicole Date: Wed, 15 Apr 2026 10:03:15 -0300 Subject: [PATCH 8/8] Assert for timestamp ordering invariants in trace builder --- prover/src/tables/trace_builder.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index f034f2acb..a183f96bb 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -572,7 +572,7 @@ fn bridge_timestamp_gap( value: u64, reload_ops: &mut Vec, ) -> u64 { - debug_assert!( + assert!( curr_ts >= prev_ts, "bridge_timestamp_gap: curr_ts ({curr_ts}) < prev_ts ({prev_ts}) for reg_idx={reg_idx}" ); @@ -956,7 +956,7 @@ fn collect_bitwise_from_cpu_registers(cpu_ops: &[CpuOperation]) -> Vec prev, "rs1 IS_HALF delta underflow: ts={ts} prev={prev}" ); @@ -971,7 +971,7 @@ fn collect_bitwise_from_cpu_registers(cpu_ops: &[CpuOperation]) -> Vec= prev, "rs2 IS_HALF delta underflow: ts={ts} prev={prev}" ); @@ -986,7 +986,7 @@ fn collect_bitwise_from_cpu_registers(cpu_ops: &[CpuOperation]) -> Vec= prev, "rd IS_HALF delta underflow: ts={ts} prev={prev}" ); @@ -1017,7 +1017,7 @@ fn collect_bitwise_from_cpu_registers(cpu_ops: &[CpuOperation]) -> Vec= prev, "PC IS_HALF delta underflow: ts={ts} prev={prev}" );