From 711080d6841ba4da6f0169c5ab28c63302b828d5 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Mon, 6 Apr 2026 11:14:48 -0300 Subject: [PATCH 01/14] save work --- executor/programs/asm/test_keccak.s | 30 + executor/src/vm/instruction/execution.rs | 154 +++++ executor/tests/asm.rs | 7 + prover/src/constraints/cpu.rs | 83 ++- prover/src/tables/cpu.rs | 82 ++- prover/src/tables/keccak.rs | 345 ++++++++++ prover/src/tables/keccak_rc.rs | 216 +++++++ prover/src/tables/keccak_rnd.rs | 770 +++++++++++++++++++++++ prover/src/tables/mod.rs | 3 + prover/src/tables/types.rs | 12 + prover/src/tests/constraints_tests.rs | 18 +- prover/src/tests/cpu_tests.rs | 9 +- syscalls/src/syscalls.rs | 22 + 13 files changed, 1729 insertions(+), 22 deletions(-) create mode 100644 executor/programs/asm/test_keccak.s create mode 100644 prover/src/tables/keccak.rs create mode 100644 prover/src/tables/keccak_rc.rs create mode 100644 prover/src/tables/keccak_rnd.rs diff --git a/executor/programs/asm/test_keccak.s b/executor/programs/asm/test_keccak.s new file mode 100644 index 000000000..5d5aac618 --- /dev/null +++ b/executor/programs/asm/test_keccak.s @@ -0,0 +1,30 @@ + .attribute 5, "rv64i2p1_m2p0_zmmul1p0" +.Lfunc_end0: + .globl main +main: + # Allocate 200 bytes on the stack for the Keccak state (25 × u64) + addi sp, sp, -200 + + # Zero out the state (200 bytes = 25 doublewords) + mv t0, sp + li t1, 25 +.Lzero_loop: + sd zero, 0(t0) + addi t0, t0, 8 + addi t1, t1, -1 + bnez t1, .Lzero_loop + + # Call keccak-f[1600] permutation + # a0 = pointer to 200-byte state + # a7 = syscall number (0xFFFFFFFFFFFFFFFE = u64::MAX - 1) + mv a0, sp + li a7, -2 + ecall + + # Restore stack and halt + addi sp, sp, 200 + li a0, 0 + li a7, 93 + ecall +.Lfunc_end1: + .size main, .Lfunc_end1-main diff --git a/executor/src/vm/instruction/execution.rs b/executor/src/vm/instruction/execution.rs index dcab3f927..c28b3de65 100644 --- a/executor/src/vm/instruction/execution.rs +++ 
b/executor/src/vm/instruction/execution.rs @@ -13,8 +13,14 @@ pub enum SyscallNumbers { GetPrivateInputs = 4, Commit = 64, Halt = 93, + KeccakPermute = 94, // Actual syscall number is KECCAK_SYSCALL_NUMBER (u64::MAX - 1) } +/// Syscall number for KeccakPermute (u64::MAX - 1 = 0xFFFF_FFFF_FFFF_FFFE). +/// +/// Cannot be an enum discriminant because it exceeds isize::MAX. +pub const KECCAK_SYSCALL_NUMBER: u64 = u64::MAX - 1; + impl TryFrom for SyscallNumbers { type Error = (); fn try_from(value: u64) -> Result { @@ -24,6 +30,7 @@ impl TryFrom for SyscallNumbers { 4 => Ok(SyscallNumbers::GetPrivateInputs), 64 => Ok(SyscallNumbers::Commit), 93 => Ok(SyscallNumbers::Halt), + v if v == KECCAK_SYSCALL_NUMBER => Ok(SyscallNumbers::KeccakPermute), _ => Err(()), } } @@ -326,6 +333,19 @@ impl Instruction { memory.store_byte(pointer + i as u64, *byte); } } + SyscallNumbers::KeccakPermute => { + // keccak-f[1600] permutation on 200 bytes (25 × u64) at address in x10 + let state_addr = registers.read(10)?; + let mut state = [0u64; 25]; + for (i, lane) in state.iter_mut().enumerate() { + *lane = memory.load_doubleword(state_addr + (i as u64) * 8)?; + } + keccak_f1600(&mut state); + for (i, &lane) in state.iter().enumerate() { + memory.store_doubleword(state_addr + (i as u64) * 8, lane)?; + } + src2_val = state_addr; + } SyscallNumbers::Halt => { // halt return Ok(Log { @@ -499,3 +519,137 @@ pub enum ExecutionError { #[error("Invalid commit fd: expected 1 (stdout), got {0}")] InvalidCommitFd(u64), } + +// ============================================================================= +// Keccak-f[1600] permutation +// ============================================================================= + +/// Round constants for Keccak-f[1600] (24 rounds). 
+pub const KECCAK_RC: [u64; 24] = [ + 0x0000000000000001, + 0x0000000000008082, + 0x800000000000808A, + 0x8000000080008000, + 0x000000000000808B, + 0x0000000080000001, + 0x8000000080008081, + 0x8000000000008009, + 0x000000000000008A, + 0x0000000000000088, + 0x0000000080008009, + 0x000000008000000A, + 0x000000008000808B, + 0x800000000000008B, + 0x8000000000008089, + 0x8000000000008003, + 0x8000000000008002, + 0x8000000000000080, + 0x000000000000800A, + 0x800000008000000A, + 0x8000000080008081, + 0x8000000000008080, + 0x0000000080000001, + 0x8000000080008008, +]; + +/// Rotation offsets R[x][y] for the rho step of Keccak-f[1600]. +pub const KECCAK_RHO: [[u32; 5]; 5] = [ + [0, 36, 3, 41, 18], + [1, 44, 10, 45, 2], + [62, 6, 43, 15, 61], + [28, 55, 25, 21, 56], + [27, 20, 39, 8, 14], +]; + +/// Apply the Keccak-f[1600] permutation (24 rounds) to a 25-word state. +/// +/// The state is indexed as `state[x + 5*y]` where `x, y ∈ {0..4}`. +pub fn keccak_f1600(state: &mut [u64; 25]) { + for &rc in &KECCAK_RC { + // θ (theta) + let mut c = [0u64; 5]; + for x in 0..5 { + c[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^ state[x + 20]; + } + let mut d = [0u64; 5]; + for x in 0..5 { + d[x] = c[(x + 4) % 5] ^ c[(x + 1) % 5].rotate_left(1); + } + for x in 0..5 { + for y in 0..5 { + state[x + 5 * y] ^= d[x]; + } + } + + // ρ (rho) and π (pi) + let mut b = [0u64; 25]; + for x in 0..5 { + for y in 0..5 { + b[y + 5 * ((2 * x + 3 * y) % 5)] = + state[x + 5 * y].rotate_left(KECCAK_RHO[x][y]); + } + } + + // χ (chi) + for x in 0..5 { + for y in 0..5 { + state[x + 5 * y] = + b[x + 5 * y] ^ (!b[(x + 1) % 5 + 5 * y] & b[(x + 2) % 5 + 5 * y]); + } + } + + // ι (iota) + state[0] ^= rc; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_keccak_f1600_zero_input() { + let mut state = [0u64; 25]; + keccak_f1600(&mut state); + + let expected: [u64; 25] = [ + 0xF1258F7940E1DDE7, + 0x84D5CCF933C0478A, + 0xD598261EA65AA9EE, + 0xBD1547306F80494D, + 
0x8B284E056253D057, + 0xFF97A42D7F8E6FD4, + 0x90FEE5A0A44647C4, + 0x8C5BDA0CD6192E76, + 0xAD30A6F71B19059C, + 0x30935AB7D08FFC64, + 0xEB5AA93F2317D635, + 0xA9A6E6260D712103, + 0x81A57C16DBCF555F, + 0x43B831CD0347C826, + 0x01F22F1A11A5569F, + 0x05E5635A21D9AE61, + 0x64BEFEF28CC970F2, + 0x613670957BC46611, + 0xB87C5A554FD00ECB, + 0x8C3EE88A1CCF32C8, + 0x940C7922AE3A2614, + 0x1841F924A2C509E4, + 0x16F53526E70465C2, + 0x75F644E97F30A13B, + 0xEAF1FF7B5CECA249, + ]; + + assert_eq!(state, expected, "keccak-f[1600] on zero input mismatch"); + } + + #[test] + fn test_keccak_f1600_nonzero_input() { + let mut state = [0u64; 25]; + state[0] = 1; + let original = state; + keccak_f1600(&mut state); + assert_ne!(state, original); + assert!(state.iter().any(|&x| x != 0)); + } +} diff --git a/executor/tests/asm.rs b/executor/tests/asm.rs index fc9d3657f..1d5b2586f 100644 --- a/executor/tests/asm.rs +++ b/executor/tests/asm.rs @@ -788,3 +788,10 @@ fn test_sub_64bit() { fn test_sub_underflow() { run_program("./program_artifacts/asm/sub_underflow.elf"); } + +// ==================== Keccak Precompile ==================== + +#[test] +fn test_keccak() { + run_program("./program_artifacts/asm/test_keccak.elf"); +} diff --git a/prover/src/constraints/cpu.rs b/prover/src/constraints/cpu.rs index f95a916eb..cecf5bf4e 100644 --- a/prover/src/constraints/cpu.rs +++ b/prover/src/constraints/cpu.rs @@ -84,6 +84,7 @@ pub const BIT_FLAG_COLUMNS: &[usize] = &[ cols::MUL, cols::DIVREM, cols::ECALL, + cols::ECALL_KECCAK, cols::EBREAK, // Sign bits cols::RV1_EXT_BIT, @@ -101,6 +102,80 @@ pub fn create_is_bit_constraints(constraint_idx_start: usize) -> (Vec Self { + Self { constraint_idx } + } + + fn compute(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + let ecall_keccak = step + .get_main_evaluation_element(0, cols::ECALL_KECCAK) + .clone(); + let ecall = step.get_main_evaluation_element(0, cols::ECALL).clone(); + let one = FieldElement::::one(); + ecall_keccak 
* (one - ecall) + } +} + +impl TransitionConstraint + for EcallKeccakImpliesEcallConstraint +{ + fn degree(&self) -> usize { + 2 + } + + fn constraint_idx(&self) -> usize { + self.constraint_idx + } + + fn end_exemptions(&self) -> usize { + 0 + } + + fn evaluate( + &self, + evaluation_context: &TransitionEvaluationContext, + transition_evaluations: &mut [FieldElement], + ) { + match evaluation_context { + TransitionEvaluationContext::Prover { + frame, + periodic_values: _, + rap_challenges: _, + .. + } => { + let constraint_value = self.compute(frame.get_evaluation_step(0)); + transition_evaluations[self.constraint_idx] = constraint_value.to_extension(); + } + + TransitionEvaluationContext::Verifier { + frame, + periodic_values: _, + rap_challenges: _, + .. + } => { + let constraint_value = self.compute(frame.get_evaluation_step(0)); + transition_evaluations[self.constraint_idx] = constraint_value; + } + } + } +} + // ========================================================================= // ALU ADD Constraints // ========================================================================= @@ -1222,9 +1297,9 @@ pub fn create_jalr_constraints(constraint_idx_start: usize) -> (Vec ( other.push(Box::new(EbreakConstraint::new(next_idx))); next_idx += 1; + // ECALL_KECCAK implies ECALL + other.push(Box::new(EcallKeccakImpliesEcallConstraint::new(next_idx))); + next_idx += 1; + // rv1 zero-forcing (CM48): (1 - read_register1) * rv1[i] = 0 for i ∈ [0, 2] for &value_col in &[cols::RV1_0, cols::RV1_1, cols::RV1_2] { other.push(Box::new(RegNotReadIsZeroConstraint::new( diff --git a/prover/src/tables/cpu.rs b/prover/src/tables/cpu.rs index e29ae1d57..8b97b180f 100644 --- a/prover/src/tables/cpu.rs +++ b/prover/src/tables/cpu.rs @@ -232,8 +232,16 @@ pub mod cols { /// branch_cond: Whether branch is taken pub const BRANCH_COND: usize = 73; + /// ECALL_KECCAK: 1 when the ECALL is a KeccakPermute syscall, 0 otherwise + pub const ECALL_KECCAK: usize = 74; + + /// Keccak state 
address (DWordWL: lo32 and hi32). + /// Non-zero only for KeccakPermute ECALLs. + pub const KECCAK_STATE_ADDR_0: usize = 75; + pub const KECCAK_STATE_ADDR_1: usize = 76; + /// Total number of columns - pub const NUM_COLUMNS: usize = 74; + pub const NUM_COLUMNS: usize = 77; // ------------------------------------------------------------------------- // Helper ranges for iteration @@ -298,6 +306,12 @@ pub struct CpuOperation { /// For Commit ECALLs: byte count from x12 pub commit_count: u64, + + /// Whether this ECALL is a KeccakPermute syscall + pub ecall_keccak: bool, + + /// For KeccakPermute ECALLs: state address from x10 + pub keccak_state_addr: u64, } impl CpuOperation { @@ -638,6 +652,9 @@ impl CpuOperation { } else { (0, 0) }; + let ecall_keccak = decode.op_ecall + && log.src1_val == executor::vm::instruction::execution::KECCAK_SYSCALL_NUMBER; + let keccak_state_addr = if ecall_keccak { log.src2_val } else { 0 }; // CM50: (1 - read_register2) * rv2[i] = 0. When read_register2=0, rv2 must be 0. // For example, ECALL has read_register2=0 (rs2 defaults to 0). The commit buf_addr is // carried separately in commit_buf_addr and does not go through rv2. 
@@ -660,6 +677,8 @@ impl CpuOperation { ecall_commit, commit_buf_addr, commit_count, + ecall_keccak, + keccak_state_addr, }; // Compute runtime-specific values based on instruction type @@ -799,6 +818,11 @@ pub fn generate_cpu_trace( data[base + cols::MUL] = FE::from(d.op_mul as u64); data[base + cols::DIVREM] = FE::from(d.op_divrem as u64); data[base + cols::ECALL] = FE::from(d.op_ecall as u64); + data[base + cols::ECALL_KECCAK] = FE::from(op.ecall_keccak as u64); + data[base + cols::KECCAK_STATE_ADDR_0] = + FE::from(op.keccak_state_addr & 0xFFFF_FFFF); + data[base + cols::KECCAK_STATE_ADDR_1] = + FE::from(op.keccak_state_addr >> 32); data[base + cols::EBREAK] = FE::from(d.op_ebreak as u64); // Output columns @@ -1995,15 +2019,21 @@ pub fn bus_interactions() -> Vec { )); } - // ECALL interaction (single shared bus for HALT and COMMIT) + // ECALL interaction for HALT and COMMIT (excludes keccak) // ------------------------------------------------------------------------- - // Sends to both HALT and COMMIT tables. Each receiver pattern-matches on - // the syscall number in the payload. - // multiplicity = ECALL - // rv1 = value of a7 register (syscall number). 
+ // multiplicity = ECALL - ECALL_KECCAK interactions.push(BusInteraction::sender( BusId::Ecall, - Multiplicity::Column(cols::ECALL), + Multiplicity::Linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::ECALL, + }, + LinearTerm::Column { + coefficient: -1, + column: cols::ECALL_KECCAK, + }, + ]), vec![ BusValue::Packed { start_column: cols::TIMESTAMP, @@ -2029,6 +2059,44 @@ pub fn bus_interactions() -> Vec { ], )); + // EcallKeccak interaction (CPU → KECCAK core chip) + // ------------------------------------------------------------------------- + // multiplicity = ECALL_KECCAK + // Payload: [timestamp_lo, timestamp_hi, syscall_lo32, syscall_hi32, state_addr_lo32, state_addr_hi32] + interactions.push(BusInteraction::sender( + BusId::EcallKeccak, + Multiplicity::Column(cols::ECALL_KECCAK), + vec![ + BusValue::Packed { + start_column: cols::TIMESTAMP, + packing: Packing::Direct, + }, + BusValue::constant(0), // timestamp_hi = 0 + BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::RV1_0, + }, + LinearTerm::Column { + coefficient: 65536, + column: cols::RV1_1, + }, + ]), + BusValue::Packed { + start_column: cols::RV1_2, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::KECCAK_STATE_ADDR_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::KECCAK_STATE_ADDR_1, + packing: Packing::Direct, + }, + ], + )); + interactions } diff --git a/prover/src/tables/keccak.rs b/prover/src/tables/keccak.rs new file mode 100644 index 000000000..ec5e5eae8 --- /dev/null +++ b/prover/src/tables/keccak.rs @@ -0,0 +1,345 @@ +//! KECCAK core chip — handles ECALL, memory I/O, and delegation to the round chip. +//! +//! One row per keccak permutation call. Reads/writes 25 u64 lanes from/to memory, +//! sends input state to the round chip via the Keccak bus, and receives the output +//! state after 24 rounds. +//! +//! ## Column layout (~511 columns) +//! +//! | Group | Size | Description | +//! 
|----------------|------|------------------------------------------------| +//! | timestamp | 2 | DWordWL | +//! | addr | 8 | State address as DWordBL (8 bytes) | +//! | input_state | 200 | Input state bytes [5][5][8] | +//! | output_state | 200 | Output state bytes [5][5][8] | +//! | state_ptr | 100 | Per-lane DWordHL addresses [25][4] | +//! | mu | 1 | Multiplicity flag | + +use executor::vm::instruction::execution::KECCAK_SYSCALL_NUMBER; +use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; +use stark::trace::TraceTable; + +use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; + +// ========================================================================= +// Column indices +// ========================================================================= + +pub mod cols { + pub const TIMESTAMP_0: usize = 0; + pub const TIMESTAMP_1: usize = 1; + + // addr[8] — state address as 8 bytes (DWordBL) + pub const ADDR: usize = 2; + + // input_state[5][5][8] = 200 bytes + pub const INPUT_STATE: usize = ADDR + 8; // 10 + + // output_state[5][5][8] = 200 bytes + pub const OUTPUT_STATE: usize = INPUT_STATE + 200; // 210 + + // state_ptr[25][4] = 100 halfwords (DWordHL per lane) + pub const STATE_PTR: usize = OUTPUT_STATE + 200; // 410 + + pub const MU: usize = STATE_PTR + 100; // 510 + + pub const NUM_COLUMNS: usize = MU + 1; // 511 + + // ------------------------------------------------------------------------- + // Index helpers + // ------------------------------------------------------------------------- + + #[inline] + pub const fn addr(byte: usize) -> usize { + ADDR + byte + } + + /// Index into input_state[x][y][byte] + #[inline] + pub const fn input_state(x: usize, y: usize, byte: usize) -> usize { + INPUT_STATE + (x * 5 + y) * 8 + byte + } + + /// Index into output_state[x][y][byte] + #[inline] + pub const fn output_state(x: usize, y: usize, byte: usize) -> usize { + OUTPUT_STATE + (x * 5 + y) * 8 + byte + } + + /// Index 
into state_ptr[lane_idx][halfword] (DWordHL = 4 halfwords) + #[inline] + pub const fn state_ptr(lane_idx: usize, hw: usize) -> usize { + STATE_PTR + lane_idx * 4 + hw + } +} + +// ========================================================================= +// Operation struct +// ========================================================================= + +#[derive(Debug, Clone)] +pub struct KeccakOperation { + pub timestamp: u64, + pub state_addr: u64, + pub input: [u64; 25], + pub output: [u64; 25], +} + +// ========================================================================= +// Trace generation +// ========================================================================= + +fn byte_of(val: u64, b: usize) -> u8 { + ((val >> (b * 8)) & 0xFF) as u8 +} + +pub fn generate_keccak_trace( + ops: &[KeccakOperation], +) -> TraceTable { + let n = ops.len(); + let num_rows = n.next_power_of_two().max(4); + let mut data = vec![FE::zero(); num_rows * cols::NUM_COLUMNS]; + + for (row_idx, op) in ops.iter().enumerate() { + let base = row_idx * cols::NUM_COLUMNS; + + // Timestamp + data[base + cols::TIMESTAMP_0] = FE::from(op.timestamp & 0xFFFF_FFFF); + data[base + cols::TIMESTAMP_1] = FE::from(op.timestamp >> 32); + + // Address as 8 bytes + for b in 0..8 { + data[base + cols::addr(b)] = FE::from(byte_of(op.state_addr, b) as u64); + } + + // Input state as bytes + for x in 0..5 { + for y in 0..5 { + let lane = op.input[x + 5 * y]; + for b in 0..8 { + data[base + cols::input_state(x, y, b)] = FE::from(byte_of(lane, b) as u64); + } + } + } + + // Output state as bytes + for x in 0..5 { + for y in 0..5 { + let lane = op.output[x + 5 * y]; + for b in 0..8 { + data[base + cols::output_state(x, y, b)] = FE::from(byte_of(lane, b) as u64); + } + } + } + + // State pointers: state_ptr[lane] = addr + 8 * lane_idx + for lane_idx in 0..25 { + let ptr = op.state_addr.wrapping_add(lane_idx as u64 * 8); + data[base + cols::state_ptr(lane_idx, 0)] = FE::from(ptr & 0xFFFF); + data[base + 
cols::state_ptr(lane_idx, 1)] = FE::from((ptr >> 16) & 0xFFFF); + data[base + cols::state_ptr(lane_idx, 2)] = FE::from((ptr >> 32) & 0xFFFF); + data[base + cols::state_ptr(lane_idx, 3)] = FE::from((ptr >> 48) & 0xFFFF); + } + + // mu = 1 (real row) + data[base + cols::MU] = FE::one(); + } + + TraceTable::new_main(data, cols::NUM_COLUMNS, 1) +} + +// ========================================================================= +// Bus interactions +// ========================================================================= + +pub fn bus_interactions() -> Vec { + let syscall_lo = KECCAK_SYSCALL_NUMBER & 0xFFFF_FFFF; + let syscall_hi = KECCAK_SYSCALL_NUMBER >> 32; + let mut interactions = Vec::with_capacity(160); + + // 1. EcallKeccak receiver: [ts_lo, ts_hi, syscall_lo32, syscall_hi32, addr_lo32, addr_hi32] + interactions.push(BusInteraction::receiver( + BusId::EcallKeccak, + Multiplicity::Column(cols::MU), + vec![ + BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }, + BusValue::constant(syscall_lo), + BusValue::constant(syscall_hi), + // state_addr as DWordWL from DWordBL bytes + BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: cols::addr(0) }, + LinearTerm::Column { coefficient: 256, column: cols::addr(1) }, + LinearTerm::Column { coefficient: 65536, column: cols::addr(2) }, + LinearTerm::Column { coefficient: 16777216, column: cols::addr(3) }, + ]), + BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: cols::addr(4) }, + LinearTerm::Column { coefficient: 256, column: cols::addr(5) }, + LinearTerm::Column { coefficient: 65536, column: cols::addr(6) }, + LinearTerm::Column { coefficient: 16777216, column: cols::addr(7) }, + ]), + ], + )); + + // 2. 
Keccak bus: send (timestamp, 0, input_state[200]) + { + let mut values = vec![ + BusValue::Packed { start_column: cols::TIMESTAMP_0, packing: Packing::Direct }, + BusValue::Packed { start_column: cols::TIMESTAMP_1, packing: Packing::Direct }, + BusValue::constant(0), // round = 0 + ]; + for x in 0..5 { + for y in 0..5 { + values.push(BusValue::Packed { + start_column: cols::input_state(x, y, 0), + packing: Packing::Word4L, + }); + values.push(BusValue::Packed { + start_column: cols::input_state(x, y, 4), + packing: Packing::Word4L, + }); + } + } + interactions.push(BusInteraction::sender( + BusId::Keccak, + Multiplicity::Column(cols::MU), + values, + )); + } + + // 3. Keccak bus: receive (timestamp, 24, output_state[200]) + { + let mut values = vec![ + BusValue::Packed { start_column: cols::TIMESTAMP_0, packing: Packing::Direct }, + BusValue::Packed { start_column: cols::TIMESTAMP_1, packing: Packing::Direct }, + BusValue::constant(24), // round = 24 + ]; + for x in 0..5 { + for y in 0..5 { + values.push(BusValue::Packed { + start_column: cols::output_state(x, y, 0), + packing: Packing::Word4L, + }); + values.push(BusValue::Packed { + start_column: cols::output_state(x, y, 4), + packing: Packing::Word4L, + }); + } + } + interactions.push(BusInteraction::receiver( + BusId::Keccak, + Multiplicity::Column(cols::MU), + values, + )); + } + + // 4. IS_HALF range checks on state_ptr (100 interactions) + for lane_idx in 0..25 { + for hw in 0..4 { + interactions.push(BusInteraction::sender( + BusId::IsHalfword, + Multiplicity::Column(cols::MU), + vec![BusValue::Packed { + start_column: cols::state_ptr(lane_idx, hw), + packing: Packing::Direct, + }], + )); + } + } + + // 5. 
MEMW interactions for 25 lane reads (on mu) + 25 lane writes (on mu) + // Format: [old[8], is_register, address[DWordHL=2], value[8], ts[2], w2, w4, w8] + for lane_idx in 0..25 { + let x = lane_idx % 5; + let y = lane_idx / 5; + let addr_start = cols::state_ptr(lane_idx, 0); + + // Read: old = input, value = input (read doesn't change) + let mut read_values = Vec::with_capacity(24); + // old[0..8] = input bytes + for b in 0..8 { + read_values.push(BusValue::Packed { + start_column: cols::input_state(x, y, b), + packing: Packing::Direct, + }); + } + // is_register = 0 + read_values.push(BusValue::constant(0)); + // address as DWordHL (2 bus elements packed from 4 halfword columns) + read_values.push(BusValue::Packed { + start_column: addr_start, + packing: Packing::DWordHL, + }); + // value[0..8] = same as old (read) + for b in 0..8 { + read_values.push(BusValue::Packed { + start_column: cols::input_state(x, y, b), + packing: Packing::Direct, + }); + } + // timestamp + read_values.push(BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::Direct, + }); + read_values.push(BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }); + // write2=0, write4=0, write8=1 + read_values.push(BusValue::constant(0)); + read_values.push(BusValue::constant(0)); + read_values.push(BusValue::constant(1)); + + interactions.push(BusInteraction::sender( + BusId::Memw, + Multiplicity::Column(cols::MU), + read_values, + )); + + // Write: new value = output, timestamp = ts + 1 + let mut write_values = Vec::with_capacity(16); + // is_register = 0 + write_values.push(BusValue::constant(0)); + // address as DWordHL + write_values.push(BusValue::Packed { + start_column: addr_start, + packing: Packing::DWordHL, + }); + // value[0..8] = output bytes + for b in 0..8 { + write_values.push(BusValue::Packed { + start_column: cols::output_state(x, y, b), + packing: Packing::Direct, + }); + } + // timestamp + 1 + 
write_values.push(BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: cols::TIMESTAMP_0 }, + LinearTerm::Constant(1), + ])); + write_values.push(BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }); + // write2=0, write4=0, write8=1 + write_values.push(BusValue::constant(0)); + write_values.push(BusValue::constant(0)); + write_values.push(BusValue::constant(1)); + + interactions.push(BusInteraction::sender( + BusId::Memw, + Multiplicity::Column(cols::MU), + write_values, + )); + } + + interactions +} diff --git a/prover/src/tables/keccak_rc.rs b/prover/src/tables/keccak_rc.rs new file mode 100644 index 000000000..0ba29297b --- /dev/null +++ b/prover/src/tables/keccak_rc.rs @@ -0,0 +1,216 @@ +//! KECCAK_RC: Precomputed round constant lookup table for Keccak-f[1600]. +//! +//! 24 rows (one per round), padded to 32. Each row maps a round index to its +//! 8-byte round constant. The round chip looks up `(round) → rc[8]` via the +//! `KeccakRc` bus. +//! +//! Follows the BITWISE preprocessed-table pattern: precomputed columns are +//! committed once and cached via `OnceLock`. 
+ +use std::sync::OnceLock; + +use math::fft::cpu::bit_reversing::in_place_bit_reverse_permute; +use math::field::element::FieldElement; +use math::polynomial::Polynomial; +use stark::config::{BatchedMerkleTree, Commitment}; +use stark::lookup::{BusInteraction, BusValue, Multiplicity, Packing}; +use stark::proof::options::ProofOptions; +use stark::prover::evaluate_polynomial_on_lde_domain; +use stark::trace::{TraceTable, columns2rows}; + +use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; + +// ========================================================================= +// Column indices +// ========================================================================= + +pub mod cols { + /// Round index (0..23) + pub const ROUND: usize = 0; + /// RC bytes [0..7] — 8 bytes of the round constant (little-endian) + pub const RC: usize = 1; + pub const RC_END: usize = RC + 8; // = 9 + /// Multiplicity (how many times this row is looked up) + pub const MU: usize = 9; + + pub const NUM_COLUMNS: usize = 10; +} + +/// Number of precomputed columns (everything except MU). +pub const NUM_PRECOMPUTED_COLS: usize = 9; + +/// Number of real rows (one per keccak round). +pub const NUM_REAL_ROWS: usize = 24; + +/// Number of rows in the trace (padded to next power of 2). +pub const NUM_ROWS: usize = 32; + +/// Keccak-f[1600] round constants. +const RC: [u64; 24] = [ + 0x0000000000000001, + 0x0000000000008082, + 0x800000000000808A, + 0x8000000080008000, + 0x000000000000808B, + 0x0000000080000001, + 0x8000000080008081, + 0x8000000000008009, + 0x000000000000008A, + 0x0000000000000088, + 0x0000000080008009, + 0x000000008000000A, + 0x000000008000808B, + 0x800000000000008B, + 0x8000000000008089, + 0x8000000000008003, + 0x8000000000008002, + 0x8000000000000080, + 0x000000000000800A, + 0x800000008000000A, + 0x8000000080008081, + 0x8000000000008080, + 0x0000000080000001, + 0x8000000080008008, +]; + +/// Whether this table is preprocessed. 
+pub const fn is_preprocessed() -> bool { + true +} + +/// Generate one precomputed row: [round, rc_byte0, ..., rc_byte7]. +pub const fn generate_row(round: usize) -> [u64; NUM_PRECOMPUTED_COLS] { + let rc_val = if round < 24 { RC[round] } else { 0 }; + [ + round as u64, + rc_val & 0xFF, + (rc_val >> 8) & 0xFF, + (rc_val >> 16) & 0xFF, + (rc_val >> 24) & 0xFF, + (rc_val >> 32) & 0xFF, + (rc_val >> 40) & 0xFF, + (rc_val >> 48) & 0xFF, + (rc_val >> 56) & 0xFF, + ] +} + +// ========================================================================= +// Preprocessed commitment +// ========================================================================= + +static KECCAK_RC_COMMITMENT: OnceLock = OnceLock::new(); + +fn compute_preprocessed_commitment(options: &ProofOptions) -> Commitment { + // Generate precomputed columns + let mut columns: Vec> = (0..NUM_PRECOMPUTED_COLS) + .map(|_| Vec::with_capacity(NUM_ROWS)) + .collect(); + for idx in 0..NUM_ROWS { + let row = generate_row(idx); + for (col_idx, &value) in row.iter().enumerate() { + columns[col_idx].push(FE::from(value)); + } + } + + // Interpolate each column to a polynomial + let polys: Vec> = columns + .iter() + .map(|col| { + Polynomial::interpolate_fft::(col) + .expect("FFT interpolation failed for keccak_rc column") + }) + .collect(); + + // Evaluate on LDE domain + let blowup_factor = options.blowup_factor as usize; + let coset_offset = FE::from(options.coset_offset); + let mut lde_columns: Vec> = polys + .iter() + .map(|poly| { + evaluate_polynomial_on_lde_domain(poly, blowup_factor, NUM_ROWS, &coset_offset) + .expect("LDE evaluation failed for keccak_rc polynomial") + }) + .collect(); + + // Bit-reverse permute + for col in lde_columns.iter_mut() { + in_place_bit_reverse_permute(col); + } + + // Build Merkle tree + let lde_rows = columns2rows(lde_columns); + let tree = BatchedMerkleTree::::build(&lde_rows) + .expect("Failed to build Merkle tree for keccak_rc LDE"); + + tree.root +} + +#[inline] +pub fn 
preprocessed_commitment(options: &ProofOptions) -> Commitment { + *KECCAK_RC_COMMITMENT.get_or_init(|| compute_preprocessed_commitment(options)) +} + +// ========================================================================= +// Trace generation +// ========================================================================= + +/// Generate the KECCAK_RC trace table. +/// +/// All precomputed columns are filled; MU is initialized to zero and must be +/// updated via `update_multiplicities` after all round-chip lookups are known. +pub fn generate_keccak_rc_trace() -> TraceTable { + let mut data = vec![FE::zero(); NUM_ROWS * cols::NUM_COLUMNS]; + + for idx in 0..NUM_ROWS { + let base = idx * cols::NUM_COLUMNS; + let row = generate_row(idx); + for (col_idx, &value) in row.iter().enumerate() { + data[base + col_idx] = FE::from(value); + } + // MU = 0 (will be updated later) + } + + TraceTable::new_main(data, cols::NUM_COLUMNS, 1) +} + +/// Increment MU for each round lookup. +/// +/// Called after the round chip's trace is generated. Each keccak permutation +/// call produces 24 round lookups (one per round), so each round row's MU +/// equals the number of keccak operations. +pub fn update_multiplicities( + trace: &mut TraceTable, + num_keccak_ops: usize, +) { + let mu = FieldElement::from(num_keccak_ops as u64); + for round in 0..NUM_REAL_ROWS { + let base = round * cols::NUM_COLUMNS; + trace.main_table.data[base + cols::MU] = mu; + } +} + +// ========================================================================= +// Bus interactions +// ========================================================================= + +/// Single receiver on the KeccakRc bus. 
+/// +/// Format: [round(Direct), rc[0](Direct), ..., rc[7](Direct)] +pub fn bus_interactions() -> Vec { + let mut values = vec![BusValue::Packed { + start_column: cols::ROUND, + packing: Packing::Direct, + }]; + for i in 0..8 { + values.push(BusValue::Packed { + start_column: cols::RC + i, + packing: Packing::Direct, + }); + } + + vec![BusInteraction::receiver( + BusId::KeccakRc, + Multiplicity::Column(cols::MU), + values, + )] +} diff --git a/prover/src/tables/keccak_rnd.rs b/prover/src/tables/keccak_rnd.rs new file mode 100644 index 000000000..3216b2be7 --- /dev/null +++ b/prover/src/tables/keccak_rnd.rs @@ -0,0 +1,770 @@ +//! KECCAK_RND: Round chip for Keccak-f[1600] permutation. +//! +//! One row per round (24 rows per keccak call). All bitwise operations are +//! delegated to BITWISE lookup tables (XOR_BYTE, AND_BYTE, HWSL, IS_BYTE). +//! +//! ## Column layout (~1,775 columns) +//! +//! | Group | Size | Description | +//! |----------------|------|-----------------------------------------------| +//! | timestamp | 2 | DWordWL | +//! | round | 1 | Round index (0..23) | +//! | start | 200 | Input state bytes [5][5][8] | +//! | Cxz | 160 | Column parity chain [5][4][8] | +//! | Cxz_left | 40 | Left component of rotated C [5][8] | +//! | Cxz_right | 40 | Right component of rotated C [5][8] | +//! | Dxz | 40 | D values [5][8] | +//! | theta | 200 | State after θ [5][5][8] | +//! | rot_left | 200 | Left half of ρ rotation [5][5][8] | +//! | rot_right | 200 | Right half of ρ rotation [5][5][8] | +//! | pi | 200 | State after π∘ρ (materialized) [5][5][8] | +//! | chi_ands | 200 | AND results for χ [5][5][8] | +//! | chi | 200 | State after χ [5][5][8] | +//! | rc | 8 | Round constant bytes | +//! | iota | 8 | χ[0][0] ⊕ rc | +//! | rnc | 25 | ρ rotation nibble constant [5][5] | +//! | rbc | 50 | ρ rotation byte count bits [5][5][2] | +//! 
// =========================================================================
// Column indices
// =========================================================================

/// Column offsets of the KECCAK_RND trace, laid out as consecutive groups.
pub mod cols {
    /// Bytes in one 64-bit lane.
    const LANE: usize = 8;
    /// Columns used by a `[5][8]` group (one lane per x).
    const PLANE: usize = 5 * LANE; // 40
    /// Columns used by a `[5][5][8]` group (a full 200-byte state).
    const STATE: usize = 5 * PLANE; // 200

    pub const TIMESTAMP_0: usize = 0;
    pub const TIMESTAMP_1: usize = 1;
    pub const ROUND: usize = 2;

    /// start[5][5][8]: input state of the round.
    pub const START: usize = 3;

    /// Cxz[5][4][8]: running XOR chain for the column parities.
    /// Stage 0 is `start[x,0] ^ start[x,1]`, stage k folds in `start[x,k+1]`,
    /// so the final parity C[x] is stage 3.
    pub const CXZ: usize = START + STATE; // 203

    /// Cxz_left[5][8]: left-shift part of the rotated C.
    pub const CXZ_LEFT: usize = CXZ + 4 * PLANE; // 363

    /// Cxz_right[5][8]: right-shift (carry) part of the rotated C.
    pub const CXZ_RIGHT: usize = CXZ_LEFT + PLANE; // 403

    /// Dxz[5][8]: D[x] = C[(x-1)%5] XOR rotated_C[(x+1)%5].
    pub const DXZ: usize = CXZ_RIGHT + PLANE; // 443

    /// theta[5][5][8]: state after θ.
    pub const THETA: usize = DXZ + PLANE; // 483

    /// rot_left[5][5][8]: left half of the ρ rotation.
    pub const ROT_LEFT: usize = THETA + STATE; // 683

    /// rot_right[5][5][8]: right half of the ρ rotation.
    pub const ROT_RIGHT: usize = ROT_LEFT + STATE; // 883

    /// pi[5][5][8]: state after π∘ρ.
    pub const PI: usize = ROT_RIGHT + STATE; // 1083

    /// chi_ands[5][5][8]: AND results used by χ.
    pub const CHI_ANDS: usize = PI + STATE; // 1283

    /// chi[5][5][8]: state after χ.
    pub const CHI: usize = CHI_ANDS + STATE; // 1483

    /// rc[8]: round constant bytes.
    pub const RC: usize = CHI + STATE; // 1683

    /// iota[8]: χ[0][0] ⊕ rc.
    pub const IOTA: usize = RC + LANE; // 1691

    /// rnc[5][5]: ρ offset mod 16, used as the HWSL shift amount.
    pub const RNC: usize = IOTA + LANE; // 1699

    /// rbc[5][5][2]: two bits per lane holding the ρ offset divided by 16.
    pub const RBC: usize = RNC + 25; // 1724

    /// mu: multiplicity flag.
    pub const MU: usize = RBC + 50; // 1774

    pub const NUM_COLUMNS: usize = MU + 1; // 1775

    // ---------------------------------------------------------------------
    // Index helpers
    // ---------------------------------------------------------------------

    /// Offset of `[x][y][byte]` inside a `[5][5][8]` group starting at `group`
    /// (x is the slowest-varying index, byte the fastest).
    const fn lane_byte(group: usize, x: usize, y: usize, byte: usize) -> usize {
        group + (x * 5 + y) * LANE + byte
    }

    /// Offset of `[x][byte]` inside a `[5][8]` group starting at `group`.
    const fn plane_byte(group: usize, x: usize, byte: usize) -> usize {
        group + x * LANE + byte
    }

    /// Column of start[x][y][byte].
    #[inline]
    pub const fn start(x: usize, y: usize, byte: usize) -> usize {
        lane_byte(START, x, y, byte)
    }

    /// Column of Cxz[x][stage][byte] (four stages per x).
    #[inline]
    pub const fn cxz(x: usize, stage: usize, byte: usize) -> usize {
        CXZ + (x * 4 + stage) * LANE + byte
    }

    /// Column of Cxz_left[x][byte].
    #[inline]
    pub const fn cxz_left(x: usize, byte: usize) -> usize {
        plane_byte(CXZ_LEFT, x, byte)
    }

    /// Column of Cxz_right[x][byte].
    #[inline]
    pub const fn cxz_right(x: usize, byte: usize) -> usize {
        plane_byte(CXZ_RIGHT, x, byte)
    }

    /// Column of Dxz[x][byte].
    #[inline]
    pub const fn dxz(x: usize, byte: usize) -> usize {
        plane_byte(DXZ, x, byte)
    }

    /// Column of theta[x][y][byte].
    #[inline]
    pub const fn theta(x: usize, y: usize, byte: usize) -> usize {
        lane_byte(THETA, x, y, byte)
    }

    /// Column of rot_left[x][y][byte].
    #[inline]
    pub const fn rot_left(x: usize, y: usize, byte: usize) -> usize {
        lane_byte(ROT_LEFT, x, y, byte)
    }

    /// Column of rot_right[x][y][byte].
    #[inline]
    pub const fn rot_right(x: usize, y: usize, byte: usize) -> usize {
        lane_byte(ROT_RIGHT, x, y, byte)
    }

    /// Column of pi[x][y][byte].
    #[inline]
    pub const fn pi(x: usize, y: usize, byte: usize) -> usize {
        lane_byte(PI, x, y, byte)
    }

    /// Column of chi_ands[x][y][byte].
    #[inline]
    pub const fn chi_ands(x: usize, y: usize, byte: usize) -> usize {
        lane_byte(CHI_ANDS, x, y, byte)
    }

    /// Column of chi[x][y][byte].
    #[inline]
    pub const fn chi(x: usize, y: usize, byte: usize) -> usize {
        lane_byte(CHI, x, y, byte)
    }

    /// Column of rc[byte].
    #[inline]
    pub const fn rc(byte: usize) -> usize {
        RC + byte
    }

    /// Column of iota[byte].
    #[inline]
    pub const fn iota(byte: usize) -> usize {
        IOTA + byte
    }

    /// Column of rnc[x][y].
    #[inline]
    pub const fn rnc(x: usize, y: usize) -> usize {
        RNC + x * 5 + y
    }

    /// Column of rbc[x][y][bit].
    #[inline]
    pub const fn rbc(x: usize, y: usize, bit: usize) -> usize {
        RBC + (x * 5 + y) * 2 + bit
    }
}

// =========================================================================
// Operation struct
// =========================================================================

/// Data for one keccak permutation call; trace generation expands it to 24 rows.
#[derive(Debug, Clone)]
pub struct KeccakRoundOperation {
    /// Timestamp of the call; stored in the trace as two 32-bit halves.
    pub timestamp: u64,
    /// The 25 lanes the permutation starts from.
    pub input: [u64; 25],
    /// The 25 lanes after the permutation.
    pub output: [u64; 25],
}

// =========================================================================
// Trace generation
// =========================================================================

/// Return byte `b` of `val`, counting from the least significant byte (`b` in 0..8).
#[inline]
fn byte_of(val: u64, b: usize) -> u8 {
    // The truncating cast keeps exactly the low eight bits.
    (val >> (8 * b)) as u8
}

/// Halfword shift-left: returns `((halfword << shift) mod 2^16, halfword >> (16 - shift))`.
///
/// A zero shift is handled separately because `halfword >> 16` is not a valid
/// u16 shift; in that case nothing is carried out.
#[inline]
fn hwsl(halfword: u16, shift: u8) -> (u16, u16) {
    match shift {
        0 => (halfword, 0),
        // `<<` on u16 discards the bits shifted past bit 15.
        s => (halfword << s, halfword >> (16 - s)),
    }
}
The trace +/// computes all intermediate values (θ, ρ, π, χ, ι) at byte granularity. +pub fn generate_keccak_rnd_trace( + ops: &[KeccakRoundOperation], +) -> TraceTable { + let n_rows = (ops.len() * 24).next_power_of_two().max(4); + let mut data = vec![FE::zero(); n_rows * cols::NUM_COLUMNS]; + + for (op_idx, op) in ops.iter().enumerate() { + // Execute round-by-round, tracking the state + let mut state = op.input; + + for round in 0..24 { + let row_idx = op_idx * 24 + round; + let base = row_idx * cols::NUM_COLUMNS; + + // Timestamp & round + data[base + cols::TIMESTAMP_0] = FE::from(op.timestamp & 0xFFFF_FFFF); + data[base + cols::TIMESTAMP_1] = FE::from(op.timestamp >> 32); + data[base + cols::ROUND] = FE::from(round as u64); + + // start = current state as bytes + for x in 0..5 { + for y in 0..5 { + let lane = state[x + 5 * y]; + for b in 0..8 { + data[base + cols::start(x, y, b)] = FE::from(byte_of(lane, b) as u64); + } + } + } + + // === θ (theta) === + // Column parities: C[x] = XOR of all 5 lanes in column x + // Computed as a chain: Cxz[x][0] = start[x,0] XOR start[x,1] + // Cxz[x][k] = Cxz[x][k-1] XOR start[x,k+1] + let mut c_bytes = [[0u8; 8]; 5]; // C[x][byte] = final parity + let mut cxz = [[[0u8; 8]; 4]; 5]; // Cxz[x][stage][byte] + for x in 0..5 { + // Stage 0: XOR(start[x,0], start[x,1]) + for b in 0..8 { + let v0 = byte_of(state[x], b); + let v1 = byte_of(state[x + 5], b); + cxz[x][0][b] = v0 ^ v1; + data[base + cols::cxz(x, 0, b)] = FE::from(cxz[x][0][b] as u64); + } + // Stages 1..3: XOR(Cxz[x][k-1], start[x, k+1]) + for stage in 1..4 { + let y = stage + 1; + for b in 0..8 { + let prev = cxz[x][stage - 1][b]; + let sv = byte_of(state[x + 5 * y], b); + cxz[x][stage][b] = prev ^ sv; + data[base + cols::cxz(x, stage, b)] = FE::from(cxz[x][stage][b] as u64); + } + } + c_bytes[x] = cxz[x][3]; + } + + // Rotate C left by 1 bit using HWSL on each column's halfwords + let mut cxz_left_bytes = [[0u8; 8]; 5]; + let mut cxz_right_bytes = [[0u8; 8]; 5]; + 
let mut rotated_c = [[0u8; 8]; 5]; + for x in 0..5 { + for hw in 0..4 { + let lo = c_bytes[x][hw * 2] as u16; + let hi = c_bytes[x][hw * 2 + 1] as u16; + let halfword = lo | (hi << 8); + let (shifted, carry) = hwsl(halfword, 1); + cxz_left_bytes[x][hw * 2] = (shifted & 0xFF) as u8; + cxz_left_bytes[x][hw * 2 + 1] = (shifted >> 8) as u8; + cxz_right_bytes[x][hw * 2] = (carry & 0xFF) as u8; + cxz_right_bytes[x][hw * 2 + 1] = (carry >> 8) as u8; + data[base + cols::cxz_left(x, hw * 2)] = FE::from(cxz_left_bytes[x][hw * 2] as u64); + data[base + cols::cxz_left(x, hw * 2 + 1)] = FE::from(cxz_left_bytes[x][hw * 2 + 1] as u64); + data[base + cols::cxz_right(x, hw * 2)] = FE::from(cxz_right_bytes[x][hw * 2] as u64); + data[base + cols::cxz_right(x, hw * 2 + 1)] = FE::from(cxz_right_bytes[x][hw * 2 + 1] as u64); + } + for b in 0..8 { + rotated_c[x][b] = cxz_left_bytes[x][b].wrapping_add(cxz_right_bytes[x][(b + 7) % 8]); + } + } + + // D[x] = C[(x-1)%5] XOR rotated_C[(x+1)%5] + let mut d_bytes = [[0u8; 8]; 5]; + for x in 0..5 { + for b in 0..8 { + let val = c_bytes[(x + 4) % 5][b] ^ rotated_c[(x + 1) % 5][b]; + d_bytes[x][b] = val; + data[base + cols::dxz(x, b)] = FE::from(val as u64); + } + } + + // theta[x][y] = start[x][y] XOR D[x] + let mut theta_lanes = [0u64; 25]; + for x in 0..5 { + for y in 0..5 { + let lane = state[x + 5 * y]; + let mut d_lane = 0u64; + for b in 0..8 { + d_lane |= (d_bytes[x][b] as u64) << (b * 8); + } + theta_lanes[x + 5 * y] = lane ^ d_lane; + for b in 0..8 { + data[base + cols::theta(x, y, b)] = + FE::from(byte_of(theta_lanes[x + 5 * y], b) as u64); + } + } + } + + // === ρ (rho) === + // For each lane, rotate theta[x][y] by KECCAK_RHO[x][y] bits. + // Decompose rotation as: rnc (nibble, 0..15) + 16*rbc[0] + 32*rbc[1] + // HWSL handles the sub-16-bit rotation on each halfword. + // The byte-level shift (rbc) is handled by the pi column reconstruction. 
+ for x in 0..5 { + for y in 0..5 { + let rho_offset = KECCAK_RHO[x][y] as usize; + let rnc_val = (rho_offset % 16) as u8; + let rbc_val = rho_offset / 16; // 0..3 + let rbc0 = (rbc_val & 1) as u8; + let rbc1 = ((rbc_val >> 1) & 1) as u8; + + data[base + cols::rnc(x, y)] = FE::from(rnc_val as u64); + data[base + cols::rbc(x, y, 0)] = FE::from(rbc0 as u64); + data[base + cols::rbc(x, y, 1)] = FE::from(rbc1 as u64); + + let theta_lane = theta_lanes[x + 5 * y]; + // HWSL on each halfword with shift = rnc_val + for hw in 0..4 { + let halfword = ((theta_lane >> (hw * 16)) & 0xFFFF) as u16; + let (shifted, carry) = hwsl(halfword, rnc_val); + data[base + cols::rot_left(x, y, hw * 2)] = + FE::from((shifted & 0xFF) as u64); + data[base + cols::rot_left(x, y, hw * 2 + 1)] = + FE::from((shifted >> 8) as u64); + data[base + cols::rot_right(x, y, hw * 2)] = + FE::from((carry & 0xFF) as u64); + data[base + cols::rot_right(x, y, hw * 2 + 1)] = + FE::from((carry >> 8) as u64); + } + } + } + + // === π (pi) === + // pi[x][y] = rho[(x+3y)%5][x] where rho is the rotated theta + // rho[x'][y'] is reconstructed from rot_left/rot_right using rbc mux: + // rho[byte] = mux(rbc, rot_left[byte - 2*rbc_val] + rot_right[byte - 2*rbc_val - 1]) + // But it's simpler to just compute the full rotation and store pi directly. 
+ let mut pi_lanes = [0u64; 25]; + for x in 0..5 { + for y in 0..5 { + // rho rotation of theta[x][y] + let rotated = theta_lanes[x + 5 * y].rotate_left(KECCAK_RHO[x][y]); + // π permutation: b[y][(2x+3y)%5] = rotated + let dst_x = y; + let dst_y = (2 * x + 3 * y) % 5; + pi_lanes[dst_x + 5 * dst_y] = rotated; + } + } + for x in 0..5 { + for y in 0..5 { + for b in 0..8 { + data[base + cols::pi(x, y, b)] = + FE::from(byte_of(pi_lanes[x + 5 * y], b) as u64); + } + } + } + + // === χ (chi) === + let mut chi_lanes = [0u64; 25]; + for x in 0..5 { + for y in 0..5 { + let not_next = !pi_lanes[(x + 1) % 5 + 5 * y]; + let next2 = pi_lanes[(x + 2) % 5 + 5 * y]; + let and_val = not_next & next2; + chi_lanes[x + 5 * y] = pi_lanes[x + 5 * y] ^ and_val; + for b in 0..8 { + data[base + cols::chi_ands(x, y, b)] = + FE::from(byte_of(and_val, b) as u64); + data[base + cols::chi(x, y, b)] = + FE::from(byte_of(chi_lanes[x + 5 * y], b) as u64); + } + } + } + + // === ι (iota) === + let rc_val = KECCAK_RC[round]; + for b in 0..8 { + data[base + cols::rc(b)] = FE::from(byte_of(rc_val, b) as u64); + let iota_byte = byte_of(chi_lanes[0], b) ^ byte_of(rc_val, b); + data[base + cols::iota(b)] = FE::from(iota_byte as u64); + } + + // Update state for next round + chi_lanes[0] ^= rc_val; + state = chi_lanes; + + // mu = 1 (real row) + data[base + cols::MU] = FE::one(); + } + } + + // Padding rows have mu=0 and all zeros (default) + TraceTable::new_main(data, cols::NUM_COLUMNS, 1) +} + +// ========================================================================= +// Bus interactions (1,411 total) +// ========================================================================= + +pub fn bus_interactions() -> Vec { + let mut interactions = Vec::with_capacity(1420); + + // --- IO group (3) --- + + // 1. 
KECCAK bus: receive (timestamp, round, start[200]) + { + let mut values = vec![ + BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::ROUND, + packing: Packing::Direct, + }, + ]; + // Pack state as 25 DWordBL = 50 bus elements + for x in 0..5 { + for y in 0..5 { + // Word4L packing: 4 consecutive byte columns → 1 bus element + values.push(BusValue::Packed { + start_column: cols::start(x, y, 0), + packing: Packing::Word4L, + }); + values.push(BusValue::Packed { + start_column: cols::start(x, y, 4), + packing: Packing::Word4L, + }); + } + } + interactions.push(BusInteraction::receiver( + BusId::Keccak, + Multiplicity::Column(cols::MU), + values, + )); + } + + // 2. KECCAK bus: send (timestamp, round+1, out[200]) + // out[0][0] = iota, out[x][y] = chi for (x,y) != (0,0) + { + let mut values = vec![ + BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }, + BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::ROUND, + }, + LinearTerm::Constant(1), + ]), + ]; + for x in 0..5 { + for y in 0..5 { + if x == 0 && y == 0 { + // Lane [0][0] uses iota columns + values.push(BusValue::Packed { + start_column: cols::IOTA, + packing: Packing::Word4L, + }); + values.push(BusValue::Packed { + start_column: cols::IOTA + 4, + packing: Packing::Word4L, + }); + } else { + // Other lanes use chi columns + values.push(BusValue::Packed { + start_column: cols::chi(x, y, 0), + packing: Packing::Word4L, + }); + values.push(BusValue::Packed { + start_column: cols::chi(x, y, 4), + packing: Packing::Word4L, + }); + } + } + } + interactions.push(BusInteraction::sender( + BusId::Keccak, + Multiplicity::Column(cols::MU), + values, + )); + } + + // 3. 
KECCAK_RC: lookup (round) → rc[8] + { + let mut values = vec![BusValue::Packed { + start_column: cols::ROUND, + packing: Packing::Direct, + }]; + for b in 0..8 { + values.push(BusValue::Packed { + start_column: cols::rc(b), + packing: Packing::Direct, + }); + } + interactions.push(BusInteraction::sender( + BusId::KeccakRc, + Multiplicity::Column(cols::MU), + values, + )); + } + + // --- Theta: Cxz chain XOR_BYTE (160) --- + // Stage 0: XOR(start[x,0,z], start[x,1,z]) → Cxz[x,0,z] + for x in 0..5 { + for b in 0..8 { + interactions.push(BusInteraction::sender( + BusId::XorByte, + Multiplicity::Column(cols::MU), + vec![ + BusValue::Packed { start_column: cols::start(x, 0, b), packing: Packing::Direct }, + BusValue::Packed { start_column: cols::start(x, 1, b), packing: Packing::Direct }, + BusValue::Packed { start_column: cols::cxz(x, 0, b), packing: Packing::Direct }, + ], + )); + } + } + // Stages 1..3: XOR(Cxz[x,stage-1,z], start[x,stage+1,z]) → Cxz[x,stage,z] + for x in 0..5 { + for stage in 1..4usize { + let y = stage + 1; + for b in 0..8 { + interactions.push(BusInteraction::sender( + BusId::XorByte, + Multiplicity::Column(cols::MU), + vec![ + BusValue::Packed { start_column: cols::cxz(x, stage - 1, b), packing: Packing::Direct }, + BusValue::Packed { start_column: cols::start(x, y, b), packing: Packing::Direct }, + BusValue::Packed { start_column: cols::cxz(x, stage, b), packing: Packing::Direct }, + ], + )); + } + } + } + + // --- Theta: HWSL for rotated C (20) --- + // HWSL(C[x] halfword[hw], 1) → (Cxz_left, Cxz_right) + for x in 0..5 { + for hw in 0..4 { + interactions.push(BusInteraction::sender( + BusId::Hwsl, + Multiplicity::Column(cols::MU), + vec![ + // Input halfword: Cxz[x][3][hw*2] + 256 * Cxz[x][3][hw*2+1] + BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: cols::cxz(x, 3, hw * 2) }, + LinearTerm::Column { coefficient: 256, column: cols::cxz(x, 3, hw * 2 + 1) }, + ]), + // Shift amount = 1 + BusValue::constant(1), + // Output: 
shifted + BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: cols::cxz_left(x, hw * 2) }, + LinearTerm::Column { coefficient: 256, column: cols::cxz_left(x, hw * 2 + 1) }, + ]), + // Output: carry + BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: cols::cxz_right(x, hw * 2) }, + LinearTerm::Column { coefficient: 256, column: cols::cxz_right(x, hw * 2 + 1) }, + ]), + ], + )); + } + } + + // --- Theta: IS_BYTE range checks on Cxz_left + Cxz_right (80) --- + for x in 0..5 { + for b in 0..8 { + interactions.push(BusInteraction::sender( + BusId::IsByte, + Multiplicity::Column(cols::MU), + vec![BusValue::Packed { start_column: cols::cxz_left(x, b), packing: Packing::Direct }], + )); + interactions.push(BusInteraction::sender( + BusId::IsByte, + Multiplicity::Column(cols::MU), + vec![BusValue::Packed { start_column: cols::cxz_right(x, b), packing: Packing::Direct }], + )); + } + } + + // --- Theta: Dxz XOR_BYTE (40) --- + // D[x][b] = C[(x-1)%5][b] XOR rotated_C[(x+1)%5][b] + // rotated_C[x'][b] = Cxz_left[x'][b] + Cxz_right[x'][(b-1)%8] (virtual) + for x in 0..5 { + for b in 0..8 { + interactions.push(BusInteraction::sender( + BusId::XorByte, + Multiplicity::Column(cols::MU), + vec![ + BusValue::Packed { start_column: cols::cxz((x + 4) % 5, 3, b), packing: Packing::Direct }, + BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: cols::cxz_left((x + 1) % 5, b) }, + LinearTerm::Column { coefficient: 1, column: cols::cxz_right((x + 1) % 5, (b + 7) % 8) }, + ]), + BusValue::Packed { start_column: cols::dxz(x, b), packing: Packing::Direct }, + ], + )); + } + } + + // --- Theta final: XOR_BYTE (200) --- + // theta[x][y][b] = start[x][y][b] XOR D[x][b] + for x in 0..5 { + for y in 0..5 { + for b in 0..8 { + interactions.push(BusInteraction::sender( + BusId::XorByte, + Multiplicity::Column(cols::MU), + vec![ + BusValue::Packed { start_column: cols::start(x, y, b), packing: Packing::Direct }, + BusValue::Packed { 
start_column: cols::dxz(x, b), packing: Packing::Direct }, + BusValue::Packed { start_column: cols::theta(x, y, b), packing: Packing::Direct }, + ], + )); + } + } + } + + // --- Rho: HWSL (100) --- + // HWSL(theta[x][y] halfword[hw], rnc[x][y]) → (rot_left, rot_right) + for x in 0..5 { + for y in 0..5 { + for hw in 0..4 { + interactions.push(BusInteraction::sender( + BusId::Hwsl, + Multiplicity::Column(cols::MU), + vec![ + BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: cols::theta(x, y, hw * 2) }, + LinearTerm::Column { coefficient: 256, column: cols::theta(x, y, hw * 2 + 1) }, + ]), + BusValue::Packed { start_column: cols::rnc(x, y), packing: Packing::Direct }, + BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: cols::rot_left(x, y, hw * 2) }, + LinearTerm::Column { coefficient: 256, column: cols::rot_left(x, y, hw * 2 + 1) }, + ]), + BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: cols::rot_right(x, y, hw * 2) }, + LinearTerm::Column { coefficient: 256, column: cols::rot_right(x, y, hw * 2 + 1) }, + ]), + ], + )); + } + } + } + + // --- Rho: IS_BYTE range checks on rot_left + rot_right (400) --- + for x in 0..5 { + for y in 0..5 { + for b in 0..8 { + interactions.push(BusInteraction::sender( + BusId::IsByte, + Multiplicity::Column(cols::MU), + vec![BusValue::Packed { start_column: cols::rot_left(x, y, b), packing: Packing::Direct }], + )); + interactions.push(BusInteraction::sender( + BusId::IsByte, + Multiplicity::Column(cols::MU), + vec![BusValue::Packed { start_column: cols::rot_right(x, y, b), packing: Packing::Direct }], + )); + } + } + } + + // --- Chi: AND_BYTE (200) --- + // chi_ands[x][y][b] = (255 - pi[(x+1)%5][y][b]) AND pi[(x+2)%5][y][b] + for x in 0..5 { + for y in 0..5 { + for b in 0..8 { + interactions.push(BusInteraction::sender( + BusId::AndByte, + Multiplicity::Column(cols::MU), + vec![ + BusValue::linear(vec![ + LinearTerm::Constant(255), + LinearTerm::Column { coefficient: -1, 
column: cols::pi((x + 1) % 5, y, b) }, + ]), + BusValue::Packed { start_column: cols::pi((x + 2) % 5, y, b), packing: Packing::Direct }, + BusValue::Packed { start_column: cols::chi_ands(x, y, b), packing: Packing::Direct }, + ], + )); + } + } + } + + // --- Chi: XOR_BYTE (200) --- + // chi[x][y][b] = pi[x][y][b] XOR chi_ands[x][y][b] + for x in 0..5 { + for y in 0..5 { + for b in 0..8 { + interactions.push(BusInteraction::sender( + BusId::XorByte, + Multiplicity::Column(cols::MU), + vec![ + BusValue::Packed { start_column: cols::pi(x, y, b), packing: Packing::Direct }, + BusValue::Packed { start_column: cols::chi_ands(x, y, b), packing: Packing::Direct }, + BusValue::Packed { start_column: cols::chi(x, y, b), packing: Packing::Direct }, + ], + )); + } + } + } + + // --- Iota: XOR_BYTE (8) --- + // iota[b] = chi[0][0][b] XOR rc[b] + for b in 0..8 { + interactions.push(BusInteraction::sender( + BusId::XorByte, + Multiplicity::Column(cols::MU), + vec![ + BusValue::Packed { start_column: cols::chi(0, 0, b), packing: Packing::Direct }, + BusValue::Packed { start_column: cols::rc(b), packing: Packing::Direct }, + BusValue::Packed { start_column: cols::iota(b), packing: Packing::Direct }, + ], + )); + } + + interactions +} diff --git a/prover/src/tables/mod.rs b/prover/src/tables/mod.rs index 268d75dee..20aa7cc47 100644 --- a/prover/src/tables/mod.rs +++ b/prover/src/tables/mod.rs @@ -28,6 +28,9 @@ pub mod cpu; pub mod decode; pub mod dvrm; pub mod halt; +pub mod keccak; +pub mod keccak_rc; +pub mod keccak_rnd; pub mod load; pub mod lt; pub mod memw; diff --git a/prover/src/tables/types.rs b/prover/src/tables/types.rs index 8e89490ae..fe2e52f53 100644 --- a/prover/src/tables/types.rs +++ b/prover/src/tables/types.rs @@ -109,6 +109,12 @@ pub enum BusId { /// COMMIT output bus: verifier computes the receiver contribution externally /// from `VmProof.public_output` using the shared LogUp challenges Commit, + /// ECALL dispatch for KeccakPermute syscall (CPU → KECCAK) + 
EcallKeccak, + /// Keccak core ↔ round chip: (timestamp, round, state[200 bytes]) + Keccak, + /// Keccak round ↔ RC lookup: (round, rc[8 bytes]) + KeccakRc, } impl BusId { @@ -137,6 +143,9 @@ impl BusId { BusId::Dvrm => "Dvrm", BusId::CommitNextByte => "CommitNextByte", BusId::Commit => "Commit", + BusId::EcallKeccak => "EcallKeccak", + BusId::Keccak => "Keccak", + BusId::KeccakRc => "KeccakRc", } } } @@ -168,6 +177,9 @@ impl TryFrom for BusId { 19 => Ok(BusId::Ecall), 20 => Ok(BusId::CommitNextByte), 21 => Ok(BusId::Commit), + 22 => Ok(BusId::EcallKeccak), + 23 => Ok(BusId::Keccak), + 24 => Ok(BusId::KeccakRc), other => Err(other), } } diff --git a/prover/src/tests/constraints_tests.rs b/prover/src/tests/constraints_tests.rs index 3881c611e..0189aa4e0 100644 --- a/prover/src/tests/constraints_tests.rs +++ b/prover/src/tests/constraints_tests.rs @@ -523,8 +523,8 @@ use crate::tables::cpu::cols as cpu_cols; #[test] fn test_cpu_bit_flag_columns_count() { - // Should have 32 bit flag columns (includes read_register1, read_register2) - assert_eq!(BIT_FLAG_COLUMNS.len(), 32); + // Should have 33 bit flag columns (includes read_register1, read_register2, ecall_keccak) + assert_eq!(BIT_FLAG_COLUMNS.len(), 33); } #[test] @@ -539,8 +539,8 @@ fn test_cpu_bit_flag_columns_valid() { fn test_create_is_bit_constraints() { let (constraints, next_idx) = create_is_bit_constraints(0); - assert_eq!(constraints.len(), 32); - assert_eq!(next_idx, 32); + assert_eq!(constraints.len(), 33); + assert_eq!(next_idx, 33); // Check constraint indices are sequential for (i, c) in constraints.iter().enumerate() { @@ -622,14 +622,14 @@ fn test_next_pc_add_constraint_new_pair() { fn test_create_all_cpu_constraints() { let (is_bit, add, other, total) = create_all_cpu_constraints(); - assert_eq!(is_bit.len(), 32); + assert_eq!(is_bit.len(), 33); // ADD constraints: 2 (ADD+LOAD) + 2 (STORE: arg1+imm) + 2 (SUB+BEQ) + 2 (JALR) = 8 assert_eq!(add.len(), 8); - // Other: branch_cond(1) + ebreak(1) + 
rv1_zero_forcing(3) + rv2_zero_forcing(3) + arg1(2) + arg2(2) + rvd(2) + slt_zero(7) + ext_bit_zero(3) + next_pc(2) = 26 - assert_eq!(other.len(), 26); + // Other: branch_cond(1) + ebreak(1) + ecall_keccak_implies(1) + rv1_zero_forcing(3) + rv2_zero_forcing(3) + arg1(2) + arg2(2) + rvd(2) + slt_zero(7) + ext_bit_zero(3) + next_pc(2) = 27 + assert_eq!(other.len(), 27); - // Total should be 32 + 8 + 26 = 66 - assert_eq!(total, 66); + // Total should be 33 + 8 + 27 = 68 + assert_eq!(total, 68); assert_eq!(total, NUM_CPU_CONSTRAINTS); } diff --git a/prover/src/tests/cpu_tests.rs b/prover/src/tests/cpu_tests.rs index d2c240293..edfd4406c 100644 --- a/prover/src/tests/cpu_tests.rs +++ b/prover/src/tests/cpu_tests.rs @@ -328,15 +328,16 @@ fn test_bus_interactions_count() { // - 1 DVRM (division/remainder) // - 1 SHIFT (shift operations) // - 1 BRANCH (branch/jump target calculation) - // - 1 ECALL (single shared bus for HALT and COMMIT, mult = ECALL) + // - 1 ECALL (shared bus for HALT and COMMIT, mult = ECALL - ECALL_KECCAK) + // - 1 EcallKeccak (CPU → KECCAK core, mult = ECALL_KECCAK) // - 27 IS_BYTE (byte range checks: RS1, RS2, RD, ARG1[0..7], ARG2[0..7], RES[0..7]) - // Total: 8 + 8 + 8 + 2 + 1 + 1 + 1 + 1 + 5 + 1 + 1 + 1 + 1 + 1 + 1 + 27 = 68 - assert_eq!(interactions.len(), 68); + // Total: 8 + 8 + 8 + 2 + 1 + 1 + 1 + 1 + 5 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 27 = 69 + assert_eq!(interactions.len(), 69); } #[test] fn test_column_count() { - assert_eq!(cols::NUM_COLUMNS, 74); + assert_eq!(cols::NUM_COLUMNS, 77); } #[test] diff --git a/syscalls/src/syscalls.rs b/syscalls/src/syscalls.rs index e31b66619..ae5ddb8a8 100644 --- a/syscalls/src/syscalls.rs +++ b/syscalls/src/syscalls.rs @@ -14,6 +14,10 @@ enum SyscallNumbers { Halt = 93, } +/// Syscall number for KeccakPermute (u64::MAX - 1). 
+#[cfg(target_arch = "riscv64")] +const KECCAK_SYSCALL_NUMBER: usize = usize::MAX - 1; + #[cfg(target_arch = "riscv64")] /// This is a template for printing in the vm pub fn print_string(s: &str) { @@ -121,6 +125,24 @@ pub fn sys_halt() -> ! { unimplemented!("syscalls are only implemented for riscv64 targets"); } +#[cfg(target_arch = "riscv64")] +/// Apply the Keccak-f[1600] permutation to a 25-element u64 state in-place. +pub fn keccak_permute(state: &mut [u64; 25]) { + unsafe { + asm!( + "ecall", + in("a0") state.as_mut_ptr(), + in("a7") KECCAK_SYSCALL_NUMBER, + ) + } +} + +#[cfg(not(target_arch = "riscv64"))] +/// Apply the Keccak-f[1600] permutation to a 25-element u64 state in-place. +pub fn keccak_permute(_state: &mut [u64; 25]) { + unimplemented!("syscalls are only implemented for riscv64 targets"); +} + // ============================================================================= // Stub implementations for unsupported std functions // These functions are required by Rust's std zkvm module but are not supported From 3c5caa5a9bc55af1af0aa2d18312afe63cbb93a7 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Mon, 6 Apr 2026 12:02:55 -0300 Subject: [PATCH 02/14] Wire keccak chips into prover pipeline: trace builder, AIR creation, VmAirs registration, and E2E test scaffold --- prover/src/lib.rs | 29 ++- prover/src/tables/keccak.rs | 30 ++- prover/src/tables/trace_builder.rs | 335 ++++++++++++++++++++++++++- prover/src/test_utils.rs | 57 +++++ prover/src/tests/prove_elfs_tests.rs | 32 ++- 5 files changed, 462 insertions(+), 21 deletions(-) diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 2a9d2c912..cbece5f23 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -40,8 +40,9 @@ use crate::tables::trace_builder::Traces; use crate::tables::types::BusId; use crate::test_utils::{ E, F, VmAir, create_bitwise_air, create_branch_air, create_commit_air, create_cpu_air, - create_decode_air, create_dvrm_air, create_halt_air, create_load_air, create_lt_air, - 
create_memw_air, create_memw_aligned_air, create_mul_air, create_page_air, create_register_air, + create_decode_air, create_dvrm_air, create_halt_air, create_keccak_air, create_keccak_rc_air, + create_keccak_rnd_air, create_load_air, create_lt_air, create_memw_air, + create_memw_aligned_air, create_mul_air, create_page_air, create_register_air, create_shift_air, }; @@ -193,6 +194,9 @@ pub(crate) struct VmAirs { pub branches: Vec, pub halt: VmAir, pub commit: VmAir, + pub keccak: VmAir, + pub keccak_rnd: VmAir, + pub keccak_rc: VmAir, pub register: VmAir, pub pages: Vec, } @@ -205,6 +209,9 @@ impl VmAirs { (&self.decode, &mut traces.decode, &()), (&self.halt, &mut traces.halt, &()), (&self.commit, &mut traces.commit, &()), + (&self.keccak, &mut traces.keccak, &()), + (&self.keccak_rnd, &mut traces.keccak_rnd, &()), + (&self.keccak_rc, &mut traces.keccak_rc, &()), (&self.register, &mut traces.register, &()), ]; @@ -253,6 +260,9 @@ impl VmAirs { &self.decode, &self.halt, &self.commit, + &self.keccak, + &self.keccak_rnd, + &self.keccak_rc, &self.register, ]; @@ -345,6 +355,12 @@ impl VmAirs { .collect(); let halt = create_halt_air(proof_options); let commit = create_commit_air(proof_options); + let keccak = create_keccak_air(proof_options); + let keccak_rnd = create_keccak_rnd_air(proof_options); + let keccak_rc = create_keccak_rc_air(proof_options).with_preprocessed( + tables::keccak_rc::preprocessed_commitment(proof_options), + tables::keccak_rc::NUM_PRECOMPUTED_COLS, + ); let register = create_register_air(proof_options).with_preprocessed( register::preprocessed_commitment(proof_options, elf.entry_point), register::NUM_PREPROCESSED_COLS, @@ -376,6 +392,9 @@ impl VmAirs { branches, halt, commit, + keccak, + keccak_rnd, + keccak_rc, register, pages, } @@ -576,11 +595,11 @@ pub fn verify_with_options( Traces::page_configs_from_elf_and_runtime(&program, &vm_proof.runtime_page_ranges); // Cross-check: table_counts must match the number of sub-proofs. 
- // Fixed tables (bitwise, decode, halt, commit, register) = 5, plus page tables. - let expected_proof_count = vm_proof.table_counts.total() + 5 + page_configs.len(); + // Fixed tables (bitwise, decode, halt, commit, keccak, keccak_rnd, keccak_rc, register) = 8, plus page tables. + let expected_proof_count = vm_proof.table_counts.total() + 8 + page_configs.len(); if expected_proof_count != vm_proof.proof.proofs.len() { return Err(Error::InvalidTableCounts(format!( - "table_counts total ({}) + 5 fixed + {} pages = {}, but proof contains {} sub-proofs", + "table_counts total ({}) + 8 fixed + {} pages = {}, but proof contains {} sub-proofs", vm_proof.table_counts.total(), page_configs.len(), expected_proof_count, diff --git a/prover/src/tables/keccak.rs b/prover/src/tables/keccak.rs index ec5e5eae8..45638dd0c 100644 --- a/prover/src/tables/keccak.rs +++ b/prover/src/tables/keccak.rs @@ -255,11 +255,21 @@ pub fn bus_interactions() -> Vec { } // 5. MEMW interactions for 25 lane reads (on mu) + 25 lane writes (on mu) - // Format: [old[8], is_register, address[DWordHL=2], value[8], ts[2], w2, w4, w8] + // Read format: [old[8], is_register, addr_lo32, addr_hi32, value[8], ts[2], w2, w4, w8] = 24 + // Write format: [is_register, addr_lo32, addr_hi32, value[8], ts[2], w2, w4, w8] = 16 for lane_idx in 0..25 { let x = lane_idx % 5; let y = lane_idx / 5; - let addr_start = cols::state_ptr(lane_idx, 0); + + // Address as DWordWL: lo32 = h0 + 2^16*h1, hi32 = h2 + 2^16*h3 + let addr_lo = BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: cols::state_ptr(lane_idx, 0) }, + LinearTerm::Column { coefficient: 65536, column: cols::state_ptr(lane_idx, 1) }, + ]); + let addr_hi = BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: cols::state_ptr(lane_idx, 2) }, + LinearTerm::Column { coefficient: 65536, column: cols::state_ptr(lane_idx, 3) }, + ]); // Read: old = input, value = input (read doesn't change) let mut read_values = Vec::with_capacity(24); 
@@ -272,11 +282,9 @@ pub fn bus_interactions() -> Vec { } // is_register = 0 read_values.push(BusValue::constant(0)); - // address as DWordHL (2 bus elements packed from 4 halfword columns) - read_values.push(BusValue::Packed { - start_column: addr_start, - packing: Packing::DWordHL, - }); + // address as DWordWL + read_values.push(addr_lo.clone()); + read_values.push(addr_hi.clone()); // value[0..8] = same as old (read) for b in 0..8 { read_values.push(BusValue::Packed { @@ -308,11 +316,9 @@ pub fn bus_interactions() -> Vec { let mut write_values = Vec::with_capacity(16); // is_register = 0 write_values.push(BusValue::constant(0)); - // address as DWordHL - write_values.push(BusValue::Packed { - start_column: addr_start, - packing: Packing::DWordHL, - }); + // address as DWordWL + write_values.push(addr_lo); + write_values.push(addr_hi); // value[0..8] = output bytes for b in 0..8 { write_values.push(BusValue::Packed { diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 6ed876682..0becdf782 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -40,6 +40,9 @@ use super::cpu::{self, CpuOperation}; use super::decode; use super::dvrm::{self, DvrmOperation}; use super::halt; +use super::keccak::{self, KeccakOperation}; +use super::keccak_rc; +use super::keccak_rnd::{self, KeccakRoundOperation}; use super::load::{self, LoadOperation}; use super::lt::{self, LtOperation}; use super::memw::{self, MemwOperation}; @@ -317,7 +320,7 @@ fn collect_cpu_ops( /// /// MEMW and LOAD collection requires sequential processing with state tracking. 
/// -/// Returns: (memw_ops, load_ops, lt_ops, shift_ops, bitwise_ops, commit_ops) +/// Returns: (memw_ops, load_ops, lt_ops, shift_ops, bitwise_ops, commit_ops, keccak_ops) #[allow(clippy::type_complexity)] fn collect_ops_from_cpu( cpu_ops: &[CpuOperation], @@ -330,6 +333,7 @@ fn collect_ops_from_cpu( Vec, Vec, Vec, + Vec, ) { let mut memw_ops = Vec::with_capacity(cpu_ops.len() * 3); let mut load_ops = Vec::with_capacity(cpu_ops.len() / 8 + 1); @@ -337,6 +341,7 @@ fn collect_ops_from_cpu( let mut shift_ops = Vec::with_capacity(cpu_ops.len() / 10 + 1); let mut bitwise_ops = Vec::with_capacity(cpu_ops.len() * 4); let mut commit_ops = Vec::new(); + let mut keccak_ops = Vec::new(); let mut current_commit_index = 0u32; let mut commit_ecall_count = 0u32; @@ -379,6 +384,40 @@ fn collect_ops_from_cpu( commit_ecall_count += 1; } + // Collect KeccakPermute ECALL operations + if op.ecall_keccak { + let state_addr = op.keccak_state_addr; + let mut input = [0u64; 25]; + for (i, lane) in input.iter_mut().enumerate() { + let addr = state_addr.wrapping_add(i as u64 * 8); + let mut val = 0u64; + for b in 0..8 { + let (byte_val, _ts) = memory_state.read_byte(addr + b as u64); + val |= (byte_val as u64) << (b * 8); + } + *lane = val; + } + let mut output = input; + executor::vm::instruction::execution::keccak_f1600(&mut output); + // Update memory state with output + for (i, &lane) in output.iter().enumerate() { + let addr = state_addr.wrapping_add(i as u64 * 8); + for b in 0..8 { + let byte_val = ((lane >> (b * 8)) & 0xFF) as u8; + memory_state.write_byte(addr + b as u64, byte_val, op.timestamp + 1); + } + } + let keccak_memw_ops = + collect_keccak_memw_ops(op, &input, &output, register_state); + memw_ops.extend(keccak_memw_ops); + keccak_ops.push(KeccakOperation { + timestamp: op.timestamp, + state_addr, + input, + output, + }); + } + // --- LT, SHIFT, and Bitwise (no state tracking needed) --- // Collect LT operations from SLT/BLT instructions @@ -422,6 +461,7 @@ fn 
collect_ops_from_cpu( shift_ops, bitwise_ops, commit_ops, + keccak_ops, ) } @@ -774,6 +814,59 @@ fn collect_halt_ops(register_state: &mut RegisterState) -> Vec { // ============================================================================= /// Collects LT operations from MEMW for timestamp ordering. +/// Collect MEMW operations for a KeccakPermute ECALL. +/// +/// Generates 25 read operations (input lanes at timestamp) and 25 write +/// operations (output lanes at timestamp+1). Each operation is 8 bytes wide. +fn collect_keccak_memw_ops( + op: &CpuOperation, + input: &[u64; 25], + output: &[u64; 25], + register_state: &mut RegisterState, +) -> Vec { + let ts = op.timestamp; + let state_addr = op.keccak_state_addr; + let mut memw_ops = Vec::with_capacity(53); // 25 reads + 25 writes + 3 register ops + + // Register reads for the ECALL: x10 (state_addr), x17 (syscall number) + // x10 read at ts + { + let reg_value = pack_register_value(state_addr); + let reg_addr = 2 * 10u64; + let (_old_val, old_ts) = register_state.read(10); + let old_timestamps = [old_ts, old_ts, 0, 0, 0, 0, 0, 0]; + let memw_op = MemwOperation::new(true, reg_addr, reg_value, ts, 2, true) + .with_old(reg_value, old_timestamps); + memw_ops.push(memw_op); + register_state.write(10, state_addr, ts); + } + + // 25 lane reads at timestamp + for (lane_idx, &lane_val) in input.iter().enumerate() { + let lane_addr = state_addr.wrapping_add(lane_idx as u64 * 8); + let mut value_bytes = [0u64; 8]; + for (b, byte) in value_bytes.iter_mut().enumerate() { + *byte = (lane_val >> (b * 8)) & 0xFF; + } + let memw_op = MemwOperation::new(false, lane_addr, value_bytes, ts, 8, true) + .with_old(value_bytes, [0; 8]); + memw_ops.push(memw_op); + } + + // 25 lane writes at timestamp+1 + for (lane_idx, &lane_val) in output.iter().enumerate() { + let lane_addr = state_addr.wrapping_add(lane_idx as u64 * 8); + let mut value_bytes = [0u64; 8]; + for (b, byte) in value_bytes.iter_mut().enumerate() { + *byte = (lane_val >> 
(b * 8)) & 0xFF; + } + let memw_op = MemwOperation::new(false, lane_addr, value_bytes, ts + 1, 8, false); + memw_ops.push(memw_op); + } + + memw_ops +} + /// /// From spec memw.md: /// - MEMW-C4 through MEMW-C7: old_timestamp[i] < timestamp (based on width) @@ -1439,6 +1532,196 @@ fn collect_bitwise_from_commit(commit_ops: &[CommitOperation]) -> Vec Vec { + use executor::vm::instruction::execution::{KECCAK_RC, KECCAK_RHO}; + + let mut ops = Vec::new(); + + for kop in keccak_ops { + let state_addr = kop.state_addr; + + // IS_HALF for state_ptr halfwords (100 per call) + for lane_idx in 0..25 { + let ptr = state_addr.wrapping_add(lane_idx as u64 * 8); + for shift in [0, 16, 32, 48] { + let half = ((ptr >> shift) & 0xFFFF) as u16; + ops.push(BitwiseOperation::halfword( + BitwiseOperationType::IsHalf, + (half & 0xFF) as u8, + ((half >> 8) & 0xFF) as u8, + )); + } + } + + // Replay keccak round computation to extract bitwise lookups + let mut state = kop.input; + for round in 0..24 { + // --- theta: Cxz chain XOR_BYTE (160) --- + let mut cxz = [[[0u8; 8]; 4]; 5]; + for x in 0..5 { + for b in 0..8 { + let v0 = ((state[x] >> (b * 8)) & 0xFF) as u8; + let v1 = ((state[x + 5] >> (b * 8)) & 0xFF) as u8; + cxz[x][0][b] = v0 ^ v1; + ops.push(BitwiseOperation::byte_op(BitwiseOperationType::XorByte, v0, v1)); + } + for stage in 1..4usize { + let y = stage + 1; + for b in 0..8 { + let prev = cxz[x][stage - 1][b]; + let sv = ((state[x + 5 * y] >> (b * 8)) & 0xFF) as u8; + cxz[x][stage][b] = prev ^ sv; + ops.push(BitwiseOperation::byte_op(BitwiseOperationType::XorByte, prev, sv)); + } + } + } + + // theta: HWSL for rotated C (20) + IS_BYTE (80) + let mut rotated_c = [[0u8; 8]; 5]; + for x in 0..5 { + let c = cxz[x][3]; + for hw in 0..4 { + let halfword = (c[hw * 2] as u16) | ((c[hw * 2 + 1] as u16) << 8); + let shifted = halfword << 1; // u16 wraps + let carry = if halfword >> 15 == 1 { 1u16 } else { 0 }; + ops.push(BitwiseOperation::new(BitwiseOperationType::Hwsl, + (halfword & 
0xFF) as u8, + ((halfword >> 8) & 0xFF) as u8, + 1, + )); + // IS_BYTE for cxz_left bytes + ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, (shifted & 0xFF) as u8)); + ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, ((shifted >> 8) & 0xFF) as u8)); + // IS_BYTE for cxz_right bytes + ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, (carry & 0xFF) as u8)); + ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, ((carry >> 8) & 0xFF) as u8)); + } + // Reconstruct rotated_c + let mut left_bytes = [0u8; 8]; + let mut right_bytes = [0u8; 8]; + for hw in 0..4 { + let halfword = (c[hw * 2] as u16) | ((c[hw * 2 + 1] as u16) << 8); + let shifted = halfword << 1; + let carry = if halfword >> 15 == 1 { 1u16 } else { 0 }; + left_bytes[hw * 2] = (shifted & 0xFF) as u8; + left_bytes[hw * 2 + 1] = ((shifted >> 8) & 0xFF) as u8; + right_bytes[hw * 2] = (carry & 0xFF) as u8; + right_bytes[hw * 2 + 1] = ((carry >> 8) & 0xFF) as u8; + } + for b in 0..8 { + rotated_c[x][b] = left_bytes[b].wrapping_add(right_bytes[(b + 7) % 8]); + } + } + + // theta: Dxz XOR_BYTE (40) + let mut d_bytes = [[0u8; 8]; 5]; + for x in 0..5 { + for b in 0..8 { + let a = cxz[(x + 4) % 5][3][b]; + let rb = rotated_c[(x + 1) % 5][b]; + d_bytes[x][b] = a ^ rb; + ops.push(BitwiseOperation::byte_op(BitwiseOperationType::XorByte, a, rb)); + } + } + + // theta final: XOR_BYTE (200) + let mut theta_lanes = [0u64; 25]; + for x in 0..5 { + for y in 0..5 { + let lane = state[x + 5 * y]; + let mut d_lane = 0u64; + for b in 0..8 { + d_lane |= (d_bytes[x][b] as u64) << (b * 8); + } + theta_lanes[x + 5 * y] = lane ^ d_lane; + for b in 0..8 { + let s = ((lane >> (b * 8)) & 0xFF) as u8; + ops.push(BitwiseOperation::byte_op(BitwiseOperationType::XorByte, s, d_bytes[x][b])); + } + } + } + + // rho: HWSL (100) + IS_BYTE (400) + for x in 0..5 { + for y in 0..5 { + let rho_offset = KECCAK_RHO[x][y] as usize; + let rnc_val = (rho_offset % 16) as u8; + let 
theta_lane = theta_lanes[x + 5 * y]; + for hw in 0..4 { + let halfword = ((theta_lane >> (hw * 16)) & 0xFFFF) as u16; + let (shifted, carry) = if rnc_val == 0 { + (halfword, 0u16) + } else { + (halfword << rnc_val, halfword >> (16 - rnc_val)) + }; + ops.push(BitwiseOperation::new(BitwiseOperationType::Hwsl, + (halfword & 0xFF) as u8, + ((halfword >> 8) & 0xFF) as u8, + rnc_val, + )); + // IS_BYTE for rot_left + ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, (shifted & 0xFF) as u8)); + ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, ((shifted >> 8) & 0xFF) as u8)); + // IS_BYTE for rot_right + ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, (carry & 0xFF) as u8)); + ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, ((carry >> 8) & 0xFF) as u8)); + } + } + } + + // pi: compute pi_lanes + let mut pi_lanes = [0u64; 25]; + for x in 0..5 { + for y in 0..5 { + let rotated = theta_lanes[x + 5 * y].rotate_left(KECCAK_RHO[x][y]); + let dst_x = y; + let dst_y = (2 * x + 3 * y) % 5; + pi_lanes[dst_x + 5 * dst_y] = rotated; + } + } + + // chi: AND_BYTE (200) + XOR_BYTE (200) + let mut chi_lanes = [0u64; 25]; + for x in 0..5 { + for y in 0..5 { + let not_next = !pi_lanes[(x + 1) % 5 + 5 * y]; + let next2 = pi_lanes[(x + 2) % 5 + 5 * y]; + let and_val = not_next & next2; + chi_lanes[x + 5 * y] = pi_lanes[x + 5 * y] ^ and_val; + for b in 0..8 { + let not_byte = ((not_next >> (b * 8)) & 0xFF) as u8; + let n2_byte = ((next2 >> (b * 8)) & 0xFF) as u8; + ops.push(BitwiseOperation::byte_op(BitwiseOperationType::AndByte, not_byte, n2_byte)); + let pi_byte = ((pi_lanes[x + 5 * y] >> (b * 8)) & 0xFF) as u8; + let and_byte = ((and_val >> (b * 8)) & 0xFF) as u8; + ops.push(BitwiseOperation::byte_op(BitwiseOperationType::XorByte, pi_byte, and_byte)); + } + } + } + + // iota: XOR_BYTE (8) + let rc_val = KECCAK_RC[round]; + for b in 0..8 { + let chi_byte = ((chi_lanes[0] >> (b * 8)) & 0xFF) as u8; + let rc_byte 
= ((rc_val >> (b * 8)) & 0xFF) as u8; + ops.push(BitwiseOperation::byte_op(BitwiseOperationType::XorByte, chi_byte, rc_byte)); + } + + // Update state + chi_lanes[0] ^= rc_val; + state = chi_lanes; + } + } + + ops +} + /// every address accessed during execution (ELF init + runtime stores/loads). /// ELF pages get their init data from the binary; all others are zero-init. fn generate_page_tables( @@ -1562,6 +1845,15 @@ pub struct Traces { /// COMMIT table for write syscall (byte-by-byte commit with recursive bus) pub commit: TraceTable, + + /// KECCAK core table (one row per keccak permutation call) + pub keccak: TraceTable, + + /// KECCAK_RND round table (24 rows per keccak call) + pub keccak_rnd: TraceTable, + + /// KECCAK_RC precomputed round constant table (32 rows) + pub keccak_rc: TraceTable, } /// Chunk raw ops and generate one trace table per chunk. @@ -1738,7 +2030,7 @@ impl Traces { // Initialize memory state from ELF so first accesses get correct old_value. let mut memory_state = MemoryState::from_elf(elf); let mut register_state = RegisterState::new(elf.entry_point); - let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops, commit_ops) = + let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops, commit_ops, keccak_ops) = collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state); // HALT finalization: 33 register MEMW operations at timestamp u64::MAX. @@ -1837,6 +2129,7 @@ impl Traces { .collect(); // COMMIT table sends IsByte and IsHalfword lookups bitwise_ops.extend(collect_bitwise_from_commit(&commit_ops)); + bitwise_ops.extend(collect_bitwise_from_keccak(&keccak_ops)); // CPU padding rows send IS_BYTE with all-zero values. // Add corresponding ops so the bitwise table multiplicities balance. @@ -1896,6 +2189,21 @@ impl Traces { // Generate remaining traces in parallel (page, register, halt, commit). // chunk_and_generate already handled cpu, lt, memw, load, mul, dvrm, branch above. 
let commit_trace = commit::generate_commit_trace(&commit_ops); + + // Generate keccak traces + let keccak_rnd_ops: Vec = keccak_ops + .iter() + .map(|op| KeccakRoundOperation { + timestamp: op.timestamp, + input: op.input, + output: op.output, + }) + .collect(); + let keccak_trace = keccak::generate_keccak_trace(&keccak_ops); + let keccak_rnd_trace = keccak_rnd::generate_keccak_rnd_trace(&keccak_rnd_ops); + let mut keccak_rc_trace = keccak_rc::generate_keccak_rc_trace(); + keccak_rc::update_multiplicities(&mut keccak_rc_trace, keccak_ops.len()); + let (pages, page_configs, register_trace, halt_trace); #[cfg(feature = "parallel")] { @@ -1947,6 +2255,9 @@ impl Traces { branches, halt: halt_trace, commit: commit_trace, + keccak: keccak_trace, + keccak_rnd: keccak_rnd_trace, + keccak_rc: keccak_rc_trace, }) } @@ -1974,7 +2285,7 @@ impl Traces { // Entry point = first instruction's PC (start of execution) let entry_point = cpu_ops.first().map_or(0, |op| op.decode.pc); let mut register_state = RegisterState::new(entry_point); - let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops, commit_ops) = + let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops, commit_ops, keccak_ops) = collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state); // HALT finalization: 33 register MEMW operations at timestamp u64::MAX. @@ -2071,6 +2382,7 @@ impl Traces { .collect(); // COMMIT table sends IsHalfword lookups bitwise_ops.extend(collect_bitwise_from_commit(&commit_ops)); + bitwise_ops.extend(collect_bitwise_from_keccak(&keccak_ops)); // CPU padding rows send IS_BYTE with all-zero values. 
let num_padding_rows: usize = cpu_ops @@ -2125,6 +2437,20 @@ impl Traces { let commit_trace = commit::generate_commit_trace(&commit_ops); + // Generate keccak traces + let keccak_rnd_ops: Vec = keccak_ops + .iter() + .map(|op| KeccakRoundOperation { + timestamp: op.timestamp, + input: op.input, + output: op.output, + }) + .collect(); + let keccak_trace = keccak::generate_keccak_trace(&keccak_ops); + let keccak_rnd_trace = keccak_rnd::generate_keccak_rnd_trace(&keccak_rnd_ops); + let mut keccak_rc_trace = keccak_rc::generate_keccak_rc_trace(); + keccak_rc::update_multiplicities(&mut keccak_rc_trace, keccak_ops.len()); + // Generate remaining traces in parallel (register, halt). // chunk_and_generate already handled cpu, lt, memw, load, mul, dvrm, branch above. let (register_trace, halt_trace); @@ -2166,6 +2492,9 @@ impl Traces { branches, halt: halt_trace, commit: commit_trace, + keccak: keccak_trace, + keccak_rnd: keccak_rnd_trace, + keccak_rc: keccak_rc_trace, }) } diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index 93d6d2971..d3311d93f 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -43,6 +43,9 @@ use crate::tables::dvrm::{ bus_interactions as dvrm_bus_interactions, cols as dvrm_cols, dvrm_constraints, }; use crate::tables::halt::{bus_interactions as halt_bus_interactions, cols as halt_cols}; +use crate::tables::keccak::{bus_interactions as keccak_bus_interactions, cols as keccak_cols}; +use crate::tables::keccak_rc::{bus_interactions as keccak_rc_bus_interactions, cols as keccak_rc_cols}; +use crate::tables::keccak_rnd::{bus_interactions as keccak_rnd_bus_interactions, cols as keccak_rnd_cols}; use crate::tables::load::{ bus_interactions as load_bus_interactions, cols as load_cols, constraints as load_constraints, }; @@ -769,3 +772,57 @@ pub fn create_register_air(proof_options: &ProofOptions) -> VmAir { ) .with_name("REGISTER") } + +/// Create KECCAK core AIR with bus interactions (no transition constraints yet). 
+pub fn create_keccak_air(proof_options: &ProofOptions) -> VmAir { + let transition_constraints: Vec>> = vec![]; + + let auxiliary_trace_build_data = AuxiliaryTraceBuildData { + interactions: keccak_bus_interactions(), + }; + + AirWithBuses::new( + keccak_cols::NUM_COLUMNS, + auxiliary_trace_build_data, + proof_options, + 1, + transition_constraints, + ) + .with_name("KECCAK") +} + +/// Create KECCAK_RND AIR with bus interactions (pi constraints TODO). +pub fn create_keccak_rnd_air(proof_options: &ProofOptions) -> VmAir { + let transition_constraints: Vec>> = vec![]; + + let auxiliary_trace_build_data = AuxiliaryTraceBuildData { + interactions: keccak_rnd_bus_interactions(), + }; + + AirWithBuses::new( + keccak_rnd_cols::NUM_COLUMNS, + auxiliary_trace_build_data, + proof_options, + 1, + transition_constraints, + ) + .with_name("KECCAK_RND") +} + +/// Create KECCAK_RC AIR with bus interactions (preprocessed table). +pub fn create_keccak_rc_air(proof_options: &ProofOptions) -> VmAir { + let transition_constraints: Vec>> = vec![]; + + let auxiliary_trace_build_data = AuxiliaryTraceBuildData { + interactions: keccak_rc_bus_interactions(), + }; + + AirWithBuses::new( + keccak_rc_cols::NUM_COLUMNS, + auxiliary_trace_build_data, + proof_options, + 1, + transition_constraints, + ) + .with_name("KECCAK_RC") +} diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index dcc54d935..2637440f6 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -716,6 +716,36 @@ fn test_prove_elfs_all_instructions_64() { ); } +#[test] +fn test_prove_elfs_keccak() { + let _ = env_logger::builder().is_test(true).try_init(); + + let (elf, logs, instructions) = run_asm_elf("test_keccak"); + let mut traces = + Traces::from_logs_minimal(&logs, instructions.clone(), &Default::default()).unwrap(); + + println!( + "keccak (fast): CPU {} rows, KECCAK {} rows, KECCAK_RND {} rows, KECCAK_RC {} rows, MEMW {} tables ({} rows), 
BITWISE {} rows", + traces.cpus[0].main_table.height, + traces.keccak.main_table.height, + traces.keccak_rnd.main_table.height, + traces.keccak_rc.main_table.height, + traces.memws.len(), + traces.memws[0].main_table.height, + traces.bitwise.main_table.height, + ); + println!( + "Bus interaction counts: KECCAK core={}, KECCAK_RND={}, KECCAK_RC={}", + crate::tables::keccak::bus_interactions().len(), + crate::tables::keccak_rnd::bus_interactions().len(), + crate::tables::keccak_rc::bus_interactions().len(), + ); + assert!( + prove_and_verify_vm_minimal(&elf, &mut traces), + "keccak prove/verify failed" + ); +} + #[test] fn test_prove_elfs_test_commit_4() { let elf_bytes = crate::test_utils::asm_elf_bytes("test_commit_4"); @@ -1763,7 +1793,7 @@ fn test_crafted_zero_count_proof_must_not_verify() { let airs = VmAirs::new(&elf, &proof_options, true, &[], &zero_counts); let verifier_air_refs = airs.air_refs(); - assert_eq!(verifier_air_refs.len(), 5); + assert_eq!(verifier_air_refs.len(), 8); let mut bitwise_trace = crate::tables::bitwise::generate_bitwise_trace(); From 71ff49b4e1acdd72edc54e33a22b91b23213a8d2 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Mon, 6 Apr 2026 12:40:50 -0300 Subject: [PATCH 03/14] Fix keccak round trace generation --- prover/src/tables/keccak.rs | 4 +- prover/src/tables/keccak_rnd.rs | 26 +++-- prover/src/tables/trace_builder.rs | 175 ++++++++++++++++++++++++----- 3 files changed, 165 insertions(+), 40 deletions(-) diff --git a/prover/src/tables/keccak.rs b/prover/src/tables/keccak.rs index 45638dd0c..0ce8db136 100644 --- a/prover/src/tables/keccak.rs +++ b/prover/src/tables/keccak.rs @@ -57,13 +57,13 @@ pub mod cols { /// Index into input_state[x][y][byte] #[inline] pub const fn input_state(x: usize, y: usize, byte: usize) -> usize { - INPUT_STATE + (x * 5 + y) * 8 + byte + INPUT_STATE + (x + 5 * y) * 8 + byte } /// Index into output_state[x][y][byte] #[inline] pub const fn output_state(x: usize, y: usize, byte: usize) -> usize { - 
OUTPUT_STATE + (x * 5 + y) * 8 + byte + OUTPUT_STATE + (x + 5 * y) * 8 + byte } /// Index into state_ptr[lane_idx][halfword] (DWordHL = 4 halfwords) diff --git a/prover/src/tables/keccak_rnd.rs b/prover/src/tables/keccak_rnd.rs index 3216b2be7..c3906b57b 100644 --- a/prover/src/tables/keccak_rnd.rs +++ b/prover/src/tables/keccak_rnd.rs @@ -101,7 +101,7 @@ pub mod cols { /// Index into start[x][y][byte] (200 bytes, row-major: y varies fastest) #[inline] pub const fn start(x: usize, y: usize, byte: usize) -> usize { - START + (x * 5 + y) * 8 + byte + START + (x + 5 * y) * 8 + byte } /// Index into Cxz[x][stage][byte] (160 bytes) @@ -131,31 +131,31 @@ pub mod cols { /// Index into theta[x][y][byte] #[inline] pub const fn theta(x: usize, y: usize, byte: usize) -> usize { - THETA + (x * 5 + y) * 8 + byte + THETA + (x + 5 * y) * 8 + byte } /// Index into rot_left[x][y][byte] #[inline] pub const fn rot_left(x: usize, y: usize, byte: usize) -> usize { - ROT_LEFT + (x * 5 + y) * 8 + byte + ROT_LEFT + (x + 5 * y) * 8 + byte } /// Index into rot_right[x][y][byte] #[inline] pub const fn rot_right(x: usize, y: usize, byte: usize) -> usize { - ROT_RIGHT + (x * 5 + y) * 8 + byte + ROT_RIGHT + (x + 5 * y) * 8 + byte } /// Index into pi[x][y][byte] #[inline] pub const fn pi(x: usize, y: usize, byte: usize) -> usize { - PI + (x * 5 + y) * 8 + byte + PI + (x + 5 * y) * 8 + byte } /// Index into chi_ands[x][y][byte] #[inline] pub const fn chi_ands(x: usize, y: usize, byte: usize) -> usize { - CHI_ANDS + (x * 5 + y) * 8 + byte + CHI_ANDS + (x + 5 * y) * 8 + byte } /// Index into chi[x][y][byte] @@ -179,13 +179,13 @@ pub mod cols { /// Index into rnc[x][y] #[inline] pub const fn rnc(x: usize, y: usize) -> usize { - RNC + x * 5 + y + RNC + x + 5 * y } /// Index into rbc[x][y][bit] #[inline] pub const fn rbc(x: usize, y: usize, bit: usize) -> usize { - RBC + (x * 5 + y) * 2 + bit + RBC + (x + 5 * y) * 2 + bit } } @@ -285,7 +285,10 @@ pub fn generate_keccak_rnd_trace( c_bytes[x] = 
cxz[x][3]; } - // Rotate C left by 1 bit using HWSL on each column's halfwords + // Rotate C left by 1 bit using HWSL decomposition. + // HWSL shifts each halfword independently. The carry from halfword k + // propagates to halfword (k+1)%4, which is a 2-byte offset: + // rotated_Cxz[z] = Cxz_left[z] + Cxz_right[(z-2) mod 8] let mut cxz_left_bytes = [[0u8; 8]; 5]; let mut cxz_right_bytes = [[0u8; 8]; 5]; let mut rotated_c = [[0u8; 8]; 5]; @@ -304,8 +307,9 @@ pub fn generate_keccak_rnd_trace( data[base + cols::cxz_right(x, hw * 2)] = FE::from(cxz_right_bytes[x][hw * 2] as u64); data[base + cols::cxz_right(x, hw * 2 + 1)] = FE::from(cxz_right_bytes[x][hw * 2 + 1] as u64); } + // Reconstruct: left[z] + right[(z-2) mod 8] for b in 0..8 { - rotated_c[x][b] = cxz_left_bytes[x][b].wrapping_add(cxz_right_bytes[x][(b + 7) % 8]); + rotated_c[x][b] = cxz_left_bytes[x][b].wrapping_add(cxz_right_bytes[x][(b + 6) % 8]); } } @@ -641,7 +645,7 @@ pub fn bus_interactions() -> Vec { BusValue::Packed { start_column: cols::cxz((x + 4) % 5, 3, b), packing: Packing::Direct }, BusValue::linear(vec![ LinearTerm::Column { coefficient: 1, column: cols::cxz_left((x + 1) % 5, b) }, - LinearTerm::Column { coefficient: 1, column: cols::cxz_right((x + 1) % 5, (b + 7) % 8) }, + LinearTerm::Column { coefficient: 1, column: cols::cxz_right((x + 1) % 5, (b + 6) % 8) }, ]), BusValue::Packed { start_column: cols::dxz(x, b), packing: Packing::Direct }, ], diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 0becdf782..1eea04691 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -399,16 +399,9 @@ fn collect_ops_from_cpu( } let mut output = input; executor::vm::instruction::execution::keccak_f1600(&mut output); - // Update memory state with output - for (i, &lane) in output.iter().enumerate() { - let addr = state_addr.wrapping_add(i as u64 * 8); - for b in 0..8 { - let byte_val = ((lane >> (b * 8)) & 0xFF) as u8; - 
memory_state.write_byte(addr + b as u64, byte_val, op.timestamp + 1); - } - } + // collect_keccak_memw_ops handles memory_state updates for reads and writes let keccak_memw_ops = - collect_keccak_memw_ops(op, &input, &output, register_state); + collect_keccak_memw_ops(op, &input, &output, memory_state); memw_ops.extend(keccak_memw_ops); keccak_ops.push(KeccakOperation { timestamp: op.timestamp, @@ -822,46 +815,53 @@ fn collect_keccak_memw_ops( op: &CpuOperation, input: &[u64; 25], output: &[u64; 25], - register_state: &mut RegisterState, + memory_state: &mut MemoryState, ) -> Vec { let ts = op.timestamp; let state_addr = op.keccak_state_addr; - let mut memw_ops = Vec::with_capacity(53); // 25 reads + 25 writes + 3 register ops - - // Register reads for the ECALL: x10 (state_addr), x17 (syscall number) - // x10 read at ts - { - let reg_value = pack_register_value(state_addr); - let reg_addr = 2 * 10u64; - let (_old_val, old_ts) = register_state.read(10); - let old_timestamps = [old_ts, old_ts, 0, 0, 0, 0, 0, 0]; - let memw_op = MemwOperation::new(true, reg_addr, reg_value, ts, 2, true) - .with_old(reg_value, old_timestamps); - memw_ops.push(memw_op); - register_state.write(10, state_addr, ts); - } + let mut memw_ops = Vec::with_capacity(50); // 25 reads + 25 writes // 25 lane reads at timestamp for (lane_idx, &lane_val) in input.iter().enumerate() { let lane_addr = state_addr.wrapping_add(lane_idx as u64 * 8); let mut value_bytes = [0u64; 8]; + let mut old_timestamps = [0u64; 8]; for (b, byte) in value_bytes.iter_mut().enumerate() { *byte = (lane_val >> (b * 8)) & 0xFF; + let (_old_val, old_ts) = memory_state.read_byte(lane_addr + b as u64); + old_timestamps[b] = old_ts; } let memw_op = MemwOperation::new(false, lane_addr, value_bytes, ts, 8, true) - .with_old(value_bytes, [0; 8]); + .with_old(value_bytes, old_timestamps); memw_ops.push(memw_op); + // Update memory state timestamps (reads update the timestamp) + for b in 0..8 { + memory_state.write_byte(lane_addr + 
b as u64, value_bytes[b] as u8, ts); + } } // 25 lane writes at timestamp+1 + // The reads above happened at ts, so old_timestamp for the write is ts. for (lane_idx, &lane_val) in output.iter().enumerate() { let lane_addr = state_addr.wrapping_add(lane_idx as u64 * 8); let mut value_bytes = [0u64; 8]; + // old_timestamps = ts for all 8 bytes (the read just happened at ts) + let old_timestamps = [ts; 8]; for (b, byte) in value_bytes.iter_mut().enumerate() { *byte = (lane_val >> (b * 8)) & 0xFF; } - let memw_op = MemwOperation::new(false, lane_addr, value_bytes, ts + 1, 8, false); + // old = input (the value before the write) + let mut old_bytes = [0u64; 8]; + for (b, byte) in old_bytes.iter_mut().enumerate() { + *byte = (input[lane_idx] >> (b * 8)) & 0xFF; + } + let memw_op = MemwOperation::new(false, lane_addr, value_bytes, ts + 1, 8, false) + .with_old(old_bytes, old_timestamps); memw_ops.push(memw_op); + // Update memory state + for b in 0..8 { + memory_state.write_byte(lane_addr + b as u64, value_bytes[b] as u8, ts + 1); + } } memw_ops @@ -1615,7 +1615,7 @@ fn collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec> 8) & 0xFF) as u8; } for b in 0..8 { - rotated_c[x][b] = left_bytes[b].wrapping_add(right_bytes[(b + 7) % 8]); + rotated_c[x][b] = left_bytes[b].wrapping_add(right_bytes[(b + 6) % 8]); } } @@ -2546,3 +2546,124 @@ impl Traces { Self::from_logs_trimmed(logs, instructions, max_rows) } } + +#[cfg(test)] +mod keccak_debug_tests { + use super::*; + use executor::vm::instruction::execution::keccak_f1600; + + #[test] + fn test_keccak_bitwise_ops_count() { + let mut input = [0u64; 25]; + let mut output = input; + keccak_f1600(&mut output); + let kop = KeccakOperation { timestamp: 42, state_addr: 0x1000, input, output }; + let ops = collect_bitwise_from_keccak(&[kop]); + + let xor_count = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::XorByte).count(); + let and_count = ops.iter().filter(|o| o.lookup_type == 
BitwiseOperationType::AndByte).count(); + let is_byte_count = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::IsByte).count(); + let hwsl_count = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::Hwsl).count(); + let is_half_count = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::IsHalf).count(); + + println!("Bitwise ops from 1 keccak call:"); + println!(" XorByte: {} (expected: 24 * 608 = {})", xor_count, 24 * 608); + println!(" AndByte: {} (expected: 24 * 200 = {})", and_count, 24 * 200); + println!(" IsByte: {} (expected: 24 * 480 = {})", is_byte_count, 24 * 480); + println!(" Hwsl: {} (expected: 24 * 120 = {})", hwsl_count, 24 * 120); + println!(" IsHalf: {} (expected: 100)", is_half_count); + println!(" Total: {}", ops.len()); + } +} + +#[cfg(test)] +mod keccak_trace_debug_tests { + use super::*; + use executor::vm::instruction::execution::keccak_f1600; + use crate::tables::keccak_rnd::cols as rnd_cols; + use crate::tables::keccak::cols as core_cols; + use crate::tables::types::FE; + + #[test] + fn test_keccak_bus_values_match() { + let input = [0u64; 25]; + let mut output = input; + keccak_f1600(&mut output); + + let kop = KeccakOperation { timestamp: 42, state_addr: 0x1000, input, output }; + let rop = KeccakRoundOperation { timestamp: 42, input, output }; + + let core_trace = keccak::generate_keccak_trace(&[kop]); + let rnd_trace = keccak_rnd::generate_keccak_rnd_trace(&[rop]); + + // Check: round 0, start bytes match core input_state bytes + let core_base = 0 * core_cols::NUM_COLUMNS; + let rnd_base = 0 * rnd_cols::NUM_COLUMNS; // round 0 + + for x in 0..5 { + for y in 0..5 { + for b in 0..8 { + let core_val = &core_trace.main_table.data[core_base + core_cols::input_state(x, y, b)]; + let rnd_val = &rnd_trace.main_table.data[rnd_base + rnd_cols::start(x, y, b)]; + assert_eq!(core_val, rnd_val, "Round 0 start mismatch at ({x},{y},{b})"); + } + } + } + println!("Round 0 start == core input_state ✓"); + + // Check: each 
round's output matches keccak_f1600 round-by-round + let mut ref_state = input; + for round in 0..24 { + // Apply one round of keccak_f1600 + let rc = executor::vm::instruction::execution::KECCAK_RC[round]; + let mut c = [0u64; 5]; + for x in 0..5 { c[x] = ref_state[x] ^ ref_state[x+5] ^ ref_state[x+10] ^ ref_state[x+15] ^ ref_state[x+20]; } + let mut d = [0u64; 5]; + for x in 0..5 { d[x] = c[(x+4)%5] ^ c[(x+1)%5].rotate_left(1); } + for i in 0..25 { ref_state[i] ^= d[i % 5]; } + let mut b = [0u64; 25]; + for x in 0..5 { for y in 0..5 { b[y + 5*((2*x+3*y)%5)] = ref_state[x+5*y].rotate_left(executor::vm::instruction::execution::KECCAK_RHO[x][y]); } } + for x in 0..5 { for y in 0..5 { ref_state[x+5*y] = b[x+5*y] ^ (!b[(x+1)%5+5*y] & b[(x+2)%5+5*y]); } } + ref_state[0] ^= rc; + + // Compare with round chip's output (iota for lane 0, chi for rest) + let rnd_base_r = round * rnd_cols::NUM_COLUMNS; + for lane in 0..25 { + let x = lane % 5; // This is how keccak indexes: lane = x + 5*y + let y = lane / 5; + for byte_idx in 0..8 { + let expected = ((ref_state[lane] >> (byte_idx * 8)) & 0xFF) as u64; + let trace_col = if x == 0 && y == 0 { + rnd_cols::iota(byte_idx) + } else { + rnd_cols::chi(x, y, byte_idx) + }; + let expected_fe = FE::from(expected); + let trace_fe = &rnd_trace.main_table.data[rnd_base_r + trace_col]; + if &expected_fe != trace_fe { + panic!("Round {round} lane ({x},{y}) byte {byte_idx}: expected {expected_fe:?}, trace {trace_fe:?}"); + } + } + } + } + println!("All 24 rounds match keccak_f1600 ✓"); + + // Check: round 23 out matches core output_state + let rnd_base_23 = 23 * rnd_cols::NUM_COLUMNS; + for x in 0..5 { + for y in 0..5 { + for b in 0..8 { + let core_val = &core_trace.main_table.data[core_base + core_cols::output_state(x, y, b)]; + // out[0][0] = iota, out[x][y] = chi for rest + let rnd_val = if x == 0 && y == 0 { + &rnd_trace.main_table.data[rnd_base_23 + rnd_cols::iota(b)] + } else { + &rnd_trace.main_table.data[rnd_base_23 + 
rnd_cols::chi(x, y, b)] + }; + assert_eq!(core_val, rnd_val, "Round 23 out mismatch at ({x},{y},{b}): core={core_val:?} rnd={rnd_val:?}"); + } + } + } + println!("Round 23 out == core output_state ✓"); + } +} From b8213dd11c7a8a2fe7455cb757fa3745d4bcb5fb Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Mon, 6 Apr 2026 13:11:28 -0300 Subject: [PATCH 04/14] Use from_elf_and_logs for keccak test --- prover/src/tables/keccak.rs | 71 +++++++--------------------- prover/src/tables/trace_builder.rs | 44 ++++++----------- prover/src/tests/prove_elfs_tests.rs | 6 ++- 3 files changed, 37 insertions(+), 84 deletions(-) diff --git a/prover/src/tables/keccak.rs b/prover/src/tables/keccak.rs index 0ce8db136..cfa5d9f8b 100644 --- a/prover/src/tables/keccak.rs +++ b/prover/src/tables/keccak.rs @@ -254,9 +254,9 @@ pub fn bus_interactions() -> Vec { } } - // 5. MEMW interactions for 25 lane reads (on mu) + 25 lane writes (on mu) - // Read format: [old[8], is_register, addr_lo32, addr_hi32, value[8], ts[2], w2, w4, w8] = 24 - // Write format: [is_register, addr_lo32, addr_hi32, value[8], ts[2], w2, w4, w8] = 16 + // 5. 
MEMW interactions: 25 combined read+write per lane (per spec) + // Format: [old[8], is_register, addr_lo32, addr_hi32, value[8], ts[2], w2, w4, w8] = 24 + // old = input_state (read), value = output_state (write) for lane_idx in 0..25 { let x = lane_idx % 5; let y = lane_idx / 5; @@ -271,79 +271,44 @@ pub fn bus_interactions() -> Vec { LinearTerm::Column { coefficient: 65536, column: cols::state_ptr(lane_idx, 3) }, ]); - // Read: old = input, value = input (read doesn't change) - let mut read_values = Vec::with_capacity(24); - // old[0..8] = input bytes + let mut values = Vec::with_capacity(24); + // old[0..8] = input_state bytes (the value being read) for b in 0..8 { - read_values.push(BusValue::Packed { + values.push(BusValue::Packed { start_column: cols::input_state(x, y, b), packing: Packing::Direct, }); } // is_register = 0 - read_values.push(BusValue::constant(0)); + values.push(BusValue::constant(0)); // address as DWordWL - read_values.push(addr_lo.clone()); - read_values.push(addr_hi.clone()); - // value[0..8] = same as old (read) + values.push(addr_lo); + values.push(addr_hi); + // value[0..8] = output_state bytes (the value being written) for b in 0..8 { - read_values.push(BusValue::Packed { - start_column: cols::input_state(x, y, b), + values.push(BusValue::Packed { + start_column: cols::output_state(x, y, b), packing: Packing::Direct, }); } // timestamp - read_values.push(BusValue::Packed { + values.push(BusValue::Packed { start_column: cols::TIMESTAMP_0, packing: Packing::Direct, }); - read_values.push(BusValue::Packed { - start_column: cols::TIMESTAMP_1, - packing: Packing::Direct, - }); - // write2=0, write4=0, write8=1 - read_values.push(BusValue::constant(0)); - read_values.push(BusValue::constant(0)); - read_values.push(BusValue::constant(1)); - - interactions.push(BusInteraction::sender( - BusId::Memw, - Multiplicity::Column(cols::MU), - read_values, - )); - - // Write: new value = output, timestamp = ts + 1 - let mut write_values = 
Vec::with_capacity(16); - // is_register = 0 - write_values.push(BusValue::constant(0)); - // address as DWordWL - write_values.push(addr_lo); - write_values.push(addr_hi); - // value[0..8] = output bytes - for b in 0..8 { - write_values.push(BusValue::Packed { - start_column: cols::output_state(x, y, b), - packing: Packing::Direct, - }); - } - // timestamp + 1 - write_values.push(BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: cols::TIMESTAMP_0 }, - LinearTerm::Constant(1), - ])); - write_values.push(BusValue::Packed { + values.push(BusValue::Packed { start_column: cols::TIMESTAMP_1, packing: Packing::Direct, }); // write2=0, write4=0, write8=1 - write_values.push(BusValue::constant(0)); - write_values.push(BusValue::constant(0)); - write_values.push(BusValue::constant(1)); + values.push(BusValue::constant(0)); + values.push(BusValue::constant(0)); + values.push(BusValue::constant(1)); interactions.push(BusInteraction::sender( BusId::Memw, Multiplicity::Column(cols::MU), - write_values, + values, )); } diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 1eea04691..0327eb97f 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -819,48 +819,34 @@ fn collect_keccak_memw_ops( ) -> Vec { let ts = op.timestamp; let state_addr = op.keccak_state_addr; - let mut memw_ops = Vec::with_capacity(50); // 25 reads + 25 writes + let mut memw_ops = Vec::with_capacity(25); - // 25 lane reads at timestamp - for (lane_idx, &lane_val) in input.iter().enumerate() { + // Per spec: single combined read+write MEMW per lane at `timestamp`. + // input = [0, state_ptr, output_state, timestamp, 0, 0, 1], output = input_state + // The MEMW table sees: old=input_state, value=output_state, is_read=true. 
+ for (lane_idx, (&in_lane, &out_lane)) in input.iter().zip(output.iter()).enumerate() { let lane_addr = state_addr.wrapping_add(lane_idx as u64 * 8); - let mut value_bytes = [0u64; 8]; + + let mut old_bytes = [0u64; 8]; let mut old_timestamps = [0u64; 8]; - for (b, byte) in value_bytes.iter_mut().enumerate() { - *byte = (lane_val >> (b * 8)) & 0xFF; + for b in 0..8 { + old_bytes[b] = (in_lane >> (b * 8)) & 0xFF; let (_old_val, old_ts) = memory_state.read_byte(lane_addr + b as u64); old_timestamps[b] = old_ts; } - let memw_op = MemwOperation::new(false, lane_addr, value_bytes, ts, 8, true) - .with_old(value_bytes, old_timestamps); - memw_ops.push(memw_op); - // Update memory state timestamps (reads update the timestamp) - for b in 0..8 { - memory_state.write_byte(lane_addr + b as u64, value_bytes[b] as u8, ts); - } - } - // 25 lane writes at timestamp+1 - // The reads above happened at ts, so old_timestamp for the write is ts. - for (lane_idx, &lane_val) in output.iter().enumerate() { - let lane_addr = state_addr.wrapping_add(lane_idx as u64 * 8); let mut value_bytes = [0u64; 8]; - // old_timestamps = ts for all 8 bytes (the read just happened at ts) - let old_timestamps = [ts; 8]; for (b, byte) in value_bytes.iter_mut().enumerate() { - *byte = (lane_val >> (b * 8)) & 0xFF; - } - // old = input (the value before the write) - let mut old_bytes = [0u64; 8]; - for (b, byte) in old_bytes.iter_mut().enumerate() { - *byte = (input[lane_idx] >> (b * 8)) & 0xFF; + *byte = (out_lane >> (b * 8)) & 0xFF; } - let memw_op = MemwOperation::new(false, lane_addr, value_bytes, ts + 1, 8, false) + + let memw_op = MemwOperation::new(false, lane_addr, value_bytes, ts, 8, true) .with_old(old_bytes, old_timestamps); memw_ops.push(memw_op); + // Update memory state - for b in 0..8 { - memory_state.write_byte(lane_addr + b as u64, value_bytes[b] as u8, ts + 1); + for (b, &val) in value_bytes.iter().enumerate() { + memory_state.write_byte(lane_addr + b as u64, val as u8, ts); } } diff 
--git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 2637440f6..b26e79525 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -720,9 +720,11 @@ fn test_prove_elfs_all_instructions_64() { fn test_prove_elfs_keccak() { let _ = env_logger::builder().is_test(true).try_init(); - let (elf, logs, instructions) = run_asm_elf("test_keccak"); + let (elf, logs, _instructions) = run_asm_elf("test_keccak"); + // Must use from_elf_and_logs (not from_logs_minimal) because keccak accesses + // RAM (stack memory), which requires PAGE tables for Memory bus balance. let mut traces = - Traces::from_logs_minimal(&logs, instructions.clone(), &Default::default()).unwrap(); + Traces::from_elf_and_logs(&elf, &logs, &Default::default()).unwrap(); println!( "keccak (fast): CPU {} rows, KECCAK {} rows, KECCAK_RND {} rows, KECCAK_RC {} rows, MEMW {} tables ({} rows), BITWISE {} rows", From 7b5fd44413cb00118bddf3809bb55b382e4d3fda Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Mon, 6 Apr 2026 15:49:28 -0300 Subject: [PATCH 05/14] Align with spec: shared ECALL bus, MEMW read_addr for x10, pi verification and state_ptr ADD constraints --- prover/src/tables/cpu.rs | 53 +------- prover/src/tables/keccak.rs | 107 ++++++++++++--- prover/src/tables/keccak_rnd.rs | 208 +++++++++++++++++++++++++++++ prover/src/tables/trace_builder.rs | 21 ++- prover/src/test_utils.rs | 12 +- prover/src/tests/cpu_tests.rs | 7 +- 6 files changed, 331 insertions(+), 77 deletions(-) diff --git a/prover/src/tables/cpu.rs b/prover/src/tables/cpu.rs index 8b97b180f..f59281e0a 100644 --- a/prover/src/tables/cpu.rs +++ b/prover/src/tables/cpu.rs @@ -2019,21 +2019,12 @@ pub fn bus_interactions() -> Vec { )); } - // ECALL interaction for HALT and COMMIT (excludes keccak) + // ECALL interaction (shared bus for HALT, COMMIT, and KECCAK) // ------------------------------------------------------------------------- - // multiplicity = ECALL - 
ECALL_KECCAK + // multiplicity = ECALL (all ECALLs, each receiver matches on syscall number) interactions.push(BusInteraction::sender( BusId::Ecall, - Multiplicity::Linear(vec![ - LinearTerm::Column { - coefficient: 1, - column: cols::ECALL, - }, - LinearTerm::Column { - coefficient: -1, - column: cols::ECALL_KECCAK, - }, - ]), + Multiplicity::Column(cols::ECALL), vec![ BusValue::Packed { start_column: cols::TIMESTAMP, @@ -2059,44 +2050,6 @@ pub fn bus_interactions() -> Vec { ], )); - // EcallKeccak interaction (CPU → KECCAK core chip) - // ------------------------------------------------------------------------- - // multiplicity = ECALL_KECCAK - // Payload: [timestamp_lo, timestamp_hi, syscall_lo32, syscall_hi32, state_addr_lo32, state_addr_hi32] - interactions.push(BusInteraction::sender( - BusId::EcallKeccak, - Multiplicity::Column(cols::ECALL_KECCAK), - vec![ - BusValue::Packed { - start_column: cols::TIMESTAMP, - packing: Packing::Direct, - }, - BusValue::constant(0), // timestamp_hi = 0 - BusValue::linear(vec![ - LinearTerm::Column { - coefficient: 1, - column: cols::RV1_0, - }, - LinearTerm::Column { - coefficient: 65536, - column: cols::RV1_1, - }, - ]), - BusValue::Packed { - start_column: cols::RV1_2, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::KECCAK_STATE_ADDR_0, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::KECCAK_STATE_ADDR_1, - packing: Packing::Direct, - }, - ], - )); - interactions } diff --git a/prover/src/tables/keccak.rs b/prover/src/tables/keccak.rs index cfa5d9f8b..b094459f3 100644 --- a/prover/src/tables/keccak.rs +++ b/prover/src/tables/keccak.rs @@ -16,9 +16,11 @@ //! 
| mu | 1 | Multiplicity flag | use executor::vm::instruction::execution::KECCAK_SYSCALL_NUMBER; +use stark::constraints::transition::TransitionConstraint; use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; use stark::trace::TraceTable; +use crate::constraints::templates::{AddConstraint, AddOperand}; use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; // ========================================================================= @@ -157,9 +159,11 @@ pub fn bus_interactions() -> Vec { let syscall_hi = KECCAK_SYSCALL_NUMBER >> 32; let mut interactions = Vec::with_capacity(160); - // 1. EcallKeccak receiver: [ts_lo, ts_hi, syscall_lo32, syscall_hi32, addr_lo32, addr_hi32] + // 1. ECALL receiver (shared bus, per spec keccak:c:output) + // Format: [ts_lo, ts_hi, syscall_lo32, syscall_hi32] + // Syscall number: lo32 = 2^32-2, hi32 = 2^32-1 interactions.push(BusInteraction::receiver( - BusId::EcallKeccak, + BusId::Ecall, Multiplicity::Column(cols::MU), vec![ BusValue::Packed { @@ -172,22 +176,54 @@ pub fn bus_interactions() -> Vec { }, BusValue::constant(syscall_lo), BusValue::constant(syscall_hi), - // state_addr as DWordWL from DWordBL bytes - BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: cols::addr(0) }, - LinearTerm::Column { coefficient: 256, column: cols::addr(1) }, - LinearTerm::Column { coefficient: 65536, column: cols::addr(2) }, - LinearTerm::Column { coefficient: 16777216, column: cols::addr(3) }, - ]), - BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: cols::addr(4) }, - LinearTerm::Column { coefficient: 256, column: cols::addr(5) }, - LinearTerm::Column { coefficient: 65536, column: cols::addr(6) }, - LinearTerm::Column { coefficient: 16777216, column: cols::addr(7) }, - ]), ], )); + // 2. 
MEMW read_addr: read register x10 to bind addr (per spec keccak:c:read_addr) + // Format: [old[8], is_register=1, base_addr=[20,0], value[8], ts, ts_hi, write2=1, write4=0, write8=0] + // For register read: old = value = addr as WL + 6 zeros + { + // addr as DWordWL from DWordBL bytes: lo32 = sum(addr[0..4] * 256^i), hi32 = sum(addr[4..8] * 256^i) + let addr_lo = BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: cols::addr(0) }, + LinearTerm::Column { coefficient: 256, column: cols::addr(1) }, + LinearTerm::Column { coefficient: 65536, column: cols::addr(2) }, + LinearTerm::Column { coefficient: 16777216, column: cols::addr(3) }, + ]); + let addr_hi = BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: cols::addr(4) }, + LinearTerm::Column { coefficient: 256, column: cols::addr(5) }, + LinearTerm::Column { coefficient: 65536, column: cols::addr(6) }, + LinearTerm::Column { coefficient: 16777216, column: cols::addr(7) }, + ]); + let mut values = Vec::with_capacity(24); + // old[0..7] = addr as WL + 6 zeros + values.push(addr_lo.clone()); + values.push(addr_hi.clone()); + for _ in 2..8 { values.push(BusValue::constant(0)); } + // is_register = 1 + values.push(BusValue::constant(1)); + // base_address = 2*10 = 20 (register x10) + values.push(BusValue::constant(20)); + values.push(BusValue::constant(0)); + // value[0..7] = same as old (read) + values.push(addr_lo); + values.push(addr_hi); + for _ in 2..8 { values.push(BusValue::constant(0)); } + // timestamp + values.push(BusValue::Packed { start_column: cols::TIMESTAMP_0, packing: Packing::Direct }); + values.push(BusValue::Packed { start_column: cols::TIMESTAMP_1, packing: Packing::Direct }); + // write2=1, write4=0, write8=0 (register access) + values.push(BusValue::constant(1)); + values.push(BusValue::constant(0)); + values.push(BusValue::constant(0)); + interactions.push(BusInteraction::sender( + BusId::Memw, + Multiplicity::Column(cols::MU), + values, + )); + } + // 2. 
Keccak bus: send (timestamp, 0, input_state[200]) { let mut values = vec![ @@ -314,3 +350,44 @@ pub fn bus_interactions() -> Vec { interactions } + +// ========================================================================= +// Constraints +// ========================================================================= + +/// Create constraints for the KECCAK core chip. +/// +/// Per spec (keccak:c:state_ptr): ADD template for each lane: +/// state_ptr[lane] = addr + 8 * lane_idx +/// +/// 25 lane pointers × 2 constraints per ADD = 50 constraints total. +/// Conditional on mu (only real rows). +pub fn create_constraints( + constraint_idx_start: usize, +) -> ( + Vec>>, + usize, +) { + let mut constraints: Vec>> = + Vec::with_capacity(50); + let mut idx = constraint_idx_start; + + // state_ptr[lane] = addr + 8*lane_idx + // addr is DWordBL (8 bytes), state_ptr is DWordHL (4 halfwords) + // ADD: lhs = addr (DWordBL→DWordWL), rhs = 8*lane_idx (constant), sum = state_ptr (DWordHL→DWordWL) + for lane_idx in 0..25 { + let offset = (lane_idx * 8) as i64; + let (c0, c1) = AddConstraint::new_pair( + vec![cols::MU], // conditional on mu + AddOperand::from_dword_bl(cols::ADDR), + AddOperand::constant(offset), + AddOperand::from_dword_hl(cols::state_ptr(lane_idx, 0)), + idx, + ); + constraints.push(Box::new(c0)); + constraints.push(Box::new(c1)); + idx += 2; + } + + (constraints, idx) +} diff --git a/prover/src/tables/keccak_rnd.rs b/prover/src/tables/keccak_rnd.rs index c3906b57b..a9e23996f 100644 --- a/prover/src/tables/keccak_rnd.rs +++ b/prover/src/tables/keccak_rnd.rs @@ -27,7 +27,12 @@ //! 
| mu | 1 | Multiplicity (1 for real, 0 for padding) | use executor::vm::instruction::execution::{KECCAK_RC, KECCAK_RHO}; +use math::field::element::FieldElement; +use math::field::traits::{IsField, IsSubFieldOf}; +use stark::constraints::transition::TransitionConstraint; +use stark::traits::TransitionEvaluationContext; use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; +use stark::table::TableView; use stark::trace::TraceTable; use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; @@ -772,3 +777,206 @@ pub fn bus_interactions() -> Vec { interactions } + +// ========================================================================= +// Constraints: Pi verification (200 degree-3 polynomial constraints) +// ========================================================================= + +/// Constraint verifying pi[x][y][z] = rho[(x+3y)%5][x][z]. +/// +/// rho is reconstructed from rot_left/rot_right using the rbc mux: +/// rho[z] = (1-b0)(1-b1)(L[z]+R[(z-2)%8]) + b0(1-b1)(L[(z-2)%8]+R[(z-4)%8]) +/// + (1-b0)b1(L[(z-4)%8]+R[(z-6)%8]) + b0*b1(L[(z-6)%8]+R[z]) +/// +/// where (L,R) = (rot_left, rot_right) at the source lane, and +/// b0 = rbc[src_x][src_y][0], b1 = rbc[src_x][src_y][1]. 
+pub struct PiConstraint { + constraint_idx: usize, + /// Destination coordinates in pi + x: usize, + y: usize, + z: usize, + /// Source coordinates: (sx, sy) = ((x + 3y) % 5, x) + sx: usize, + sy: usize, +} + +impl PiConstraint { + pub fn new(constraint_idx: usize, x: usize, y: usize, z: usize) -> Self { + let sx = (x + 3 * y) % 5; + let sy = x; + Self { + constraint_idx, + x, + y, + z, + sx, + sy, + } + } + + fn compute(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + let one = FieldElement::::one(); + let z = self.z; + + // Source lane rbc bits + let b0 = step + .get_main_evaluation_element(0, cols::rbc(self.sx, self.sy, 0)) + .clone(); + let b1 = step + .get_main_evaluation_element(0, cols::rbc(self.sx, self.sy, 1)) + .clone(); + + let not_b0 = &one - &b0; + let not_b1 = &one - &b1; + + // Helper to get rot_left/rot_right at source lane with byte index + let l = |byte_idx: usize| { + step.get_main_evaluation_element(0, cols::rot_left(self.sx, self.sy, byte_idx)) + .clone() + }; + let r = |byte_idx: usize| { + step.get_main_evaluation_element(0, cols::rot_right(self.sx, self.sy, byte_idx)) + .clone() + }; + + // Corrected offsets: (z-2k) mod 8 for rbc case k + let case0 = ¬_b0 * ¬_b1 * (l(z) + r((z + 6) % 8)); + let case1 = &b0 * ¬_b1 * (l((z + 6) % 8) + r((z + 4) % 8)); + let case2 = ¬_b0 * &b1 * (l((z + 4) % 8) + r((z + 2) % 8)); + let case3 = &b0 * &b1 * (l((z + 2) % 8) + r(z)); + + let expected = case0 + case1 + case2 + case3; + + let pi_val = step + .get_main_evaluation_element(0, cols::pi(self.x, self.y, self.z)) + .clone(); + + // pi - expected = 0 (degree 3: b0 * b1 * column) + // No mu guard needed: on padding rows all columns are zero, + // so expected=0 and pi=0, satisfying the constraint. 
+ pi_val - expected + } +} + +impl TransitionConstraint for PiConstraint { + fn degree(&self) -> usize { + // b0 * b1 * (L + R) has degree 3 + 3 + } + + fn constraint_idx(&self) -> usize { + self.constraint_idx + } + + fn end_exemptions(&self) -> usize { + 0 + } + + fn evaluate( + &self, + evaluation_context: &TransitionEvaluationContext, + transition_evaluations: &mut [FieldElement], + ) { + match evaluation_context { + TransitionEvaluationContext::Prover { + frame, + periodic_values: _, + rap_challenges: _, + .. + } => { + let constraint_value = self.compute(frame.get_evaluation_step(0)); + transition_evaluations[self.constraint_idx] = constraint_value.to_extension(); + } + + TransitionEvaluationContext::Verifier { + frame, + periodic_values: _, + rap_challenges: _, + .. + } => { + let constraint_value = self.compute(frame.get_evaluation_step(0)); + transition_evaluations[self.constraint_idx] = constraint_value; + } + } + } +} + +/// Create all pi verification constraints (200 total: 5×5×8). 
+pub fn create_constraints( + constraint_idx_start: usize, +) -> ( + Vec>>, + usize, +) { + let mut constraints: Vec>> = + Vec::with_capacity(200); + let mut idx = constraint_idx_start; + + for x in 0..5 { + for y in 0..5 { + for z in 0..8 { + constraints.push(Box::new(PiConstraint::new(idx, x, y, z))); + idx += 1; + } + } + } + + (constraints, idx) +} + +#[cfg(test)] +mod tests { + use super::*; + use executor::vm::instruction::execution::keccak_f1600; + + #[test] + fn test_pi_constraint_values() { + let input = [0u64; 25]; + let mut output = input; + keccak_f1600(&mut output); + let op = KeccakRoundOperation { timestamp: 42, input, output }; + let trace = generate_keccak_rnd_trace(&[op]); + + // Check pi constraint on round 0 + for x in 0..5 { + for y in 0..5 { + let sx = (x + 3 * y) % 5; + let sy = x; + let rho_offset = KECCAK_RHO[sx][sy] as usize; + let rbc_val = rho_offset / 16; + let b0 = (rbc_val & 1) as u64; + let b1 = ((rbc_val >> 1) & 1) as u64; + + for z in 0..8 { + let base = 0 * cols::NUM_COLUMNS; + let pi_val = &trace.main_table.data[base + cols::pi(x, y, z)]; + + // Reconstruct expected from rot_left/rot_right + let l = |bz: usize| &trace.main_table.data[base + cols::rot_left(sx, sy, bz)]; + let r = |bz: usize| &trace.main_table.data[base + cols::rot_right(sx, sy, bz)]; + + let expected = if b0 == 0 && b1 == 0 { + l(z) + r((z + 6) % 8) + } else if b0 == 1 && b1 == 0 { + l((z + 6) % 8) + r((z + 4) % 8) + } else if b0 == 0 && b1 == 1 { + l((z + 4) % 8) + r((z + 2) % 8) + } else { + l((z + 2) % 8) + r(z) + }; + + assert_eq!( + pi_val, &expected, + "Pi mismatch at ({x},{y},{z}): src=({sx},{sy}), rbc=({b0},{b1}), rho_offset={rho_offset}, pi={pi_val:?}, expected={expected:?}" + ); + } + } + } + println!("All pi constraints verified for round 0 ✓"); + } +} diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 0327eb97f..c0ca82356 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ 
-399,9 +399,9 @@ fn collect_ops_from_cpu( } let mut output = input; executor::vm::instruction::execution::keccak_f1600(&mut output); - // collect_keccak_memw_ops handles memory_state updates for reads and writes + // collect_keccak_memw_ops handles memory_state + register_state updates let keccak_memw_ops = - collect_keccak_memw_ops(op, &input, &output, memory_state); + collect_keccak_memw_ops(op, &input, &output, memory_state, register_state); memw_ops.extend(keccak_memw_ops); keccak_ops.push(KeccakOperation { timestamp: op.timestamp, @@ -816,12 +816,25 @@ fn collect_keccak_memw_ops( input: &[u64; 25], output: &[u64; 25], memory_state: &mut MemoryState, + register_state: &mut RegisterState, ) -> Vec { let ts = op.timestamp; let state_addr = op.keccak_state_addr; - let mut memw_ops = Vec::with_capacity(25); + let mut memw_ops = Vec::with_capacity(26); // 1 register read + 25 lane ops + + // Per spec (keccak:c:read_addr): read register x10 to get state_addr + { + let reg_value = pack_register_value(state_addr); + let reg_addr = 2 * 10u64; // x10 → address 20 + let (_old_val, old_ts) = register_state.read(10); + let old_timestamps = [old_ts, old_ts, 0, 0, 0, 0, 0, 0]; + let memw_op = MemwOperation::new(true, reg_addr, reg_value, ts, 2, true) + .with_old(reg_value, old_timestamps); + memw_ops.push(memw_op); + register_state.write(10, state_addr, ts); + } - // Per spec: single combined read+write MEMW per lane at `timestamp`. + // Per spec (keccak:c:load_store_state): single combined read+write MEMW per lane. // input = [0, state_ptr, output_state, timestamp, 0, 0, 1], output = input_state // The MEMW table sees: old=input_state, value=output_state, is_read=true. 
for (lane_idx, (&in_lane, &out_lane)) in input.iter().zip(output.iter()).enumerate() { diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index d3311d93f..9932f3957 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -773,9 +773,11 @@ pub fn create_register_air(proof_options: &ProofOptions) -> VmAir { .with_name("REGISTER") } -/// Create KECCAK core AIR with bus interactions (no transition constraints yet). +/// Create KECCAK core AIR with ADD constraints and bus interactions. pub fn create_keccak_air(proof_options: &ProofOptions) -> VmAir { - let transition_constraints: Vec>> = vec![]; + let (constraints, _) = crate::tables::keccak::create_constraints(0); + let transition_constraints: Vec>> = + constraints.into_iter().map(|c| c as _).collect(); let auxiliary_trace_build_data = AuxiliaryTraceBuildData { interactions: keccak_bus_interactions(), @@ -791,9 +793,11 @@ pub fn create_keccak_air(proof_options: &ProofOptions) -> VmAir { .with_name("KECCAK") } -/// Create KECCAK_RND AIR with bus interactions (pi constraints TODO). +/// Create KECCAK_RND AIR with pi constraints and bus interactions. 
pub fn create_keccak_rnd_air(proof_options: &ProofOptions) -> VmAir { - let transition_constraints: Vec>> = vec![]; + let (constraints, _) = crate::tables::keccak_rnd::create_constraints(0); + let transition_constraints: Vec>> = + constraints.into_iter().map(|c| c as _).collect(); let auxiliary_trace_build_data = AuxiliaryTraceBuildData { interactions: keccak_rnd_bus_interactions(), diff --git a/prover/src/tests/cpu_tests.rs b/prover/src/tests/cpu_tests.rs index edfd4406c..c2a9c2843 100644 --- a/prover/src/tests/cpu_tests.rs +++ b/prover/src/tests/cpu_tests.rs @@ -328,11 +328,10 @@ fn test_bus_interactions_count() { // - 1 DVRM (division/remainder) // - 1 SHIFT (shift operations) // - 1 BRANCH (branch/jump target calculation) - // - 1 ECALL (shared bus for HALT and COMMIT, mult = ECALL - ECALL_KECCAK) - // - 1 EcallKeccak (CPU → KECCAK core, mult = ECALL_KECCAK) + // - 1 ECALL (shared bus for HALT, COMMIT, and KECCAK, mult = ECALL) // - 27 IS_BYTE (byte range checks: RS1, RS2, RD, ARG1[0..7], ARG2[0..7], RES[0..7]) - // Total: 8 + 8 + 8 + 2 + 1 + 1 + 1 + 1 + 5 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 27 = 69 - assert_eq!(interactions.len(), 69); + // Total: 8 + 8 + 8 + 2 + 1 + 1 + 1 + 1 + 5 + 1 + 1 + 1 + 1 + 1 + 1 + 27 = 68 + assert_eq!(interactions.len(), 68); } #[test] From c9d20208585835f5a704d01b0798e567e3c40ec8 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 7 Apr 2026 09:27:20 -0300 Subject: [PATCH 06/14] Add IS_BIT constraints for rbc and equality constraints for rnc constants --- prover/src/tables/keccak_rnd.rs | 122 +++++++++++++++++++++++++++++--- 1 file changed, 111 insertions(+), 11 deletions(-) diff --git a/prover/src/tables/keccak_rnd.rs b/prover/src/tables/keccak_rnd.rs index a9e23996f..850613720 100644 --- a/prover/src/tables/keccak_rnd.rs +++ b/prover/src/tables/keccak_rnd.rs @@ -30,10 +30,10 @@ use executor::vm::instruction::execution::{KECCAK_RC, KECCAK_RHO}; use math::field::element::FieldElement; use math::field::traits::{IsField, IsSubFieldOf}; 
use stark::constraints::transition::TransitionConstraint; -use stark::traits::TransitionEvaluationContext; use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; use stark::table::TableView; use stark::trace::TraceTable; +use stark::traits::TransitionEvaluationContext; use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; @@ -50,9 +50,6 @@ pub mod cols { pub const START: usize = 3; // Cxz[5][4][8] = 160 bytes — partial XOR chain for column parities - // Cxz[x][stage][byte]: stage 0 = XOR(start[x,0], start[x,1]), - // stage k = XOR(Cxz[x,k-1], start[x,k+1]) - // Final parity C[x] = Cxz[x][3] pub const CXZ: usize = START + 200; // 203 // Cxz_left[5][8] = 40 bytes — left shift component of rotated C @@ -61,22 +58,22 @@ pub mod cols { // Cxz_right[5][8] = 40 bytes — right shift component of rotated C pub const CXZ_RIGHT: usize = CXZ_LEFT + 40; // 403 - // Dxz[5][8] = 40 bytes — D[x] = C[(x-1)%5] XOR rotated_C[(x+1)%5] + // Dxz[5][8] = 40 bytes pub const DXZ: usize = CXZ_RIGHT + 40; // 443 // theta[5][5][8] = 200 bytes — state after θ pub const THETA: usize = DXZ + 40; // 483 - // rot_left[5][5][8] = 200 bytes — left half of ρ rotation + // rot_left[5][5][8] = 200 bytes pub const ROT_LEFT: usize = THETA + 200; // 683 - // rot_right[5][5][8] = 200 bytes — right half of ρ rotation + // rot_right[5][5][8] = 200 bytes pub const ROT_RIGHT: usize = ROT_LEFT + 200; // 883 // pi[5][5][8] = 200 bytes — state after π∘ρ (materialized virtual) pub const PI: usize = ROT_RIGHT + 200; // 1083 - // chi_ands[5][5][8] = 200 bytes — AND results for χ + // chi_ands[5][5][8] = 200 bytes pub const CHI_ANDS: usize = PI + 200; // 1283 // chi[5][5][8] = 200 bytes — state after χ @@ -88,10 +85,10 @@ pub mod cols { // iota[8] — χ[0][0] ⊕ rc pub const IOTA: usize = RC + 8; // 1691 - // rnc[5][5] — ρ rotation nibble (offset mod 16, used as HWSL shift amount) + // rnc[5][5] — ρ rotation nibble constant (spec: [[variables.constant]]) pub const RNC: usize = 
IOTA + 8; // 1699 - // rbc[5][5][2] — ρ rotation byte count (2 bits per lane) + // rbc[5][5][2] — ρ rotation byte count bits (spec: [[variables.constant]]) pub const RBC: usize = RNC + 25; // 1724 // mu — multiplicity flag @@ -907,16 +904,97 @@ impl TransitionConstraint for PiConstraint } /// Create all pi verification constraints (200 total: 5×5×8). +/// Constraint: mu * (rnc[x][y] - CONSTANT) = 0 +/// +/// Forces rnc to equal the compile-time rotation nibble on active rows. +/// Per spec: rnc is [[variables.constant]]. +pub struct RncConstantConstraint { + constraint_idx: usize, + col: usize, + expected: u64, +} + +impl RncConstantConstraint { + pub fn new(constraint_idx: usize, x: usize, y: usize) -> Self { + let rho_offset = KECCAK_RHO[x][y] as u64; + Self { + constraint_idx, + col: cols::rnc(x, y), + expected: rho_offset % 16, + } + } + + fn compute(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + let mu = step.get_main_evaluation_element(0, cols::MU).clone(); + let rnc = step.get_main_evaluation_element(0, self.col).clone(); + let expected = FieldElement::::from(self.expected); + mu * (rnc - expected) + } +} + +impl TransitionConstraint for RncConstantConstraint { + fn degree(&self) -> usize { + 2 + } + + fn constraint_idx(&self) -> usize { + self.constraint_idx + } + + fn end_exemptions(&self) -> usize { + 0 + } + + fn evaluate( + &self, + evaluation_context: &TransitionEvaluationContext, + transition_evaluations: &mut [FieldElement], + ) { + match evaluation_context { + TransitionEvaluationContext::Prover { + frame, + periodic_values: _, + rap_challenges: _, + .. + } => { + let v = self.compute(frame.get_evaluation_step(0)); + transition_evaluations[self.constraint_idx] = v.to_extension(); + } + TransitionEvaluationContext::Verifier { + frame, + periodic_values: _, + rap_challenges: _, + .. 
+ } => { + let v = self.compute(frame.get_evaluation_step(0)); + transition_evaluations[self.constraint_idx] = v; + } + } + } +} + +/// Create all keccak round constraints: +/// - 200 pi verification (degree-3) +/// - 50 IS_BIT for rbc (degree-2, per spec: rbc type is Bit) +/// - 25 rnc equality (degree-2, per spec: rnc is constant) +/// Total: 275 constraints pub fn create_constraints( constraint_idx_start: usize, ) -> ( Vec>>, usize, ) { + use crate::constraints::templates::IsBitConstraint; + let mut constraints: Vec>> = - Vec::with_capacity(200); + Vec::with_capacity(275); let mut idx = constraint_idx_start; + // 200 pi verification constraints (degree-3) for x in 0..5 { for y in 0..5 { for z in 0..8 { @@ -926,6 +1004,28 @@ pub fn create_constraints( } } + // 50 IS_BIT constraints for rbc[x][y][bit] (degree-2, unconditional) + // Safe on padding: rbc=0 on padding rows, 0*(1-0)=0 ✓ + for x in 0..5 { + for y in 0..5 { + for bit in 0..2 { + constraints.push(Box::new(IsBitConstraint::unconditional( + cols::rbc(x, y, bit), + idx, + ))); + idx += 1; + } + } + } + + // 25 rnc equality constraints: mu * (rnc[x][y] - KECCAK_RHO[x][y] % 16) = 0 + for x in 0..5 { + for y in 0..5 { + constraints.push(Box::new(RncConstantConstraint::new(idx, x, y))); + idx += 1; + } + } + (constraints, idx) } From b67bb03f8ed8ac095473f7ef7ef0cf8b80c6878e Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 7 Apr 2026 11:38:54 -0300 Subject: [PATCH 07/14] save work --- prover/src/tables/trace_builder.rs | 151 ++++++++++++++------------- prover/src/tests/prove_elfs_tests.rs | 16 --- 2 files changed, 80 insertions(+), 87 deletions(-) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index c0ca82356..99188171a 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2547,73 +2547,48 @@ impl Traces { } #[cfg(test)] -mod keccak_debug_tests { - use super::*; - use executor::vm::instruction::execution::keccak_f1600; - - #[test] 
- fn test_keccak_bitwise_ops_count() { - let mut input = [0u64; 25]; - let mut output = input; - keccak_f1600(&mut output); - let kop = KeccakOperation { timestamp: 42, state_addr: 0x1000, input, output }; - let ops = collect_bitwise_from_keccak(&[kop]); - - let xor_count = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::XorByte).count(); - let and_count = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::AndByte).count(); - let is_byte_count = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::IsByte).count(); - let hwsl_count = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::Hwsl).count(); - let is_half_count = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::IsHalf).count(); - - println!("Bitwise ops from 1 keccak call:"); - println!(" XorByte: {} (expected: 24 * 608 = {})", xor_count, 24 * 608); - println!(" AndByte: {} (expected: 24 * 200 = {})", and_count, 24 * 200); - println!(" IsByte: {} (expected: 24 * 480 = {})", is_byte_count, 24 * 480); - println!(" Hwsl: {} (expected: 24 * 120 = {})", hwsl_count, 24 * 120); - println!(" IsHalf: {} (expected: 100)", is_half_count); - println!(" Total: {}", ops.len()); - } -} - -#[cfg(test)] -mod keccak_trace_debug_tests { +mod keccak_tests { use super::*; use executor::vm::instruction::execution::keccak_f1600; use crate::tables::keccak_rnd::cols as rnd_cols; use crate::tables::keccak::cols as core_cols; use crate::tables::types::FE; - #[test] - fn test_keccak_bus_values_match() { + fn make_keccak_ops() -> (KeccakOperation, KeccakRoundOperation) { let input = [0u64; 25]; let mut output = input; keccak_f1600(&mut output); - let kop = KeccakOperation { timestamp: 42, state_addr: 0x1000, input, output }; let rop = KeccakRoundOperation { timestamp: 42, input, output }; + (kop, rop) + } - let core_trace = keccak::generate_keccak_trace(&[kop]); - let rnd_trace = keccak_rnd::generate_keccak_rnd_trace(&[rop]); + #[test] + fn test_keccak_bitwise_ops_count() { + let (kop, 
_) = make_keccak_ops(); + let ops = collect_bitwise_from_keccak(&[kop]); - // Check: round 0, start bytes match core input_state bytes - let core_base = 0 * core_cols::NUM_COLUMNS; - let rnd_base = 0 * rnd_cols::NUM_COLUMNS; // round 0 + let xor = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::XorByte).count(); + let and = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::AndByte).count(); + let is_byte = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::IsByte).count(); + let hwsl = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::Hwsl).count(); + let is_half = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::IsHalf).count(); - for x in 0..5 { - for y in 0..5 { - for b in 0..8 { - let core_val = &core_trace.main_table.data[core_base + core_cols::input_state(x, y, b)]; - let rnd_val = &rnd_trace.main_table.data[rnd_base + rnd_cols::start(x, y, b)]; - assert_eq!(core_val, rnd_val, "Round 0 start mismatch at ({x},{y},{b})"); - } - } - } - println!("Round 0 start == core input_state ✓"); + assert_eq!(xor, 24 * 608, "XorByte count"); + assert_eq!(and, 24 * 200, "AndByte count"); + assert_eq!(is_byte, 24 * 480, "IsByte count"); + assert_eq!(hwsl, 24 * 120, "Hwsl count"); + assert_eq!(is_half, 100, "IsHalf count"); + assert_eq!(ops.len(), 100 + 24 * 1408, "Total bitwise ops"); + } - // Check: each round's output matches keccak_f1600 round-by-round - let mut ref_state = input; + #[test] + fn test_keccak_round_trace_matches_f1600() { + let (_, rop) = make_keccak_ops(); + let rnd_trace = keccak_rnd::generate_keccak_rnd_trace(&[rop]); + + let mut ref_state = [0u64; 25]; for round in 0..24 { - // Apply one round of keccak_f1600 let rc = executor::vm::instruction::execution::KECCAK_RC[round]; let mut c = [0u64; 5]; for x in 0..5 { c[x] = ref_state[x] ^ ref_state[x+5] ^ ref_state[x+10] ^ ref_state[x+15] ^ ref_state[x+20]; } @@ -2625,44 +2600,78 @@ mod keccak_trace_debug_tests { for x in 0..5 { for y in 0..5 { 
ref_state[x+5*y] = b[x+5*y] ^ (!b[(x+1)%5+5*y] & b[(x+2)%5+5*y]); } } ref_state[0] ^= rc; - // Compare with round chip's output (iota for lane 0, chi for rest) - let rnd_base_r = round * rnd_cols::NUM_COLUMNS; + let base = round * rnd_cols::NUM_COLUMNS; for lane in 0..25 { - let x = lane % 5; // This is how keccak indexes: lane = x + 5*y + let x = lane % 5; let y = lane / 5; for byte_idx in 0..8 { - let expected = ((ref_state[lane] >> (byte_idx * 8)) & 0xFF) as u64; - let trace_col = if x == 0 && y == 0 { - rnd_cols::iota(byte_idx) - } else { - rnd_cols::chi(x, y, byte_idx) - }; - let expected_fe = FE::from(expected); - let trace_fe = &rnd_trace.main_table.data[rnd_base_r + trace_col]; - if &expected_fe != trace_fe { - panic!("Round {round} lane ({x},{y}) byte {byte_idx}: expected {expected_fe:?}, trace {trace_fe:?}"); - } + let expected = FE::from(((ref_state[lane] >> (byte_idx * 8)) & 0xFF) as u64); + let col = if x == 0 && y == 0 { rnd_cols::iota(byte_idx) } else { rnd_cols::chi(x, y, byte_idx) }; + let trace_val = &rnd_trace.main_table.data[base + col]; + assert_eq!(&expected, trace_val, "Round {round} lane ({x},{y}) byte {byte_idx}"); + } + } + } + } + + #[test] + fn test_keccak_core_round_state_consistency() { + let (kop, rop) = make_keccak_ops(); + let core_trace = keccak::generate_keccak_trace(&[kop]); + let rnd_trace = keccak_rnd::generate_keccak_rnd_trace(&[rop]); + + // Round 0 start == core input_state + for x in 0..5 { + for y in 0..5 { + for b in 0..8 { + let core_val = &core_trace.main_table.data[core_cols::input_state(x, y, b)]; + let rnd_val = &rnd_trace.main_table.data[rnd_cols::start(x, y, b)]; + assert_eq!(core_val, rnd_val, "Round 0 start mismatch at ({x},{y},{b})"); } } } - println!("All 24 rounds match keccak_f1600 ✓"); - // Check: round 23 out matches core output_state + // Round 23 out == core output_state let rnd_base_23 = 23 * rnd_cols::NUM_COLUMNS; for x in 0..5 { for y in 0..5 { for b in 0..8 { - let core_val = 
&core_trace.main_table.data[core_base + core_cols::output_state(x, y, b)]; - // out[0][0] = iota, out[x][y] = chi for rest + let core_val = &core_trace.main_table.data[core_cols::output_state(x, y, b)]; let rnd_val = if x == 0 && y == 0 { &rnd_trace.main_table.data[rnd_base_23 + rnd_cols::iota(b)] } else { &rnd_trace.main_table.data[rnd_base_23 + rnd_cols::chi(x, y, b)] }; - assert_eq!(core_val, rnd_val, "Round 23 out mismatch at ({x},{y},{b}): core={core_val:?} rnd={rnd_val:?}"); + assert_eq!(core_val, rnd_val, "Round 23 out mismatch at ({x},{y},{b})"); } } } - println!("Round 23 out == core output_state ✓"); + } + + #[test] + fn test_keccak_bus_interaction_counts() { + assert_eq!(keccak::bus_interactions().len(), 129, + "KECCAK core: 1 ECALL + 1 MEMW read_addr + 25 MEMW lanes + 100 IS_HALF + 1 Keccak send + 1 Keccak recv"); + assert_eq!(keccak_rnd::bus_interactions().len(), 1411, + "KECCAK_RND: 3 IO + 500 theta + 500 rho + 400 chi + 8 iota"); + assert_eq!(keccak_rc::bus_interactions().len(), 1, + "KECCAK_RC: 1 receiver"); + } + + #[test] + fn test_keccak_column_counts() { + assert_eq!(core_cols::NUM_COLUMNS, 511, "KECCAK core columns"); + assert_eq!(rnd_cols::NUM_COLUMNS, 1775, "KECCAK_RND columns"); + assert_eq!(keccak_rc::cols::NUM_COLUMNS, 10, "KECCAK_RC columns"); + } + + #[test] + fn test_keccak_constraint_counts() { + let (core_constraints, _) = keccak::create_constraints(0); + assert_eq!(core_constraints.len(), 50, "KECCAK core: 25 ADD pairs"); + + let (rnd_constraints, _) = keccak_rnd::create_constraints(0); + assert_eq!(rnd_constraints.len(), 275, "KECCAK_RND: 200 pi + 50 IS_BIT rbc + 25 rnc equality"); } } + diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index b26e79525..8954db948 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -726,22 +726,6 @@ fn test_prove_elfs_keccak() { let mut traces = Traces::from_elf_and_logs(&elf, &logs, &Default::default()).unwrap(); - 
println!( - "keccak (fast): CPU {} rows, KECCAK {} rows, KECCAK_RND {} rows, KECCAK_RC {} rows, MEMW {} tables ({} rows), BITWISE {} rows", - traces.cpus[0].main_table.height, - traces.keccak.main_table.height, - traces.keccak_rnd.main_table.height, - traces.keccak_rc.main_table.height, - traces.memws.len(), - traces.memws[0].main_table.height, - traces.bitwise.main_table.height, - ); - println!( - "Bus interaction counts: KECCAK core={}, KECCAK_RND={}, KECCAK_RC={}", - crate::tables::keccak::bus_interactions().len(), - crate::tables::keccak_rnd::bus_interactions().len(), - crate::tables::keccak_rc::bus_interactions().len(), - ); assert!( prove_and_verify_vm_minimal(&elf, &mut traces), "keccak prove/verify failed" From ca605232ee4a51efe045cef46d4b7d713f64d1b8 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 21 Apr 2026 14:15:08 -0300 Subject: [PATCH 08/14] align keccak with spec --- prover/src/tables/keccak.rs | 54 ++-- prover/src/tables/keccak_rnd.rs | 486 +++++++---------------------- prover/src/tables/trace_builder.rs | 10 +- 3 files changed, 164 insertions(+), 386 deletions(-) diff --git a/prover/src/tables/keccak.rs b/prover/src/tables/keccak.rs index b094459f3..984a3a0a9 100644 --- a/prover/src/tables/keccak.rs +++ b/prover/src/tables/keccak.rs @@ -147,6 +147,17 @@ pub fn generate_keccak_trace( data[base + cols::MU] = FE::one(); } + // Padding rows: state_ptr[lane][0] = 8 * lane_idx (per spec keccak.toml pad). + // Halfwords 1..3 stay zero since 8*24 = 192 fits in the low halfword. + // mu = 0 gates all bus interactions and the ADD constraint, so these values + // only need to satisfy the pad requirement, not reconstruct a real address. 
+ for row_idx in n..num_rows { + let base = row_idx * cols::NUM_COLUMNS; + for lane_idx in 0..25 { + data[base + cols::state_ptr(lane_idx, 0)] = FE::from((lane_idx as u64) * 8); + } + } + TraceTable::new_main(data, cols::NUM_COLUMNS, 1) } @@ -160,8 +171,16 @@ pub fn bus_interactions() -> Vec { let mut interactions = Vec::with_capacity(160); // 1. ECALL receiver (shared bus, per spec keccak:c:output) - // Format: [ts_lo, ts_hi, syscall_lo32, syscall_hi32] - // Syscall number: lo32 = 2^32-2, hi32 = 2^32-1 + // Format: [ts_lo, ts_hi, syscall_lo32, syscall_hi32] (DWordWL convention). + // + // Spec keccak.toml:51 has `["arr", 2^32-1, 2^32-2]` which flattens to + // [hi, lo] — inconsistent with HALT/COMMIT which use `["cast", N, "DWordWL"]` + // → [lo, hi]. The CPU ECALL sender (cpu.rs) is shared across all three + // receivers and uses [lo, hi], so applying the spec's keccak ordering + // literally desbalances the LogUp bus. + // + // Upstream spec needs to change keccak.toml:51 to `["cast", -2, "DWordWL"]`. + // See docs/keccak-spec-deviations.md #7. interactions.push(BusInteraction::receiver( BusId::Ecall, Multiplicity::Column(cols::MU), @@ -225,6 +244,9 @@ pub fn bus_interactions() -> Vec { } // 2. Keccak bus: send (timestamp, 0, input_state[200]) + // Per spec keccak.toml: input = ["timestamp", 0, "input_state"] where + // input_state is [[[Byte, 8], 5], 5] — 200 Byte elements, each its own + // bus element (no packing). 
{ let mut values = vec![ BusValue::Packed { start_column: cols::TIMESTAMP_0, packing: Packing::Direct }, @@ -233,14 +255,12 @@ pub fn bus_interactions() -> Vec { ]; for x in 0..5 { for y in 0..5 { - values.push(BusValue::Packed { - start_column: cols::input_state(x, y, 0), - packing: Packing::Word4L, - }); - values.push(BusValue::Packed { - start_column: cols::input_state(x, y, 4), - packing: Packing::Word4L, - }); + for b in 0..8 { + values.push(BusValue::Packed { + start_column: cols::input_state(x, y, b), + packing: Packing::Direct, + }); + } } } interactions.push(BusInteraction::sender( @@ -259,14 +279,12 @@ pub fn bus_interactions() -> Vec { ]; for x in 0..5 { for y in 0..5 { - values.push(BusValue::Packed { - start_column: cols::output_state(x, y, 0), - packing: Packing::Word4L, - }); - values.push(BusValue::Packed { - start_column: cols::output_state(x, y, 4), - packing: Packing::Word4L, - }); + for b in 0..8 { + values.push(BusValue::Packed { + start_column: cols::output_state(x, y, b), + packing: Packing::Direct, + }); + } } } interactions.push(BusInteraction::receiver( diff --git a/prover/src/tables/keccak_rnd.rs b/prover/src/tables/keccak_rnd.rs index 850613720..48714b680 100644 --- a/prover/src/tables/keccak_rnd.rs +++ b/prover/src/tables/keccak_rnd.rs @@ -22,18 +22,15 @@ //! | chi | 200 | State after χ [5][5][8] | //! | rc | 8 | Round constant bytes | //! | iota | 8 | χ[0][0] ⊕ rc | -//! | rnc | 25 | ρ rotation nibble constant [5][5] | -//! | rbc | 50 | ρ rotation byte count bits [5][5][2] | //! | mu | 1 | Multiplicity (1 for real, 0 for padding) | +//! +//! Note: spec [[variables.constant]] `rnc` and `rbc` are inlined as compile-time +//! constants derived from `KECCAK_RHO[x][y]`, not materialized as columns. 
use executor::vm::instruction::execution::{KECCAK_RC, KECCAK_RHO}; -use math::field::element::FieldElement; -use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::TransitionConstraint; use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; -use stark::table::TableView; use stark::trace::TraceTable; -use stark::traits::TransitionEvaluationContext; use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; @@ -70,31 +67,26 @@ pub mod cols { // rot_right[5][5][8] = 200 bytes pub const ROT_RIGHT: usize = ROT_LEFT + 200; // 883 - // pi[5][5][8] = 200 bytes — state after π∘ρ (materialized virtual) - pub const PI: usize = ROT_RIGHT + 200; // 1083 - // chi_ands[5][5][8] = 200 bytes - pub const CHI_ANDS: usize = PI + 200; // 1283 + // (pi is a spec [[variables.virtual]] — inlined as rot_left + rot_right at + // compile-resolved offsets, not materialized as columns.) + pub const CHI_ANDS: usize = ROT_RIGHT + 200; // 1083 // chi[5][5][8] = 200 bytes — state after χ - pub const CHI: usize = CHI_ANDS + 200; // 1483 + pub const CHI: usize = CHI_ANDS + 200; // 1283 // rc[8] — round constant bytes - pub const RC: usize = CHI + 200; // 1683 + pub const RC: usize = CHI + 200; // 1483 // iota[8] — χ[0][0] ⊕ rc - pub const IOTA: usize = RC + 8; // 1691 - - // rnc[5][5] — ρ rotation nibble constant (spec: [[variables.constant]]) - pub const RNC: usize = IOTA + 8; // 1699 - - // rbc[5][5][2] — ρ rotation byte count bits (spec: [[variables.constant]]) - pub const RBC: usize = RNC + 25; // 1724 + pub const IOTA: usize = RC + 8; // 1491 - // mu — multiplicity flag - pub const MU: usize = RBC + 50; // 1774 + // mu — multiplicity flag. + // rnc and rbc (spec [[variables.constant]]) are inlined as compile-time + // constants from KECCAK_RHO, not allocated as columns. 
+ pub const MU: usize = IOTA + 8; // 1499 - pub const NUM_COLUMNS: usize = MU + 1; // 1775 + pub const NUM_COLUMNS: usize = MU + 1; // 1500 // ------------------------------------------------------------------------- // Index helpers @@ -148,10 +140,23 @@ pub mod cols { ROT_RIGHT + (x + 5 * y) * 8 + byte } - /// Index into pi[x][y][byte] + /// Resolve pi[x][y][z] (spec virtual) to the (rot_left_col, rot_right_col) + /// pair whose sum equals pi[x][y][z]. rbc is compile-time constant. #[inline] - pub const fn pi(x: usize, y: usize, byte: usize) -> usize { - PI + (x + 5 * y) * 8 + byte + pub fn pi_src_cols(x: usize, y: usize, z: usize) -> (usize, usize) { + use executor::vm::instruction::execution::KECCAK_RHO; + let sx = (x + 3 * y) % 5; + let sy = x; + let rho_offset = KECCAK_RHO[sx][sy] as usize; + let rbc_val = rho_offset / 16; + let (l_byte, r_byte) = match rbc_val { + 0 => (z, (z + 6) % 8), + 1 => ((z + 6) % 8, (z + 4) % 8), + 2 => ((z + 4) % 8, (z + 2) % 8), + 3 => ((z + 2) % 8, z), + _ => unreachable!(), + }; + (rot_left(sx, sy, l_byte), rot_right(sx, sy, r_byte)) } /// Index into chi_ands[x][y][byte] @@ -177,18 +182,6 @@ pub mod cols { pub const fn iota(byte: usize) -> usize { IOTA + byte } - - /// Index into rnc[x][y] - #[inline] - pub const fn rnc(x: usize, y: usize) -> usize { - RNC + x + 5 * y - } - - /// Index into rbc[x][y][bit] - #[inline] - pub const fn rbc(x: usize, y: usize, bit: usize) -> usize { - RBC + (x + 5 * y) * 2 + bit - } } // ========================================================================= @@ -291,6 +284,9 @@ pub fn generate_keccak_rnd_trace( // HWSL shifts each halfword independently. The carry from halfword k // propagates to halfword (k+1)%4, which is a 2-byte offset: // rotated_Cxz[z] = Cxz_left[z] + Cxz_right[(z-2) mod 8] + // Spec keccak_round.toml says (z-1) mod 8 — that is a spec bug: + // HWSL's SLLC is a u16 at bytes [2k, 2k+1] of Cxz_right, so the + // carry propagates by 2 bytes, not 1. 
See docs/keccak-spec-deviations.md. let mut cxz_left_bytes = [[0u8; 8]; 5]; let mut cxz_right_bytes = [[0u8; 8]; 5]; let mut rotated_c = [[0u8; 8]; 5]; @@ -344,23 +340,14 @@ pub fn generate_keccak_rnd_trace( // === ρ (rho) === // For each lane, rotate theta[x][y] by KECCAK_RHO[x][y] bits. - // Decompose rotation as: rnc (nibble, 0..15) + 16*rbc[0] + 32*rbc[1] - // HWSL handles the sub-16-bit rotation on each halfword. - // The byte-level shift (rbc) is handled by the pi column reconstruction. + // Decompose rotation as: rnc (nibble, 0..15) + 16*rbc[0] + 32*rbc[1]. + // rnc and rbc are inlined as compile-time constants per spec + // [[variables.constant]]; only HWSL outputs are stored in the trace. for x in 0..5 { for y in 0..5 { let rho_offset = KECCAK_RHO[x][y] as usize; let rnc_val = (rho_offset % 16) as u8; - let rbc_val = rho_offset / 16; // 0..3 - let rbc0 = (rbc_val & 1) as u8; - let rbc1 = ((rbc_val >> 1) & 1) as u8; - - data[base + cols::rnc(x, y)] = FE::from(rnc_val as u64); - data[base + cols::rbc(x, y, 0)] = FE::from(rbc0 as u64); - data[base + cols::rbc(x, y, 1)] = FE::from(rbc1 as u64); - let theta_lane = theta_lanes[x + 5 * y]; - // HWSL on each halfword with shift = rnc_val for hw in 0..4 { let halfword = ((theta_lane >> (hw * 16)) & 0xFFFF) as u16; let (shifted, carry) = hwsl(halfword, rnc_val); @@ -377,29 +364,22 @@ pub fn generate_keccak_rnd_trace( } // === π (pi) === - // pi[x][y] = rho[(x+3y)%5][x] where rho is the rotated theta - // rho[x'][y'] is reconstructed from rot_left/rot_right using rbc mux: - // rho[byte] = mux(rbc, rot_left[byte - 2*rbc_val] + rot_right[byte - 2*rbc_val - 1]) - // But it's simpler to just compute the full rotation and store pi directly. + // pi[x][y] = rho[(x+3y)%5][x] where rho is the rotated theta. + // pi is a spec [[variables.virtual]] — not stored as trace columns. 
+ // It's reconstructed inline in chi bus interactions as + // pi[x][y][z] = rot_left[sx,sy,l_byte] + rot_right[sx,sy,r_byte] + // with (sx, sy) = ((x+3y)%5, x) and (l_byte, r_byte) resolved from + // the compile-time rbc constant. pi_lanes is still computed here + // for the chi step below. let mut pi_lanes = [0u64; 25]; for x in 0..5 { for y in 0..5 { - // rho rotation of theta[x][y] let rotated = theta_lanes[x + 5 * y].rotate_left(KECCAK_RHO[x][y]); - // π permutation: b[y][(2x+3y)%5] = rotated let dst_x = y; let dst_y = (2 * x + 3 * y) % 5; pi_lanes[dst_x + 5 * dst_y] = rotated; } } - for x in 0..5 { - for y in 0..5 { - for b in 0..8 { - data[base + cols::pi(x, y, b)] = - FE::from(byte_of(pi_lanes[x + 5 * y], b) as u64); - } - } - } // === χ (chi) === let mut chi_lanes = [0u64; 25]; @@ -443,12 +423,15 @@ pub fn generate_keccak_rnd_trace( // Bus interactions (1,411 total) // ========================================================================= +#[allow(clippy::needless_range_loop)] pub fn bus_interactions() -> Vec { let mut interactions = Vec::with_capacity(1420); // --- IO group (3) --- // 1. KECCAK bus: receive (timestamp, round, start[200]) + // Per spec keccak_round.toml: input = ["timestamp", "round", "start"] where + // start is [[[Byte, 8], 5], 5] — 200 Byte elements, each its own bus element. 
{ let mut values = vec![ BusValue::Packed { @@ -464,18 +447,14 @@ pub fn bus_interactions() -> Vec { packing: Packing::Direct, }, ]; - // Pack state as 25 DWordBL = 50 bus elements for x in 0..5 { for y in 0..5 { - // Word4L packing: 4 consecutive byte columns → 1 bus element - values.push(BusValue::Packed { - start_column: cols::start(x, y, 0), - packing: Packing::Word4L, - }); - values.push(BusValue::Packed { - start_column: cols::start(x, y, 4), - packing: Packing::Word4L, - }); + for b in 0..8 { + values.push(BusValue::Packed { + start_column: cols::start(x, y, b), + packing: Packing::Direct, + }); + } } } interactions.push(BusInteraction::receiver( @@ -507,25 +486,15 @@ pub fn bus_interactions() -> Vec { ]; for x in 0..5 { for y in 0..5 { - if x == 0 && y == 0 { - // Lane [0][0] uses iota columns - values.push(BusValue::Packed { - start_column: cols::IOTA, - packing: Packing::Word4L, - }); - values.push(BusValue::Packed { - start_column: cols::IOTA + 4, - packing: Packing::Word4L, - }); - } else { - // Other lanes use chi columns - values.push(BusValue::Packed { - start_column: cols::chi(x, y, 0), - packing: Packing::Word4L, - }); + for b in 0..8 { + let col = if x == 0 && y == 0 { + cols::IOTA + b + } else { + cols::chi(x, y, b) + }; values.push(BusValue::Packed { - start_column: cols::chi(x, y, 4), - packing: Packing::Word4L, + start_column: col, + packing: Packing::Direct, }); } } @@ -637,7 +606,9 @@ pub fn bus_interactions() -> Vec { // --- Theta: Dxz XOR_BYTE (40) --- // D[x][b] = C[(x-1)%5][b] XOR rotated_C[(x+1)%5][b] - // rotated_C[x'][b] = Cxz_left[x'][b] + Cxz_right[x'][(b-1)%8] (virtual) + // rotated_C[x'][b] = Cxz_left[x'][b] + Cxz_right[x'][(b-2)%8] (virtual). + // Spec has (b-1)%8 — see docs/keccak-spec-deviations.md for why HWSL carry + // needs a 2-byte offset, not 1. 
for x in 0..5 { for b in 0..8 { interactions.push(BusInteraction::sender( @@ -675,8 +646,10 @@ pub fn bus_interactions() -> Vec { // --- Rho: HWSL (100) --- // HWSL(theta[x][y] halfword[hw], rnc[x][y]) → (rot_left, rot_right) + // rnc is inlined as a constant: KECCAK_RHO[x][y] % 16. for x in 0..5 { for y in 0..5 { + let rnc_val = (KECCAK_RHO[x][y] % 16) as u64; for hw in 0..4 { interactions.push(BusInteraction::sender( BusId::Hwsl, @@ -686,7 +659,7 @@ pub fn bus_interactions() -> Vec { LinearTerm::Column { coefficient: 1, column: cols::theta(x, y, hw * 2) }, LinearTerm::Column { coefficient: 256, column: cols::theta(x, y, hw * 2 + 1) }, ]), - BusValue::Packed { start_column: cols::rnc(x, y), packing: Packing::Direct }, + BusValue::constant(rnc_val), BusValue::linear(vec![ LinearTerm::Column { coefficient: 1, column: cols::rot_left(x, y, hw * 2) }, LinearTerm::Column { coefficient: 256, column: cols::rot_left(x, y, hw * 2 + 1) }, @@ -721,18 +694,26 @@ pub fn bus_interactions() -> Vec { // --- Chi: AND_BYTE (200) --- // chi_ands[x][y][b] = (255 - pi[(x+1)%5][y][b]) AND pi[(x+2)%5][y][b] + // pi is virtual: pi[x][y][z] = rot_left[sx,sy,l_byte] + rot_right[sx,sy,r_byte] + // with src lane (sx,sy) = ((x+3y)%5, x) and byte offsets from KECCAK_RHO. 
for x in 0..5 { for y in 0..5 { for b in 0..8 { + let (p1_l, p1_r) = cols::pi_src_cols((x + 1) % 5, y, b); + let (p2_l, p2_r) = cols::pi_src_cols((x + 2) % 5, y, b); interactions.push(BusInteraction::sender( BusId::AndByte, Multiplicity::Column(cols::MU), vec![ BusValue::linear(vec![ LinearTerm::Constant(255), - LinearTerm::Column { coefficient: -1, column: cols::pi((x + 1) % 5, y, b) }, + LinearTerm::Column { coefficient: -1, column: p1_l }, + LinearTerm::Column { coefficient: -1, column: p1_r }, + ]), + BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: p2_l }, + LinearTerm::Column { coefficient: 1, column: p2_r }, ]), - BusValue::Packed { start_column: cols::pi((x + 2) % 5, y, b), packing: Packing::Direct }, BusValue::Packed { start_column: cols::chi_ands(x, y, b), packing: Packing::Direct }, ], )); @@ -741,15 +722,19 @@ pub fn bus_interactions() -> Vec { } // --- Chi: XOR_BYTE (200) --- - // chi[x][y][b] = pi[x][y][b] XOR chi_ands[x][y][b] + // chi[x][y][b] = pi[x][y][b] XOR chi_ands[x][y][b] (pi virtual). for x in 0..5 { for y in 0..5 { for b in 0..8 { + let (p_l, p_r) = cols::pi_src_cols(x, y, b); interactions.push(BusInteraction::sender( BusId::XorByte, Multiplicity::Column(cols::MU), vec![ - BusValue::Packed { start_column: cols::pi(x, y, b), packing: Packing::Direct }, + BusValue::linear(vec![ + LinearTerm::Column { coefficient: 1, column: p_l }, + LinearTerm::Column { coefficient: 1, column: p_r }, + ]), BusValue::Packed { start_column: cols::chi_ands(x, y, b), packing: Packing::Direct }, BusValue::Packed { start_column: cols::chi(x, y, b), packing: Packing::Direct }, ], @@ -776,257 +761,23 @@ pub fn bus_interactions() -> Vec { } // ========================================================================= -// Constraints: Pi verification (200 degree-3 polynomial constraints) +// Constraints // ========================================================================= -/// Constraint verifying pi[x][y][z] = rho[(x+3y)%5][x][z]. 
+/// KECCAK_RND has no main-trace polynomial constraints. /// -/// rho is reconstructed from rot_left/rot_right using the rbc mux: -/// rho[z] = (1-b0)(1-b1)(L[z]+R[(z-2)%8]) + b0(1-b1)(L[(z-2)%8]+R[(z-4)%8]) -/// + (1-b0)b1(L[(z-4)%8]+R[(z-6)%8]) + b0*b1(L[(z-6)%8]+R[z]) +/// - pi is a spec [[variables.virtual]] inlined in chi bus interactions. +/// - rnc/rbc are spec [[variables.constant]] inlined as compile-time constants. /// -/// where (L,R) = (rot_left, rot_right) at the source lane, and -/// b0 = rbc[src_x][src_y][0], b1 = rbc[src_x][src_y][1]. -pub struct PiConstraint { - constraint_idx: usize, - /// Destination coordinates in pi - x: usize, - y: usize, - z: usize, - /// Source coordinates: (sx, sy) = ((x + 3y) % 5, x) - sx: usize, - sy: usize, -} - -impl PiConstraint { - pub fn new(constraint_idx: usize, x: usize, y: usize, z: usize) -> Self { - let sx = (x + 3 * y) % 5; - let sy = x; - Self { - constraint_idx, - x, - y, - z, - sx, - sy, - } - } - - fn compute(&self, step: &TableView) -> FieldElement - where - F: IsSubFieldOf, - E: IsField, - { - let one = FieldElement::::one(); - let z = self.z; - - // Source lane rbc bits - let b0 = step - .get_main_evaluation_element(0, cols::rbc(self.sx, self.sy, 0)) - .clone(); - let b1 = step - .get_main_evaluation_element(0, cols::rbc(self.sx, self.sy, 1)) - .clone(); - - let not_b0 = &one - &b0; - let not_b1 = &one - &b1; - - // Helper to get rot_left/rot_right at source lane with byte index - let l = |byte_idx: usize| { - step.get_main_evaluation_element(0, cols::rot_left(self.sx, self.sy, byte_idx)) - .clone() - }; - let r = |byte_idx: usize| { - step.get_main_evaluation_element(0, cols::rot_right(self.sx, self.sy, byte_idx)) - .clone() - }; - - // Corrected offsets: (z-2k) mod 8 for rbc case k - let case0 = ¬_b0 * ¬_b1 * (l(z) + r((z + 6) % 8)); - let case1 = &b0 * ¬_b1 * (l((z + 6) % 8) + r((z + 4) % 8)); - let case2 = ¬_b0 * &b1 * (l((z + 4) % 8) + r((z + 2) % 8)); - let case3 = &b0 * &b1 * (l((z + 2) % 8) + 
r(z)); - - let expected = case0 + case1 + case2 + case3; - - let pi_val = step - .get_main_evaluation_element(0, cols::pi(self.x, self.y, self.z)) - .clone(); - - // pi - expected = 0 (degree 3: b0 * b1 * column) - // No mu guard needed: on padding rows all columns are zero, - // so expected=0 and pi=0, satisfying the constraint. - pi_val - expected - } -} - -impl TransitionConstraint for PiConstraint { - fn degree(&self) -> usize { - // b0 * b1 * (L + R) has degree 3 - 3 - } - - fn constraint_idx(&self) -> usize { - self.constraint_idx - } - - fn end_exemptions(&self) -> usize { - 0 - } - - fn evaluate( - &self, - evaluation_context: &TransitionEvaluationContext, - transition_evaluations: &mut [FieldElement], - ) { - match evaluation_context { - TransitionEvaluationContext::Prover { - frame, - periodic_values: _, - rap_challenges: _, - .. - } => { - let constraint_value = self.compute(frame.get_evaluation_step(0)); - transition_evaluations[self.constraint_idx] = constraint_value.to_extension(); - } - - TransitionEvaluationContext::Verifier { - frame, - periodic_values: _, - rap_challenges: _, - .. - } => { - let constraint_value = self.compute(frame.get_evaluation_step(0)); - transition_evaluations[self.constraint_idx] = constraint_value; - } - } - } -} - -/// Create all pi verification constraints (200 total: 5×5×8). -/// Constraint: mu * (rnc[x][y] - CONSTANT) = 0 -/// -/// Forces rnc to equal the compile-time rotation nibble on active rows. -/// Per spec: rnc is [[variables.constant]]. 
-pub struct RncConstantConstraint { - constraint_idx: usize, - col: usize, - expected: u64, -} - -impl RncConstantConstraint { - pub fn new(constraint_idx: usize, x: usize, y: usize) -> Self { - let rho_offset = KECCAK_RHO[x][y] as u64; - Self { - constraint_idx, - col: cols::rnc(x, y), - expected: rho_offset % 16, - } - } - - fn compute(&self, step: &TableView) -> FieldElement - where - F: IsSubFieldOf, - E: IsField, - { - let mu = step.get_main_evaluation_element(0, cols::MU).clone(); - let rnc = step.get_main_evaluation_element(0, self.col).clone(); - let expected = FieldElement::::from(self.expected); - mu * (rnc - expected) - } -} - -impl TransitionConstraint for RncConstantConstraint { - fn degree(&self) -> usize { - 2 - } - - fn constraint_idx(&self) -> usize { - self.constraint_idx - } - - fn end_exemptions(&self) -> usize { - 0 - } - - fn evaluate( - &self, - evaluation_context: &TransitionEvaluationContext, - transition_evaluations: &mut [FieldElement], - ) { - match evaluation_context { - TransitionEvaluationContext::Prover { - frame, - periodic_values: _, - rap_challenges: _, - .. - } => { - let v = self.compute(frame.get_evaluation_step(0)); - transition_evaluations[self.constraint_idx] = v.to_extension(); - } - TransitionEvaluationContext::Verifier { - frame, - periodic_values: _, - rap_challenges: _, - .. - } => { - let v = self.compute(frame.get_evaluation_step(0)); - transition_evaluations[self.constraint_idx] = v; - } - } - } -} - -/// Create all keccak round constraints: -/// - 200 pi verification (degree-3) -/// - 50 IS_BIT for rbc (degree-2, per spec: rbc type is Bit) -/// - 25 rnc equality (degree-2, per spec: rnc is constant) -/// Total: 275 constraints +/// All other checks (XOR, AND, HWSL, IS_BYTE, IS_HALF, KECCAK, KECCAK_RC) are +/// enforced via bus interactions against the BITWISE/KECCAK_RC chips. 
pub fn create_constraints( constraint_idx_start: usize, ) -> ( Vec>>, usize, ) { - use crate::constraints::templates::IsBitConstraint; - - let mut constraints: Vec>> = - Vec::with_capacity(275); - let mut idx = constraint_idx_start; - - // 200 pi verification constraints (degree-3) - for x in 0..5 { - for y in 0..5 { - for z in 0..8 { - constraints.push(Box::new(PiConstraint::new(idx, x, y, z))); - idx += 1; - } - } - } - - // 50 IS_BIT constraints for rbc[x][y][bit] (degree-2, unconditional) - // Safe on padding: rbc=0 on padding rows, 0*(1-0)=0 ✓ - for x in 0..5 { - for y in 0..5 { - for bit in 0..2 { - constraints.push(Box::new(IsBitConstraint::unconditional( - cols::rbc(x, y, bit), - idx, - ))); - idx += 1; - } - } - } - - // 25 rnc equality constraints: mu * (rnc[x][y] - KECCAK_RHO[x][y] % 16) = 0 - for x in 0..5 { - for y in 0..5 { - constraints.push(Box::new(RncConstantConstraint::new(idx, x, y))); - idx += 1; - } - } - - (constraints, idx) + (Vec::new(), constraint_idx_start) } #[cfg(test)] @@ -1034,49 +785,52 @@ mod tests { use super::*; use executor::vm::instruction::execution::keccak_f1600; + /// pi is a spec virtual variable. Verify the inlined expression + /// (rot_left[sx,sy,l_byte] + rot_right[sx,sy,r_byte]) matches the byte of + /// rho(theta) for a non-trivial state. Uses mu=0 padding rows as a trivial + /// sanity check (all zeros), then a non-zero-input round as the real test. #[test] - fn test_pi_constraint_values() { - let input = [0u64; 25]; + fn test_pi_virtual_matches_rotate() { + // Use a non-zero input so theta_lanes are non-trivial. + let input = [0x0102030405060708u64; 25]; let mut output = input; keccak_f1600(&mut output); let op = KeccakRoundOperation { timestamp: 42, input, output }; let trace = generate_keccak_rnd_trace(&[op]); + let base = 0; + + // Recompute theta for round 0 in u64 to compare against virtual pi. 
+ let mut c = [0u64; 5]; + for x in 0..5 { + c[x] = input[x] ^ input[x + 5] ^ input[x + 10] ^ input[x + 15] ^ input[x + 20]; + } + let mut d = [0u64; 5]; + for x in 0..5 { + d[x] = c[(x + 4) % 5] ^ c[(x + 1) % 5].rotate_left(1); + } + let mut theta_lanes = [0u64; 25]; + for x in 0..5 { + for y in 0..5 { + theta_lanes[x + 5 * y] = input[x + 5 * y] ^ d[x]; + } + } - // Check pi constraint on round 0 for x in 0..5 { for y in 0..5 { let sx = (x + 3 * y) % 5; let sy = x; - let rho_offset = KECCAK_RHO[sx][sy] as usize; - let rbc_val = rho_offset / 16; - let b0 = (rbc_val & 1) as u64; - let b1 = ((rbc_val >> 1) & 1) as u64; - + let rotated = theta_lanes[sx + 5 * sy].rotate_left(KECCAK_RHO[sx][sy]); for z in 0..8 { - let base = 0 * cols::NUM_COLUMNS; - let pi_val = &trace.main_table.data[base + cols::pi(x, y, z)]; - - // Reconstruct expected from rot_left/rot_right - let l = |bz: usize| &trace.main_table.data[base + cols::rot_left(sx, sy, bz)]; - let r = |bz: usize| &trace.main_table.data[base + cols::rot_right(sx, sy, bz)]; - - let expected = if b0 == 0 && b1 == 0 { - l(z) + r((z + 6) % 8) - } else if b0 == 1 && b1 == 0 { - l((z + 6) % 8) + r((z + 4) % 8) - } else if b0 == 0 && b1 == 1 { - l((z + 4) % 8) + r((z + 2) % 8) - } else { - l((z + 2) % 8) + r(z) - }; - + let (l_col, r_col) = cols::pi_src_cols(x, y, z); + let virtual_pi = &trace.main_table.data[base + l_col] + + &trace.main_table.data[base + r_col]; + let expected = FE::from(((rotated >> (z * 8)) & 0xFF) as u64); assert_eq!( - pi_val, &expected, - "Pi mismatch at ({x},{y},{z}): src=({sx},{sy}), rbc=({b0},{b1}), rho_offset={rho_offset}, pi={pi_val:?}, expected={expected:?}" + virtual_pi, expected, + "virtual pi mismatch at ({x},{y},{z}): sx={sx}, sy={sy}" ); } } } - println!("All pi constraints verified for round 0 ✓"); } } diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 99188171a..ea717c3fd 100644 --- a/prover/src/tables/trace_builder.rs +++ 
b/prover/src/tables/trace_builder.rs @@ -2661,7 +2661,10 @@ mod keccak_tests { #[test] fn test_keccak_column_counts() { assert_eq!(core_cols::NUM_COLUMNS, 511, "KECCAK core columns"); - assert_eq!(rnd_cols::NUM_COLUMNS, 1775, "KECCAK_RND columns"); + assert_eq!( + rnd_cols::NUM_COLUMNS, 1500, + "KECCAK_RND columns (rnc/rbc inlined as constants; pi virtual)" + ); assert_eq!(keccak_rc::cols::NUM_COLUMNS, 10, "KECCAK_RC columns"); } @@ -2671,7 +2674,10 @@ mod keccak_tests { assert_eq!(core_constraints.len(), 50, "KECCAK core: 25 ADD pairs"); let (rnd_constraints, _) = keccak_rnd::create_constraints(0); - assert_eq!(rnd_constraints.len(), 275, "KECCAK_RND: 200 pi + 50 IS_BIT rbc + 25 rnc equality"); + assert_eq!( + rnd_constraints.len(), 0, + "KECCAK_RND: no polynomial constraints (pi virtual, rnc/rbc inlined)" + ); } } From 9f993c8999de5af2f1cf16bf085e48aba644313a Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 21 Apr 2026 14:24:19 -0300 Subject: [PATCH 09/14] lint --- executor/src/vm/instruction/execution.rs | 3 +- prover/src/tables/cpu.rs | 6 +- prover/src/tables/keccak.rs | 100 ++++++++-- prover/src/tables/keccak_rnd.rs | 235 ++++++++++++++++++----- prover/src/tables/trace_builder.rs | 225 +++++++++++++++++----- prover/src/test_utils.rs | 8 +- prover/src/tests/prove_elfs_tests.rs | 3 +- 7 files changed, 449 insertions(+), 131 deletions(-) diff --git a/executor/src/vm/instruction/execution.rs b/executor/src/vm/instruction/execution.rs index c28b3de65..d53d5d194 100644 --- a/executor/src/vm/instruction/execution.rs +++ b/executor/src/vm/instruction/execution.rs @@ -585,8 +585,7 @@ pub fn keccak_f1600(state: &mut [u64; 25]) { let mut b = [0u64; 25]; for x in 0..5 { for y in 0..5 { - b[y + 5 * ((2 * x + 3 * y) % 5)] = - state[x + 5 * y].rotate_left(KECCAK_RHO[x][y]); + b[y + 5 * ((2 * x + 3 * y) % 5)] = state[x + 5 * y].rotate_left(KECCAK_RHO[x][y]); } } diff --git a/prover/src/tables/cpu.rs b/prover/src/tables/cpu.rs index f59281e0a..c6cbbb775 100644 --- 
a/prover/src/tables/cpu.rs +++ b/prover/src/tables/cpu.rs @@ -819,10 +819,8 @@ pub fn generate_cpu_trace( data[base + cols::DIVREM] = FE::from(d.op_divrem as u64); data[base + cols::ECALL] = FE::from(d.op_ecall as u64); data[base + cols::ECALL_KECCAK] = FE::from(op.ecall_keccak as u64); - data[base + cols::KECCAK_STATE_ADDR_0] = - FE::from(op.keccak_state_addr & 0xFFFF_FFFF); - data[base + cols::KECCAK_STATE_ADDR_1] = - FE::from(op.keccak_state_addr >> 32); + data[base + cols::KECCAK_STATE_ADDR_0] = FE::from(op.keccak_state_addr & 0xFFFF_FFFF); + data[base + cols::KECCAK_STATE_ADDR_1] = FE::from(op.keccak_state_addr >> 32); data[base + cols::EBREAK] = FE::from(d.op_ebreak as u64); // Output columns diff --git a/prover/src/tables/keccak.rs b/prover/src/tables/keccak.rs index 984a3a0a9..26c489925 100644 --- a/prover/src/tables/keccak.rs +++ b/prover/src/tables/keccak.rs @@ -20,8 +20,8 @@ use stark::constraints::transition::TransitionConstraint; use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; use stark::trace::TraceTable; -use crate::constraints::templates::{AddConstraint, AddOperand}; use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; +use crate::constraints::templates::{AddConstraint, AddOperand}; // ========================================================================= // Column indices @@ -204,22 +204,48 @@ pub fn bus_interactions() -> Vec { { // addr as DWordWL from DWordBL bytes: lo32 = sum(addr[0..4] * 256^i), hi32 = sum(addr[4..8] * 256^i) let addr_lo = BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: cols::addr(0) }, - LinearTerm::Column { coefficient: 256, column: cols::addr(1) }, - LinearTerm::Column { coefficient: 65536, column: cols::addr(2) }, - LinearTerm::Column { coefficient: 16777216, column: cols::addr(3) }, + LinearTerm::Column { + coefficient: 1, + column: cols::addr(0), + }, + LinearTerm::Column { + coefficient: 256, + column: cols::addr(1), + }, + LinearTerm::Column { + 
coefficient: 65536, + column: cols::addr(2), + }, + LinearTerm::Column { + coefficient: 16777216, + column: cols::addr(3), + }, ]); let addr_hi = BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: cols::addr(4) }, - LinearTerm::Column { coefficient: 256, column: cols::addr(5) }, - LinearTerm::Column { coefficient: 65536, column: cols::addr(6) }, - LinearTerm::Column { coefficient: 16777216, column: cols::addr(7) }, + LinearTerm::Column { + coefficient: 1, + column: cols::addr(4), + }, + LinearTerm::Column { + coefficient: 256, + column: cols::addr(5), + }, + LinearTerm::Column { + coefficient: 65536, + column: cols::addr(6), + }, + LinearTerm::Column { + coefficient: 16777216, + column: cols::addr(7), + }, ]); let mut values = Vec::with_capacity(24); // old[0..7] = addr as WL + 6 zeros values.push(addr_lo.clone()); values.push(addr_hi.clone()); - for _ in 2..8 { values.push(BusValue::constant(0)); } + for _ in 2..8 { + values.push(BusValue::constant(0)); + } // is_register = 1 values.push(BusValue::constant(1)); // base_address = 2*10 = 20 (register x10) @@ -228,10 +254,18 @@ pub fn bus_interactions() -> Vec { // value[0..7] = same as old (read) values.push(addr_lo); values.push(addr_hi); - for _ in 2..8 { values.push(BusValue::constant(0)); } + for _ in 2..8 { + values.push(BusValue::constant(0)); + } // timestamp - values.push(BusValue::Packed { start_column: cols::TIMESTAMP_0, packing: Packing::Direct }); - values.push(BusValue::Packed { start_column: cols::TIMESTAMP_1, packing: Packing::Direct }); + values.push(BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::Direct, + }); + values.push(BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }); // write2=1, write4=0, write8=0 (register access) values.push(BusValue::constant(1)); values.push(BusValue::constant(0)); @@ -249,8 +283,14 @@ pub fn bus_interactions() -> Vec { // bus element (no packing). 
{ let mut values = vec![ - BusValue::Packed { start_column: cols::TIMESTAMP_0, packing: Packing::Direct }, - BusValue::Packed { start_column: cols::TIMESTAMP_1, packing: Packing::Direct }, + BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }, BusValue::constant(0), // round = 0 ]; for x in 0..5 { @@ -273,8 +313,14 @@ pub fn bus_interactions() -> Vec { // 3. Keccak bus: receive (timestamp, 24, output_state[200]) { let mut values = vec![ - BusValue::Packed { start_column: cols::TIMESTAMP_0, packing: Packing::Direct }, - BusValue::Packed { start_column: cols::TIMESTAMP_1, packing: Packing::Direct }, + BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }, BusValue::constant(24), // round = 24 ]; for x in 0..5 { @@ -317,12 +363,24 @@ pub fn bus_interactions() -> Vec { // Address as DWordWL: lo32 = h0 + 2^16*h1, hi32 = h2 + 2^16*h3 let addr_lo = BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: cols::state_ptr(lane_idx, 0) }, - LinearTerm::Column { coefficient: 65536, column: cols::state_ptr(lane_idx, 1) }, + LinearTerm::Column { + coefficient: 1, + column: cols::state_ptr(lane_idx, 0), + }, + LinearTerm::Column { + coefficient: 65536, + column: cols::state_ptr(lane_idx, 1), + }, ]); let addr_hi = BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: cols::state_ptr(lane_idx, 2) }, - LinearTerm::Column { coefficient: 65536, column: cols::state_ptr(lane_idx, 3) }, + LinearTerm::Column { + coefficient: 1, + column: cols::state_ptr(lane_idx, 2), + }, + LinearTerm::Column { + coefficient: 65536, + column: cols::state_ptr(lane_idx, 3), + }, ]); let mut values = Vec::with_capacity(24); diff --git a/prover/src/tables/keccak_rnd.rs b/prover/src/tables/keccak_rnd.rs index 48714b680..70826c1ed 100644 --- 
a/prover/src/tables/keccak_rnd.rs +++ b/prover/src/tables/keccak_rnd.rs @@ -300,14 +300,19 @@ pub fn generate_keccak_rnd_trace( cxz_left_bytes[x][hw * 2 + 1] = (shifted >> 8) as u8; cxz_right_bytes[x][hw * 2] = (carry & 0xFF) as u8; cxz_right_bytes[x][hw * 2 + 1] = (carry >> 8) as u8; - data[base + cols::cxz_left(x, hw * 2)] = FE::from(cxz_left_bytes[x][hw * 2] as u64); - data[base + cols::cxz_left(x, hw * 2 + 1)] = FE::from(cxz_left_bytes[x][hw * 2 + 1] as u64); - data[base + cols::cxz_right(x, hw * 2)] = FE::from(cxz_right_bytes[x][hw * 2] as u64); - data[base + cols::cxz_right(x, hw * 2 + 1)] = FE::from(cxz_right_bytes[x][hw * 2 + 1] as u64); + data[base + cols::cxz_left(x, hw * 2)] = + FE::from(cxz_left_bytes[x][hw * 2] as u64); + data[base + cols::cxz_left(x, hw * 2 + 1)] = + FE::from(cxz_left_bytes[x][hw * 2 + 1] as u64); + data[base + cols::cxz_right(x, hw * 2)] = + FE::from(cxz_right_bytes[x][hw * 2] as u64); + data[base + cols::cxz_right(x, hw * 2 + 1)] = + FE::from(cxz_right_bytes[x][hw * 2 + 1] as u64); } // Reconstruct: left[z] + right[(z-2) mod 8] for b in 0..8 { - rotated_c[x][b] = cxz_left_bytes[x][b].wrapping_add(cxz_right_bytes[x][(b + 6) % 8]); + rotated_c[x][b] = + cxz_left_bytes[x][b].wrapping_add(cxz_right_bytes[x][(b + 6) % 8]); } } @@ -390,8 +395,7 @@ pub fn generate_keccak_rnd_trace( let and_val = not_next & next2; chi_lanes[x + 5 * y] = pi_lanes[x + 5 * y] ^ and_val; for b in 0..8 { - data[base + cols::chi_ands(x, y, b)] = - FE::from(byte_of(and_val, b) as u64); + data[base + cols::chi_ands(x, y, b)] = FE::from(byte_of(and_val, b) as u64); data[base + cols::chi(x, y, b)] = FE::from(byte_of(chi_lanes[x + 5 * y], b) as u64); } @@ -533,9 +537,18 @@ pub fn bus_interactions() -> Vec { BusId::XorByte, Multiplicity::Column(cols::MU), vec![ - BusValue::Packed { start_column: cols::start(x, 0, b), packing: Packing::Direct }, - BusValue::Packed { start_column: cols::start(x, 1, b), packing: Packing::Direct }, - BusValue::Packed { start_column: 
cols::cxz(x, 0, b), packing: Packing::Direct }, + BusValue::Packed { + start_column: cols::start(x, 0, b), + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::start(x, 1, b), + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::cxz(x, 0, b), + packing: Packing::Direct, + }, ], )); } @@ -549,9 +562,18 @@ pub fn bus_interactions() -> Vec { BusId::XorByte, Multiplicity::Column(cols::MU), vec![ - BusValue::Packed { start_column: cols::cxz(x, stage - 1, b), packing: Packing::Direct }, - BusValue::Packed { start_column: cols::start(x, y, b), packing: Packing::Direct }, - BusValue::Packed { start_column: cols::cxz(x, stage, b), packing: Packing::Direct }, + BusValue::Packed { + start_column: cols::cxz(x, stage - 1, b), + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::start(x, y, b), + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::cxz(x, stage, b), + packing: Packing::Direct, + }, ], )); } @@ -568,20 +590,38 @@ pub fn bus_interactions() -> Vec { vec![ // Input halfword: Cxz[x][3][hw*2] + 256 * Cxz[x][3][hw*2+1] BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: cols::cxz(x, 3, hw * 2) }, - LinearTerm::Column { coefficient: 256, column: cols::cxz(x, 3, hw * 2 + 1) }, + LinearTerm::Column { + coefficient: 1, + column: cols::cxz(x, 3, hw * 2), + }, + LinearTerm::Column { + coefficient: 256, + column: cols::cxz(x, 3, hw * 2 + 1), + }, ]), // Shift amount = 1 BusValue::constant(1), // Output: shifted BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: cols::cxz_left(x, hw * 2) }, - LinearTerm::Column { coefficient: 256, column: cols::cxz_left(x, hw * 2 + 1) }, + LinearTerm::Column { + coefficient: 1, + column: cols::cxz_left(x, hw * 2), + }, + LinearTerm::Column { + coefficient: 256, + column: cols::cxz_left(x, hw * 2 + 1), + }, ]), // Output: carry BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: cols::cxz_right(x, hw * 
2) }, - LinearTerm::Column { coefficient: 256, column: cols::cxz_right(x, hw * 2 + 1) }, + LinearTerm::Column { + coefficient: 1, + column: cols::cxz_right(x, hw * 2), + }, + LinearTerm::Column { + coefficient: 256, + column: cols::cxz_right(x, hw * 2 + 1), + }, ]), ], )); @@ -594,12 +634,18 @@ pub fn bus_interactions() -> Vec { interactions.push(BusInteraction::sender( BusId::IsByte, Multiplicity::Column(cols::MU), - vec![BusValue::Packed { start_column: cols::cxz_left(x, b), packing: Packing::Direct }], + vec![BusValue::Packed { + start_column: cols::cxz_left(x, b), + packing: Packing::Direct, + }], )); interactions.push(BusInteraction::sender( BusId::IsByte, Multiplicity::Column(cols::MU), - vec![BusValue::Packed { start_column: cols::cxz_right(x, b), packing: Packing::Direct }], + vec![BusValue::Packed { + start_column: cols::cxz_right(x, b), + packing: Packing::Direct, + }], )); } } @@ -615,12 +661,24 @@ pub fn bus_interactions() -> Vec { BusId::XorByte, Multiplicity::Column(cols::MU), vec![ - BusValue::Packed { start_column: cols::cxz((x + 4) % 5, 3, b), packing: Packing::Direct }, + BusValue::Packed { + start_column: cols::cxz((x + 4) % 5, 3, b), + packing: Packing::Direct, + }, BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: cols::cxz_left((x + 1) % 5, b) }, - LinearTerm::Column { coefficient: 1, column: cols::cxz_right((x + 1) % 5, (b + 6) % 8) }, + LinearTerm::Column { + coefficient: 1, + column: cols::cxz_left((x + 1) % 5, b), + }, + LinearTerm::Column { + coefficient: 1, + column: cols::cxz_right((x + 1) % 5, (b + 6) % 8), + }, ]), - BusValue::Packed { start_column: cols::dxz(x, b), packing: Packing::Direct }, + BusValue::Packed { + start_column: cols::dxz(x, b), + packing: Packing::Direct, + }, ], )); } @@ -635,9 +693,18 @@ pub fn bus_interactions() -> Vec { BusId::XorByte, Multiplicity::Column(cols::MU), vec![ - BusValue::Packed { start_column: cols::start(x, y, b), packing: Packing::Direct }, - BusValue::Packed { start_column: 
cols::dxz(x, b), packing: Packing::Direct }, - BusValue::Packed { start_column: cols::theta(x, y, b), packing: Packing::Direct }, + BusValue::Packed { + start_column: cols::start(x, y, b), + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::dxz(x, b), + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::theta(x, y, b), + packing: Packing::Direct, + }, ], )); } @@ -656,17 +723,35 @@ pub fn bus_interactions() -> Vec { Multiplicity::Column(cols::MU), vec![ BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: cols::theta(x, y, hw * 2) }, - LinearTerm::Column { coefficient: 256, column: cols::theta(x, y, hw * 2 + 1) }, + LinearTerm::Column { + coefficient: 1, + column: cols::theta(x, y, hw * 2), + }, + LinearTerm::Column { + coefficient: 256, + column: cols::theta(x, y, hw * 2 + 1), + }, ]), BusValue::constant(rnc_val), BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: cols::rot_left(x, y, hw * 2) }, - LinearTerm::Column { coefficient: 256, column: cols::rot_left(x, y, hw * 2 + 1) }, + LinearTerm::Column { + coefficient: 1, + column: cols::rot_left(x, y, hw * 2), + }, + LinearTerm::Column { + coefficient: 256, + column: cols::rot_left(x, y, hw * 2 + 1), + }, ]), BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: cols::rot_right(x, y, hw * 2) }, - LinearTerm::Column { coefficient: 256, column: cols::rot_right(x, y, hw * 2 + 1) }, + LinearTerm::Column { + coefficient: 1, + column: cols::rot_right(x, y, hw * 2), + }, + LinearTerm::Column { + coefficient: 256, + column: cols::rot_right(x, y, hw * 2 + 1), + }, ]), ], )); @@ -681,12 +766,18 @@ pub fn bus_interactions() -> Vec { interactions.push(BusInteraction::sender( BusId::IsByte, Multiplicity::Column(cols::MU), - vec![BusValue::Packed { start_column: cols::rot_left(x, y, b), packing: Packing::Direct }], + vec![BusValue::Packed { + start_column: cols::rot_left(x, y, b), + packing: Packing::Direct, + }], )); 
interactions.push(BusInteraction::sender( BusId::IsByte, Multiplicity::Column(cols::MU), - vec![BusValue::Packed { start_column: cols::rot_right(x, y, b), packing: Packing::Direct }], + vec![BusValue::Packed { + start_column: cols::rot_right(x, y, b), + packing: Packing::Direct, + }], )); } } @@ -707,14 +798,29 @@ pub fn bus_interactions() -> Vec { vec![ BusValue::linear(vec![ LinearTerm::Constant(255), - LinearTerm::Column { coefficient: -1, column: p1_l }, - LinearTerm::Column { coefficient: -1, column: p1_r }, + LinearTerm::Column { + coefficient: -1, + column: p1_l, + }, + LinearTerm::Column { + coefficient: -1, + column: p1_r, + }, ]), BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: p2_l }, - LinearTerm::Column { coefficient: 1, column: p2_r }, + LinearTerm::Column { + coefficient: 1, + column: p2_l, + }, + LinearTerm::Column { + coefficient: 1, + column: p2_r, + }, ]), - BusValue::Packed { start_column: cols::chi_ands(x, y, b), packing: Packing::Direct }, + BusValue::Packed { + start_column: cols::chi_ands(x, y, b), + packing: Packing::Direct, + }, ], )); } @@ -732,11 +838,23 @@ pub fn bus_interactions() -> Vec { Multiplicity::Column(cols::MU), vec![ BusValue::linear(vec![ - LinearTerm::Column { coefficient: 1, column: p_l }, - LinearTerm::Column { coefficient: 1, column: p_r }, + LinearTerm::Column { + coefficient: 1, + column: p_l, + }, + LinearTerm::Column { + coefficient: 1, + column: p_r, + }, ]), - BusValue::Packed { start_column: cols::chi_ands(x, y, b), packing: Packing::Direct }, - BusValue::Packed { start_column: cols::chi(x, y, b), packing: Packing::Direct }, + BusValue::Packed { + start_column: cols::chi_ands(x, y, b), + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::chi(x, y, b), + packing: Packing::Direct, + }, ], )); } @@ -750,9 +868,18 @@ pub fn bus_interactions() -> Vec { BusId::XorByte, Multiplicity::Column(cols::MU), vec![ - BusValue::Packed { start_column: cols::chi(0, 0, b), packing: 
Packing::Direct }, - BusValue::Packed { start_column: cols::rc(b), packing: Packing::Direct }, - BusValue::Packed { start_column: cols::iota(b), packing: Packing::Direct }, + BusValue::Packed { + start_column: cols::chi(0, 0, b), + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::rc(b), + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::iota(b), + packing: Packing::Direct, + }, ], )); } @@ -795,7 +922,11 @@ mod tests { let input = [0x0102030405060708u64; 25]; let mut output = input; keccak_f1600(&mut output); - let op = KeccakRoundOperation { timestamp: 42, input, output }; + let op = KeccakRoundOperation { + timestamp: 42, + input, + output, + }; let trace = generate_keccak_rnd_trace(&[op]); let base = 0; @@ -822,9 +953,9 @@ mod tests { let rotated = theta_lanes[sx + 5 * sy].rotate_left(KECCAK_RHO[sx][sy]); for z in 0..8 { let (l_col, r_col) = cols::pi_src_cols(x, y, z); - let virtual_pi = &trace.main_table.data[base + l_col] - + &trace.main_table.data[base + r_col]; - let expected = FE::from(((rotated >> (z * 8)) & 0xFF) as u64); + let virtual_pi = + &trace.main_table.data[base + l_col] + &trace.main_table.data[base + r_col]; + let expected = FE::from((rotated >> (z * 8)) & 0xFF); assert_eq!( virtual_pi, expected, "virtual pi mismatch at ({x},{y},{z}): sx={sx}, sy={sy}" diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index ea717c3fd..82e045ccc 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -1568,7 +1568,11 @@ fn collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec> (b * 8)) & 0xFF) as u8; let v1 = ((state[x + 5] >> (b * 8)) & 0xFF) as u8; cxz[x][0][b] = v0 ^ v1; - ops.push(BitwiseOperation::byte_op(BitwiseOperationType::XorByte, v0, v1)); + ops.push(BitwiseOperation::byte_op( + BitwiseOperationType::XorByte, + v0, + v1, + )); } for stage in 1..4usize { let y = stage + 1; @@ -1576,7 +1580,11 @@ fn 
collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec> (b * 8)) & 0xFF) as u8; cxz[x][stage][b] = prev ^ sv; - ops.push(BitwiseOperation::byte_op(BitwiseOperationType::XorByte, prev, sv)); + ops.push(BitwiseOperation::byte_op( + BitwiseOperationType::XorByte, + prev, + sv, + )); } } } @@ -1589,17 +1597,30 @@ fn collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec> 15 == 1 { 1u16 } else { 0 }; - ops.push(BitwiseOperation::new(BitwiseOperationType::Hwsl, + ops.push(BitwiseOperation::new( + BitwiseOperationType::Hwsl, (halfword & 0xFF) as u8, ((halfword >> 8) & 0xFF) as u8, 1, )); // IS_BYTE for cxz_left bytes - ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, (shifted & 0xFF) as u8)); - ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, ((shifted >> 8) & 0xFF) as u8)); + ops.push(BitwiseOperation::single_byte( + BitwiseOperationType::IsByte, + (shifted & 0xFF) as u8, + )); + ops.push(BitwiseOperation::single_byte( + BitwiseOperationType::IsByte, + ((shifted >> 8) & 0xFF) as u8, + )); // IS_BYTE for cxz_right bytes - ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, (carry & 0xFF) as u8)); - ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, ((carry >> 8) & 0xFF) as u8)); + ops.push(BitwiseOperation::single_byte( + BitwiseOperationType::IsByte, + (carry & 0xFF) as u8, + )); + ops.push(BitwiseOperation::single_byte( + BitwiseOperationType::IsByte, + ((carry >> 8) & 0xFF) as u8, + )); } // Reconstruct rotated_c let mut left_bytes = [0u8; 8]; @@ -1625,7 +1646,11 @@ fn collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec Vec> (b * 8)) & 0xFF) as u8; - ops.push(BitwiseOperation::byte_op(BitwiseOperationType::XorByte, s, d_bytes[x][b])); + ops.push(BitwiseOperation::byte_op( + BitwiseOperationType::XorByte, + s, + d_bytes[x][b], + )); } } } @@ -1659,17 +1688,30 @@ fn collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec> (16 - rnc_val)) }; - 
ops.push(BitwiseOperation::new(BitwiseOperationType::Hwsl, + ops.push(BitwiseOperation::new( + BitwiseOperationType::Hwsl, (halfword & 0xFF) as u8, ((halfword >> 8) & 0xFF) as u8, rnc_val, )); // IS_BYTE for rot_left - ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, (shifted & 0xFF) as u8)); - ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, ((shifted >> 8) & 0xFF) as u8)); + ops.push(BitwiseOperation::single_byte( + BitwiseOperationType::IsByte, + (shifted & 0xFF) as u8, + )); + ops.push(BitwiseOperation::single_byte( + BitwiseOperationType::IsByte, + ((shifted >> 8) & 0xFF) as u8, + )); // IS_BYTE for rot_right - ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, (carry & 0xFF) as u8)); - ops.push(BitwiseOperation::single_byte(BitwiseOperationType::IsByte, ((carry >> 8) & 0xFF) as u8)); + ops.push(BitwiseOperation::single_byte( + BitwiseOperationType::IsByte, + (carry & 0xFF) as u8, + )); + ops.push(BitwiseOperation::single_byte( + BitwiseOperationType::IsByte, + ((carry >> 8) & 0xFF) as u8, + )); } } } @@ -1696,10 +1738,18 @@ fn collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec> (b * 8)) & 0xFF) as u8; let n2_byte = ((next2 >> (b * 8)) & 0xFF) as u8; - ops.push(BitwiseOperation::byte_op(BitwiseOperationType::AndByte, not_byte, n2_byte)); + ops.push(BitwiseOperation::byte_op( + BitwiseOperationType::AndByte, + not_byte, + n2_byte, + )); let pi_byte = ((pi_lanes[x + 5 * y] >> (b * 8)) & 0xFF) as u8; let and_byte = ((and_val >> (b * 8)) & 0xFF) as u8; - ops.push(BitwiseOperation::byte_op(BitwiseOperationType::XorByte, pi_byte, and_byte)); + ops.push(BitwiseOperation::byte_op( + BitwiseOperationType::XorByte, + pi_byte, + and_byte, + )); } } } @@ -1709,7 +1759,11 @@ fn collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec> (b * 8)) & 0xFF) as u8; let rc_byte = ((rc_val >> (b * 8)) & 0xFF) as u8; - ops.push(BitwiseOperation::byte_op(BitwiseOperationType::XorByte, chi_byte, 
rc_byte)); + ops.push(BitwiseOperation::byte_op( + BitwiseOperationType::XorByte, + chi_byte, + rc_byte, + )); } // Update state @@ -2029,8 +2083,15 @@ impl Traces { // Initialize memory state from ELF so first accesses get correct old_value. let mut memory_state = MemoryState::from_elf(elf); let mut register_state = RegisterState::new(elf.entry_point); - let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops, commit_ops, keccak_ops) = - collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state); + let ( + mut memw_ops, + load_ops, + mut lt_ops, + shift_ops, + mut bitwise_ops, + commit_ops, + keccak_ops, + ) = collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state); // HALT finalization: 33 register MEMW operations at timestamp u64::MAX. // Must come before Phase 3 (LT from MEMW) so HALT ops get timestamp checks. @@ -2284,8 +2345,15 @@ impl Traces { // Entry point = first instruction's PC (start of execution) let entry_point = cpu_ops.first().map_or(0, |op| op.decode.pc); let mut register_state = RegisterState::new(entry_point); - let (mut memw_ops, load_ops, mut lt_ops, shift_ops, mut bitwise_ops, commit_ops, keccak_ops) = - collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state); + let ( + mut memw_ops, + load_ops, + mut lt_ops, + shift_ops, + mut bitwise_ops, + commit_ops, + keccak_ops, + ) = collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state); // HALT finalization: 33 register MEMW operations at timestamp u64::MAX. // Must come before Phase 3 (LT from MEMW) so HALT ops get timestamp checks. 
@@ -2549,17 +2617,26 @@ impl Traces { #[cfg(test)] mod keccak_tests { use super::*; - use executor::vm::instruction::execution::keccak_f1600; - use crate::tables::keccak_rnd::cols as rnd_cols; use crate::tables::keccak::cols as core_cols; + use crate::tables::keccak_rnd::cols as rnd_cols; use crate::tables::types::FE; + use executor::vm::instruction::execution::keccak_f1600; fn make_keccak_ops() -> (KeccakOperation, KeccakRoundOperation) { let input = [0u64; 25]; let mut output = input; keccak_f1600(&mut output); - let kop = KeccakOperation { timestamp: 42, state_addr: 0x1000, input, output }; - let rop = KeccakRoundOperation { timestamp: 42, input, output }; + let kop = KeccakOperation { + timestamp: 42, + state_addr: 0x1000, + input, + output, + }; + let rop = KeccakRoundOperation { + timestamp: 42, + input, + output, + }; (kop, rop) } @@ -2568,11 +2645,26 @@ mod keccak_tests { let (kop, _) = make_keccak_ops(); let ops = collect_bitwise_from_keccak(&[kop]); - let xor = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::XorByte).count(); - let and = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::AndByte).count(); - let is_byte = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::IsByte).count(); - let hwsl = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::Hwsl).count(); - let is_half = ops.iter().filter(|o| o.lookup_type == BitwiseOperationType::IsHalf).count(); + let xor = ops + .iter() + .filter(|o| o.lookup_type == BitwiseOperationType::XorByte) + .count(); + let and = ops + .iter() + .filter(|o| o.lookup_type == BitwiseOperationType::AndByte) + .count(); + let is_byte = ops + .iter() + .filter(|o| o.lookup_type == BitwiseOperationType::IsByte) + .count(); + let hwsl = ops + .iter() + .filter(|o| o.lookup_type == BitwiseOperationType::Hwsl) + .count(); + let is_half = ops + .iter() + .filter(|o| o.lookup_type == BitwiseOperationType::IsHalf) + .count(); assert_eq!(xor, 24 * 608, "XorByte count"); assert_eq!(and, 24 
* 200, "AndByte count"); @@ -2591,24 +2683,51 @@ mod keccak_tests { for round in 0..24 { let rc = executor::vm::instruction::execution::KECCAK_RC[round]; let mut c = [0u64; 5]; - for x in 0..5 { c[x] = ref_state[x] ^ ref_state[x+5] ^ ref_state[x+10] ^ ref_state[x+15] ^ ref_state[x+20]; } + for x in 0..5 { + c[x] = ref_state[x] + ^ ref_state[x + 5] + ^ ref_state[x + 10] + ^ ref_state[x + 15] + ^ ref_state[x + 20]; + } let mut d = [0u64; 5]; - for x in 0..5 { d[x] = c[(x+4)%5] ^ c[(x+1)%5].rotate_left(1); } - for i in 0..25 { ref_state[i] ^= d[i % 5]; } + for x in 0..5 { + d[x] = c[(x + 4) % 5] ^ c[(x + 1) % 5].rotate_left(1); + } + for i in 0..25 { + ref_state[i] ^= d[i % 5]; + } let mut b = [0u64; 25]; - for x in 0..5 { for y in 0..5 { b[y + 5*((2*x+3*y)%5)] = ref_state[x+5*y].rotate_left(executor::vm::instruction::execution::KECCAK_RHO[x][y]); } } - for x in 0..5 { for y in 0..5 { ref_state[x+5*y] = b[x+5*y] ^ (!b[(x+1)%5+5*y] & b[(x+2)%5+5*y]); } } + for x in 0..5 { + for y in 0..5 { + b[y + 5 * ((2 * x + 3 * y) % 5)] = ref_state[x + 5 * y] + .rotate_left(executor::vm::instruction::execution::KECCAK_RHO[x][y]); + } + } + for x in 0..5 { + for y in 0..5 { + ref_state[x + 5 * y] = + b[x + 5 * y] ^ (!b[(x + 1) % 5 + 5 * y] & b[(x + 2) % 5 + 5 * y]); + } + } ref_state[0] ^= rc; let base = round * rnd_cols::NUM_COLUMNS; - for lane in 0..25 { + for (lane, &lane_val) in ref_state.iter().enumerate() { let x = lane % 5; let y = lane / 5; for byte_idx in 0..8 { - let expected = FE::from(((ref_state[lane] >> (byte_idx * 8)) & 0xFF) as u64); - let col = if x == 0 && y == 0 { rnd_cols::iota(byte_idx) } else { rnd_cols::chi(x, y, byte_idx) }; + let expected = FE::from((lane_val >> (byte_idx * 8)) & 0xFF); + let col = if x == 0 && y == 0 { + rnd_cols::iota(byte_idx) + } else { + rnd_cols::chi(x, y, byte_idx) + }; let trace_val = &rnd_trace.main_table.data[base + col]; - assert_eq!(&expected, trace_val, "Round {round} lane ({x},{y}) byte {byte_idx}"); + assert_eq!( + &expected, 
trace_val, + "Round {round} lane ({x},{y}) byte {byte_idx}" + ); } } } @@ -2650,19 +2769,29 @@ mod keccak_tests { #[test] fn test_keccak_bus_interaction_counts() { - assert_eq!(keccak::bus_interactions().len(), 129, - "KECCAK core: 1 ECALL + 1 MEMW read_addr + 25 MEMW lanes + 100 IS_HALF + 1 Keccak send + 1 Keccak recv"); - assert_eq!(keccak_rnd::bus_interactions().len(), 1411, - "KECCAK_RND: 3 IO + 500 theta + 500 rho + 400 chi + 8 iota"); - assert_eq!(keccak_rc::bus_interactions().len(), 1, - "KECCAK_RC: 1 receiver"); + assert_eq!( + keccak::bus_interactions().len(), + 129, + "KECCAK core: 1 ECALL + 1 MEMW read_addr + 25 MEMW lanes + 100 IS_HALF + 1 Keccak send + 1 Keccak recv" + ); + assert_eq!( + keccak_rnd::bus_interactions().len(), + 1411, + "KECCAK_RND: 3 IO + 500 theta + 500 rho + 400 chi + 8 iota" + ); + assert_eq!( + keccak_rc::bus_interactions().len(), + 1, + "KECCAK_RC: 1 receiver" + ); } #[test] fn test_keccak_column_counts() { assert_eq!(core_cols::NUM_COLUMNS, 511, "KECCAK core columns"); assert_eq!( - rnd_cols::NUM_COLUMNS, 1500, + rnd_cols::NUM_COLUMNS, + 1500, "KECCAK_RND columns (rnc/rbc inlined as constants; pi virtual)" ); assert_eq!(keccak_rc::cols::NUM_COLUMNS, 10, "KECCAK_RC columns"); @@ -2675,9 +2804,9 @@ mod keccak_tests { let (rnd_constraints, _) = keccak_rnd::create_constraints(0); assert_eq!( - rnd_constraints.len(), 0, + rnd_constraints.len(), + 0, "KECCAK_RND: no polynomial constraints (pi virtual, rnc/rbc inlined)" ); } } - diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index 9932f3957..06fb67818 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -44,8 +44,12 @@ use crate::tables::dvrm::{ }; use crate::tables::halt::{bus_interactions as halt_bus_interactions, cols as halt_cols}; use crate::tables::keccak::{bus_interactions as keccak_bus_interactions, cols as keccak_cols}; -use crate::tables::keccak_rc::{bus_interactions as keccak_rc_bus_interactions, cols as keccak_rc_cols}; -use 
crate::tables::keccak_rnd::{bus_interactions as keccak_rnd_bus_interactions, cols as keccak_rnd_cols}; +use crate::tables::keccak_rc::{ + bus_interactions as keccak_rc_bus_interactions, cols as keccak_rc_cols, +}; +use crate::tables::keccak_rnd::{ + bus_interactions as keccak_rnd_bus_interactions, cols as keccak_rnd_cols, +}; use crate::tables::load::{ bus_interactions as load_bus_interactions, cols as load_cols, constraints as load_constraints, }; diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 8954db948..3f300e664 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -723,8 +723,7 @@ fn test_prove_elfs_keccak() { let (elf, logs, _instructions) = run_asm_elf("test_keccak"); // Must use from_elf_and_logs (not from_logs_minimal) because keccak accesses // RAM (stack memory), which requires PAGE tables for Memory bus balance. - let mut traces = - Traces::from_elf_and_logs(&elf, &logs, &Default::default()).unwrap(); + let mut traces = Traces::from_elf_and_logs(&elf, &logs, &Default::default()).unwrap(); assert!( prove_and_verify_vm_minimal(&elf, &mut traces), From a46eea5ba24118b5b5faeff99028eb6b2c376fe9 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Wed, 22 Apr 2026 15:08:57 -0300 Subject: [PATCH 10/14] revolve conflicts --- prover/src/constraints/cpu.rs | 34 ++++++------------------------ prover/src/tables/keccak.rs | 13 ++++++------ prover/src/tables/keccak_rnd.rs | 4 ++-- prover/src/tables/trace_builder.rs | 19 +++++++++++++++++ prover/src/test_utils.rs | 8 +++---- 5 files changed, 38 insertions(+), 40 deletions(-) diff --git a/prover/src/constraints/cpu.rs b/prover/src/constraints/cpu.rs index fe1c44959..9d8a8495f 100644 --- a/prover/src/constraints/cpu.rs +++ b/prover/src/constraints/cpu.rs @@ -146,32 +146,12 @@ impl TransitionConstraint 0 } - fn evaluate( - &self, - evaluation_context: &TransitionEvaluationContext, - transition_evaluations: &mut [FieldElement], - ) { - 
match evaluation_context { - TransitionEvaluationContext::Prover { - frame, - periodic_values: _, - rap_challenges: _, - .. - } => { - let constraint_value = self.compute(frame.get_evaluation_step(0)); - transition_evaluations[self.constraint_idx] = constraint_value.to_extension(); - } - - TransitionEvaluationContext::Verifier { - frame, - periodic_values: _, - rap_challenges: _, - .. - } => { - let constraint_value = self.compute(frame.get_evaluation_step(0)); - transition_evaluations[self.constraint_idx] = constraint_value; - } - } + fn evaluate(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + self.compute(step) } } @@ -1159,7 +1139,7 @@ pub fn create_all_cpu_constraints() -> ( next_idx += 1; // ECALL_KECCAK implies ECALL - other.push(Box::new(EcallKeccakImpliesEcallConstraint::new(next_idx))); + other.push(EcallKeccakImpliesEcallConstraint::new(next_idx).boxed()); next_idx += 1; // rv1 zero-forcing (CM48): (1 - read_register1) * rv1[i] = 0 for i ∈ [0, 2] diff --git a/prover/src/tables/keccak.rs b/prover/src/tables/keccak.rs index 26c489925..ef181d67b 100644 --- a/prover/src/tables/keccak.rs +++ b/prover/src/tables/keccak.rs @@ -16,7 +16,7 @@ //! 
| mu | 1 | Multiplicity flag | use executor::vm::instruction::execution::KECCAK_SYSCALL_NUMBER; -use stark::constraints::transition::TransitionConstraint; +use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; use stark::trace::TraceTable; @@ -441,11 +441,12 @@ pub fn bus_interactions() -> Vec { pub fn create_constraints( constraint_idx_start: usize, ) -> ( - Vec>>, + Vec>>, usize, ) { - let mut constraints: Vec>> = - Vec::with_capacity(50); + let mut constraints: Vec< + Box>, + > = Vec::with_capacity(50); let mut idx = constraint_idx_start; // state_ptr[lane] = addr + 8*lane_idx @@ -460,8 +461,8 @@ pub fn create_constraints( AddOperand::from_dword_hl(cols::state_ptr(lane_idx, 0)), idx, ); - constraints.push(Box::new(c0)); - constraints.push(Box::new(c1)); + constraints.push(c0.boxed()); + constraints.push(c1.boxed()); idx += 2; } diff --git a/prover/src/tables/keccak_rnd.rs b/prover/src/tables/keccak_rnd.rs index 70826c1ed..353cbf0c3 100644 --- a/prover/src/tables/keccak_rnd.rs +++ b/prover/src/tables/keccak_rnd.rs @@ -28,7 +28,7 @@ //! constants derived from `KECCAK_RHO[x][y]`, not materialized as columns. 
use executor::vm::instruction::execution::{KECCAK_RC, KECCAK_RHO}; -use stark::constraints::transition::TransitionConstraint; +use stark::constraints::transition::TransitionConstraintEvaluator; use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; use stark::trace::TraceTable; @@ -901,7 +901,7 @@ pub fn bus_interactions() -> Vec { pub fn create_constraints( constraint_idx_start: usize, ) -> ( - Vec>>, + Vec>>, usize, ) { (Vec::new(), constraint_idx_start) diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index a3ba096a7..5f5988ce8 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -2327,6 +2327,10 @@ impl Traces { use super::decode::cols::NUM_COLUMNS as DECODE_COLS; use super::dvrm::cols::NUM_COLUMNS as DVRM_COLS; use super::halt::cols::NUM_COLUMNS as HALT_COLS; + use super::keccak::cols::NUM_COLUMNS as KECCAK_COLS; + use super::keccak_rc::NUM_PRECOMPUTED_COLS as KECCAK_RC_PRECOMPUTED; + use super::keccak_rc::cols::NUM_COLUMNS as KECCAK_RC_COLS; + use super::keccak_rnd::cols::NUM_COLUMNS as KECCAK_RND_COLS; use super::load::cols::NUM_COLUMNS as LOAD_COLS; use super::lt::cols::NUM_COLUMNS as LT_COLS; use super::memw::cols::NUM_COLUMNS as MEMW_COLS; @@ -2355,6 +2359,9 @@ impl Traces { branches, halt, commit, + keccak, + keccak_rnd, + keccak_rc, memw_registers, page_configs: _, public_output_bytes: _, @@ -2399,6 +2406,9 @@ impl Traces { for t in memw_registers { total += (t.num_rows() * MEMW_R_COLS) as u64; } + total += (keccak.num_rows() * KECCAK_COLS) as u64; + total += (keccak_rnd.num_rows() * KECCAK_RND_COLS) as u64; + total += (keccak_rc.num_rows() * (KECCAK_RC_COLS - KECCAK_RC_PRECOMPUTED)) as u64; total } @@ -2431,6 +2441,9 @@ impl Traces { // page::bus_interactions count is constant regardless of page_base. 
let n_page = aux_cols(super::page::bus_interactions(0).len()); let n_memw_r = aux_cols(super::memw_register::bus_interactions().len()); + let n_keccak = aux_cols(super::keccak::bus_interactions().len()); + let n_keccak_rnd = aux_cols(super::keccak_rnd::bus_interactions().len()); + let n_keccak_rc = aux_cols(super::keccak_rc::bus_interactions().len()); let Traces { cpus, @@ -2448,6 +2461,9 @@ impl Traces { branches, halt, commit, + keccak, + keccak_rnd, + keccak_rc, memw_registers, page_configs: _, public_output_bytes: _, @@ -2492,6 +2508,9 @@ impl Traces { for t in memw_registers { total += (t.num_rows() * n_memw_r) as u64; } + total += (keccak.num_rows() * n_keccak) as u64; + total += (keccak_rnd.num_rows() * n_keccak_rnd) as u64; + total += (keccak_rc.num_rows() * n_keccak_rc) as u64; total } diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index ef2b19527..1dcb768b2 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -802,8 +802,7 @@ pub fn create_register_air(proof_options: &ProofOptions) -> VmAir { /// Create KECCAK core AIR with ADD constraints and bus interactions. pub fn create_keccak_air(proof_options: &ProofOptions) -> VmAir { let (constraints, _) = crate::tables::keccak::create_constraints(0); - let transition_constraints: Vec>> = - constraints.into_iter().map(|c| c as _).collect(); + let transition_constraints: Vec>> = constraints; let auxiliary_trace_build_data = AuxiliaryTraceBuildData { interactions: keccak_bus_interactions(), @@ -822,8 +821,7 @@ pub fn create_keccak_air(proof_options: &ProofOptions) -> VmAir { /// Create KECCAK_RND AIR with pi constraints and bus interactions. 
pub fn create_keccak_rnd_air(proof_options: &ProofOptions) -> VmAir { let (constraints, _) = crate::tables::keccak_rnd::create_constraints(0); - let transition_constraints: Vec>> = - constraints.into_iter().map(|c| c as _).collect(); + let transition_constraints: Vec>> = constraints; let auxiliary_trace_build_data = AuxiliaryTraceBuildData { interactions: keccak_rnd_bus_interactions(), @@ -841,7 +839,7 @@ pub fn create_keccak_rnd_air(proof_options: &ProofOptions) -> VmAir { /// Create KECCAK_RC AIR with bus interactions (preprocessed table). pub fn create_keccak_rc_air(proof_options: &ProofOptions) -> VmAir { - let transition_constraints: Vec>> = vec![]; + let transition_constraints: Vec>> = vec![]; let auxiliary_trace_build_data = AuxiliaryTraceBuildData { interactions: keccak_rc_bus_interactions(), From 32d15eed6b4d210b59462d53316b3fb7150f8255 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Mon, 27 Apr 2026 16:50:06 -0300 Subject: [PATCH 11/14] Remove dead keccak CPU columns and BusId; verify KAT in test_keccak --- executor/programs/asm/test_keccak.s | 8 ++++ executor/tests/asm.rs | 41 ++++++++++++++++- prover/src/constraints/cpu.rs | 65 ++------------------------- prover/src/tables/cpu.rs | 17 ++----- prover/src/tables/keccak.rs | 12 +---- prover/src/tables/keccak_rnd.rs | 12 +++-- prover/src/tables/types.rs | 8 +--- prover/src/tests/constraints_tests.rs | 18 ++++---- prover/src/tests/cpu_tests.rs | 2 +- 9 files changed, 73 insertions(+), 110 deletions(-) diff --git a/executor/programs/asm/test_keccak.s b/executor/programs/asm/test_keccak.s index 5d5aac618..31cd93be6 100644 --- a/executor/programs/asm/test_keccak.s +++ b/executor/programs/asm/test_keccak.s @@ -21,6 +21,14 @@ main: li a7, -2 ecall + # Commit the post-permutation state so the test can verify the KAT. 
+ # Commit syscall: a0=fd(1), a1=buf_addr, a2=count, a7=64 + li a0, 1 + mv a1, sp + li a2, 200 + li a7, 64 + ecall + # Restore stack and halt addi sp, sp, 200 li a0, 0 diff --git a/executor/tests/asm.rs b/executor/tests/asm.rs index 70485fb1c..86722b82c 100644 --- a/executor/tests/asm.rs +++ b/executor/tests/asm.rs @@ -806,5 +806,44 @@ fn test_sub_underflow() { #[test] fn test_keccak() { - run_program("./program_artifacts/asm/test_keccak.elf"); + // Runs keccak-f[1600] on a zeroed state and commits the 200-byte result. + // Expected output is the FIPS-202 zero-input KAT. + let elf_data = std::fs::read("./program_artifacts/asm/test_keccak.elf").unwrap(); + let program = Elf::load(&elf_data).unwrap(); + let executor = Executor::new(&program, vec![]).expect("Failed to create executor"); + let result = executor.run().expect("Failed to run program"); + + let expected_state: [u64; 25] = [ + 0xF1258F7940E1DDE7, + 0x84D5CCF933C0478A, + 0xD598261EA65AA9EE, + 0xBD1547306F80494D, + 0x8B284E056253D057, + 0xFF97A42D7F8E6FD4, + 0x90FEE5A0A44647C4, + 0x8C5BDA0CD6192E76, + 0xAD30A6F71B19059C, + 0x30935AB7D08FFC64, + 0xEB5AA93F2317D635, + 0xA9A6E6260D712103, + 0x81A57C16DBCF555F, + 0x43B831CD0347C826, + 0x01F22F1A11A5569F, + 0x05E5635A21D9AE61, + 0x64BEFEF28CC970F2, + 0x613670957BC46611, + 0xB87C5A554FD00ECB, + 0x8C3EE88A1CCF32C8, + 0x940C7922AE3A2614, + 0x1841F924A2C509E4, + 0x16F53526E70465C2, + 0x75F644E97F30A13B, + 0xEAF1FF7B5CECA249, + ]; + let mut expected_bytes = Vec::with_capacity(200); + for lane in expected_state { + expected_bytes.extend_from_slice(&lane.to_le_bytes()); + } + assert_eq!(result.return_values.memory_values, expected_bytes); + assert_eq!(result.return_values.register_values.0, 0); } diff --git a/prover/src/constraints/cpu.rs b/prover/src/constraints/cpu.rs index 8aee467fa..546f2f2a4 100644 --- a/prover/src/constraints/cpu.rs +++ b/prover/src/constraints/cpu.rs @@ -83,7 +83,6 @@ pub const BIT_FLAG_COLUMNS: &[usize] = &[ cols::MUL, cols::DIVREM, cols::ECALL, 
- cols::ECALL_KECCAK, cols::EBREAK, // Sign bits cols::RV1_EXT_BIT, @@ -104,60 +103,6 @@ pub fn create_is_bit_constraints(constraint_idx_start: usize) -> (Vec Self { - Self { constraint_idx } - } - - fn compute(&self, step: &TableView) -> FieldElement - where - F: IsSubFieldOf, - E: IsField, - { - let ecall_keccak = step - .get_main_evaluation_element(0, cols::ECALL_KECCAK) - .clone(); - let ecall = step.get_main_evaluation_element(0, cols::ECALL).clone(); - let one = FieldElement::::one(); - ecall_keccak * (one - ecall) - } -} - -impl TransitionConstraint - for EcallKeccakImpliesEcallConstraint -{ - fn degree(&self) -> usize { - 2 - } - - fn constraint_idx(&self) -> usize { - self.constraint_idx - } - - fn end_exemptions(&self) -> usize { - 0 - } - - fn evaluate(&self, step: &TableView) -> FieldElement - where - F: IsSubFieldOf, - E: IsField, - { - self.compute(step) - } -} - // ========================================================================= // ALU ADD Constraints // ========================================================================= @@ -1088,7 +1033,7 @@ pub fn create_jalr_constraints(constraint_idx_start: usize) -> (Vec (Vec ( other.push(EbreakConstraint::new(next_idx).boxed()); next_idx += 1; - // ECALL_KECCAK implies ECALL - other.push(EcallKeccakImpliesEcallConstraint::new(next_idx).boxed()); - next_idx += 1; - // rv1 zero-forcing (CM48): (1 - read_register1) * rv1[i] = 0 for i ∈ [0, 2] for &value_col in &[cols::RV1_0, cols::RV1_1, cols::RV1_2] { other.push( diff --git a/prover/src/tables/cpu.rs b/prover/src/tables/cpu.rs index 0f2936a27..57f207d4d 100644 --- a/prover/src/tables/cpu.rs +++ b/prover/src/tables/cpu.rs @@ -232,24 +232,16 @@ pub mod cols { /// branch_cond: Whether branch is taken pub const BRANCH_COND: usize = 73; - /// ECALL_KECCAK: 1 when the ECALL is a KeccakPermute syscall, 0 otherwise - pub const ECALL_KECCAK: usize = 74; - - /// Keccak state address (DWordWL: lo32 and hi32). - /// Non-zero only for KeccakPermute ECALLs. 
- pub const KECCAK_STATE_ADDR_0: usize = 75; - pub const KECCAK_STATE_ADDR_1: usize = 76; - /// prev_pc_timestamp_borrow: Borrow bit for the 32-bit subtraction timestamp_lo - 3 /// in the inline PC prev_ts formula. Fires only when timestamp_lo < 3 and /// pc_double_read = 0 (i.e. after timestamp wraps past 2^32 into values 0..2). - pub const PREV_PC_TIMESTAMP_BORROW: usize = 77; + pub const PREV_PC_TIMESTAMP_BORROW: usize = 74; /// pc_double_read: Whether PC is read as rs1 this cycle (AUIPC/JAL) - pub const PC_DOUBLE_READ: usize = 78; + pub const PC_DOUBLE_READ: usize = 75; /// Total number of columns - pub const NUM_COLUMNS: usize = 79; + pub const NUM_COLUMNS: usize = 76; // ------------------------------------------------------------------------- // Helper ranges for iteration @@ -821,9 +813,6 @@ pub fn generate_cpu_trace( data[base + cols::MUL] = FE::from(d.op_mul as u64); data[base + cols::DIVREM] = FE::from(d.op_divrem as u64); data[base + cols::ECALL] = FE::from(d.op_ecall as u64); - data[base + cols::ECALL_KECCAK] = FE::from(op.ecall_keccak as u64); - data[base + cols::KECCAK_STATE_ADDR_0] = FE::from(op.keccak_state_addr & 0xFFFF_FFFF); - data[base + cols::KECCAK_STATE_ADDR_1] = FE::from(op.keccak_state_addr >> 32); data[base + cols::EBREAK] = FE::from(d.op_ebreak as u64); // Output columns diff --git a/prover/src/tables/keccak.rs b/prover/src/tables/keccak.rs index ef181d67b..f7e786c4f 100644 --- a/prover/src/tables/keccak.rs +++ b/prover/src/tables/keccak.rs @@ -171,16 +171,8 @@ pub fn bus_interactions() -> Vec { let mut interactions = Vec::with_capacity(160); // 1. ECALL receiver (shared bus, per spec keccak:c:output) - // Format: [ts_lo, ts_hi, syscall_lo32, syscall_hi32] (DWordWL convention). - // - // Spec keccak.toml:51 has `["arr", 2^32-1, 2^32-2]` which flattens to - // [hi, lo] — inconsistent with HALT/COMMIT which use `["cast", N, "DWordWL"]` - // → [lo, hi]. 
The CPU ECALL sender (cpu.rs) is shared across all three - // receivers and uses [lo, hi], so applying the spec's keccak ordering - // literally desbalances the LogUp bus. - // - // Upstream spec needs to change keccak.toml:51 to `["cast", -2, "DWordWL"]`. - // See docs/keccak-spec-deviations.md #7. + // Payload: [ts_lo, ts_hi, syscall_lo32, syscall_hi32] in DWordWL [lo, hi] + // ordering, matching the CPU ECALL sender shared with HALT/COMMIT. interactions.push(BusInteraction::receiver( BusId::Ecall, Multiplicity::Column(cols::MU), diff --git a/prover/src/tables/keccak_rnd.rs b/prover/src/tables/keccak_rnd.rs index 353cbf0c3..f740491b7 100644 --- a/prover/src/tables/keccak_rnd.rs +++ b/prover/src/tables/keccak_rnd.rs @@ -281,12 +281,10 @@ pub fn generate_keccak_rnd_trace( } // Rotate C left by 1 bit using HWSL decomposition. - // HWSL shifts each halfword independently. The carry from halfword k - // propagates to halfword (k+1)%4, which is a 2-byte offset: + // HWSL shifts each halfword (u16) independently and emits the carry + // as a u16 at bytes [2k, 2k+1] of Cxz_right, so the carry propagates + // by 2 bytes (not 1): // rotated_Cxz[z] = Cxz_left[z] + Cxz_right[(z-2) mod 8] - // Spec keccak_round.toml says (z-1) mod 8 — that is a spec bug: - // HWSL's SLLC is a u16 at bytes [2k, 2k+1] of Cxz_right, so the - // carry propagates by 2 bytes, not 1. See docs/keccak-spec-deviations.md. let mut cxz_left_bytes = [[0u8; 8]; 5]; let mut cxz_right_bytes = [[0u8; 8]; 5]; let mut rotated_c = [[0u8; 8]; 5]; @@ -653,8 +651,8 @@ pub fn bus_interactions() -> Vec { // --- Theta: Dxz XOR_BYTE (40) --- // D[x][b] = C[(x-1)%5][b] XOR rotated_C[(x+1)%5][b] // rotated_C[x'][b] = Cxz_left[x'][b] + Cxz_right[x'][(b-2)%8] (virtual). - // Spec has (b-1)%8 — see docs/keccak-spec-deviations.md for why HWSL carry - // needs a 2-byte offset, not 1. + // The 2-byte offset comes from HWSL emitting the carry as a u16 spanning + // bytes [2k, 2k+1] of Cxz_right. 
for x in 0..5 { for b in 0..8 { interactions.push(BusInteraction::sender( diff --git a/prover/src/tables/types.rs b/prover/src/tables/types.rs index be2c19ec6..70aa6813d 100644 --- a/prover/src/tables/types.rs +++ b/prover/src/tables/types.rs @@ -110,8 +110,6 @@ pub enum BusId { /// COMMIT output bus: verifier computes the receiver contribution externally /// from `VmProof.public_output` using the shared LogUp challenges Commit, - /// ECALL dispatch for KeccakPermute syscall (CPU → KECCAK) - EcallKeccak, /// Keccak core ↔ round chip: (timestamp, round, state[200 bytes]) Keccak, /// Keccak round ↔ RC lookup: (round, rc[8 bytes]) @@ -144,7 +142,6 @@ impl BusId { BusId::Dvrm => "Dvrm", BusId::CommitNextByte => "CommitNextByte", BusId::Commit => "Commit", - BusId::EcallKeccak => "EcallKeccak", BusId::Keccak => "Keccak", BusId::KeccakRc => "KeccakRc", } @@ -178,9 +175,8 @@ impl TryFrom for BusId { 19 => Ok(BusId::Ecall), 20 => Ok(BusId::CommitNextByte), 21 => Ok(BusId::Commit), - 22 => Ok(BusId::EcallKeccak), - 23 => Ok(BusId::Keccak), - 24 => Ok(BusId::KeccakRc), + 22 => Ok(BusId::Keccak), + 23 => Ok(BusId::KeccakRc), other => Err(other), } } diff --git a/prover/src/tests/constraints_tests.rs b/prover/src/tests/constraints_tests.rs index 9dfe5776f..e48f73d67 100644 --- a/prover/src/tests/constraints_tests.rs +++ b/prover/src/tests/constraints_tests.rs @@ -523,8 +523,8 @@ use crate::tables::cpu::cols as cpu_cols; #[test] fn test_cpu_bit_flag_columns_count() { - // Should have 35 bit flag columns (includes read_register1, read_register2, ecall_keccak, inline-pc columns) - assert_eq!(BIT_FLAG_COLUMNS.len(), 35); + // Should have 34 bit flag columns (includes read_register1, read_register2, inline-pc columns) + assert_eq!(BIT_FLAG_COLUMNS.len(), 34); } #[test] @@ -539,8 +539,8 @@ fn test_cpu_bit_flag_columns_valid() { fn test_create_is_bit_constraints() { let (constraints, next_idx) = create_is_bit_constraints(0); - assert_eq!(constraints.len(), 35); - assert_eq!(next_idx, 
35); + assert_eq!(constraints.len(), 34); + assert_eq!(next_idx, 34); // Check constraint indices are sequential for (i, c) in constraints.iter().enumerate() { @@ -622,14 +622,14 @@ fn test_next_pc_add_constraint_new_pair() { fn test_create_all_cpu_constraints() { let (is_bit, add, other, total) = create_all_cpu_constraints(); - assert_eq!(is_bit.len(), 35); + assert_eq!(is_bit.len(), 34); // ADD constraints: 2 (ADD+LOAD) + 2 (STORE: arg1+imm) + 2 (SUB+BEQ) + 2 (JALR) = 8 assert_eq!(add.len(), 8); - // Other: branch_cond(1) + ebreak(1) + ecall_keccak_implies(1) + rv1_zero_forcing(3) + rv2_zero_forcing(3) + arg1(2) + arg2(2) + rvd(2) + slt_zero(7) + ext_bit_zero(3) + next_pc(2) = 27 - assert_eq!(other.len(), 27); + // Other: branch_cond(1) + ebreak(1) + rv1_zero_forcing(3) + rv2_zero_forcing(3) + arg1(2) + arg2(2) + rvd(2) + slt_zero(7) + ext_bit_zero(3) + next_pc(2) = 26 + assert_eq!(other.len(), 26); - // Total should be 35 + 8 + 27 = 70 - assert_eq!(total, 70); + // Total should be 34 + 8 + 26 = 68 + assert_eq!(total, 68); assert_eq!(total, NUM_CPU_CONSTRAINTS); } diff --git a/prover/src/tests/cpu_tests.rs b/prover/src/tests/cpu_tests.rs index 205327048..9004d24c0 100644 --- a/prover/src/tests/cpu_tests.rs +++ b/prover/src/tests/cpu_tests.rs @@ -339,7 +339,7 @@ fn test_bus_interactions_count() { #[test] fn test_column_count() { - assert_eq!(cols::NUM_COLUMNS, 79); + assert_eq!(cols::NUM_COLUMNS, 76); } #[test] From f93d01d4bb3cd3b704ae3357ffb1696c7b50000c Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 28 Apr 2026 14:39:40 -0300 Subject: [PATCH 12/14] Apply spec Cxz_right Byte Bit optimization in KECCAK_RND --- prover/src/tables/keccak_rnd.rs | 189 ++++++++++++++++------------- prover/src/tables/trace_builder.rs | 49 ++++---- 2 files changed, 128 insertions(+), 110 deletions(-) diff --git a/prover/src/tables/keccak_rnd.rs b/prover/src/tables/keccak_rnd.rs index f740491b7..0405a1f83 100644 --- a/prover/src/tables/keccak_rnd.rs +++ 
b/prover/src/tables/keccak_rnd.rs @@ -3,32 +3,33 @@ //! One row per round (24 rows per keccak call). All bitwise operations are //! delegated to BITWISE lookup tables (XOR_BYTE, AND_BYTE, HWSL, IS_BYTE). //! -//! ## Column layout (~1,775 columns) +//! ## Column layout (1,480 columns) //! -//! | Group | Size | Description | -//! |----------------|------|-----------------------------------------------| -//! | timestamp | 2 | DWordWL | -//! | round | 1 | Round index (0..23) | -//! | start | 200 | Input state bytes [5][5][8] | -//! | Cxz | 160 | Column parity chain [5][4][8] | -//! | Cxz_left | 40 | Left component of rotated C [5][8] | -//! | Cxz_right | 40 | Right component of rotated C [5][8] | -//! | Dxz | 40 | D values [5][8] | -//! | theta | 200 | State after θ [5][5][8] | -//! | rot_left | 200 | Left half of ρ rotation [5][5][8] | -//! | rot_right | 200 | Right half of ρ rotation [5][5][8] | -//! | pi | 200 | State after π∘ρ (materialized) [5][5][8] | -//! | chi_ands | 200 | AND results for χ [5][5][8] | -//! | chi | 200 | State after χ [5][5][8] | -//! | rc | 8 | Round constant bytes | -//! | iota | 8 | χ[0][0] ⊕ rc | -//! | mu | 1 | Multiplicity (1 for real, 0 for padding) | +//! | Group | Size | Description | +//! |----------------|------|---------------------------------------------------| +//! | timestamp | 2 | DWordWL | +//! | round | 1 | Round index (0..23) | +//! | start | 200 | Input state bytes [5][5][8] | +//! | Cxz | 160 | Column parity chain [5][4][8] | +//! | Cxz_left | 40 | Left component of rotated C [5][8] | +//! | Cxz_right | 20 | Carry bits of HWSL(C[x],1) [5][4] | +//! | Dxz | 40 | D values [5][8] | +//! | theta | 200 | State after θ [5][5][8] | +//! | rot_left | 200 | Left half of ρ rotation [5][5][8] | +//! | rot_right | 200 | Right half of ρ rotation [5][5][8] | +//! | chi_ands | 200 | AND results for χ [5][5][8] | +//! | chi | 200 | State after χ [5][5][8] | +//! | rc | 8 | Round constant bytes | +//! | iota | 8 | χ[0][0] ⊕ rc | +//! 
| mu | 1 | Multiplicity (1 for real, 0 for padding) | //! //! Note: spec [[variables.constant]] `rnc` and `rbc` are inlined as compile-time //! constants derived from `KECCAK_RHO[x][y]`, not materialized as columns. +//! `Cxz_right` is typed `[Bit, 4]` per spec d75944ee — HWSL with shift=1 +//! produces a single-bit carry, range-checked via IS_BIT polynomial constraints. use executor::vm::instruction::execution::{KECCAK_RC, KECCAK_RHO}; -use stark::constraints::transition::TransitionConstraintEvaluator; +use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; use stark::trace::TraceTable; @@ -52,41 +53,42 @@ pub mod cols { // Cxz_left[5][8] = 40 bytes — left shift component of rotated C pub const CXZ_LEFT: usize = CXZ + 160; // 363 - // Cxz_right[5][8] = 40 bytes — right shift component of rotated C + // Cxz_right[5][4] = 20 bits — carry bit of HWSL(C[x] halfword[hw], 1). + // For shift=1, HWSL emits a single-bit carry; one column per halfword. pub const CXZ_RIGHT: usize = CXZ_LEFT + 40; // 403 // Dxz[5][8] = 40 bytes - pub const DXZ: usize = CXZ_RIGHT + 40; // 443 + pub const DXZ: usize = CXZ_RIGHT + 20; // 423 // theta[5][5][8] = 200 bytes — state after θ - pub const THETA: usize = DXZ + 40; // 483 + pub const THETA: usize = DXZ + 40; // 463 // rot_left[5][5][8] = 200 bytes - pub const ROT_LEFT: usize = THETA + 200; // 683 + pub const ROT_LEFT: usize = THETA + 200; // 663 // rot_right[5][5][8] = 200 bytes - pub const ROT_RIGHT: usize = ROT_LEFT + 200; // 883 + pub const ROT_RIGHT: usize = ROT_LEFT + 200; // 863 // chi_ands[5][5][8] = 200 bytes // (pi is a spec [[variables.virtual]] — inlined as rot_left + rot_right at // compile-resolved offsets, not materialized as columns.) 
- pub const CHI_ANDS: usize = ROT_RIGHT + 200; // 1083 + pub const CHI_ANDS: usize = ROT_RIGHT + 200; // 1063 // chi[5][5][8] = 200 bytes — state after χ - pub const CHI: usize = CHI_ANDS + 200; // 1283 + pub const CHI: usize = CHI_ANDS + 200; // 1263 // rc[8] — round constant bytes - pub const RC: usize = CHI + 200; // 1483 + pub const RC: usize = CHI + 200; // 1463 // iota[8] — χ[0][0] ⊕ rc - pub const IOTA: usize = RC + 8; // 1491 + pub const IOTA: usize = RC + 8; // 1471 // mu — multiplicity flag. // rnc and rbc (spec [[variables.constant]]) are inlined as compile-time // constants from KECCAK_RHO, not allocated as columns. - pub const MU: usize = IOTA + 8; // 1499 + pub const MU: usize = IOTA + 8; // 1479 - pub const NUM_COLUMNS: usize = MU + 1; // 1500 + pub const NUM_COLUMNS: usize = MU + 1; // 1480 // ------------------------------------------------------------------------- // Index helpers @@ -110,10 +112,23 @@ pub mod cols { CXZ_LEFT + x * 8 + byte } - /// Index into Cxz_right[x][byte] + /// Index into Cxz_right[x][hw] — single-bit carry for halfword `hw` of x. #[inline] - pub const fn cxz_right(x: usize, byte: usize) -> usize { - CXZ_RIGHT + x * 8 + byte + pub const fn cxz_right_bit(x: usize, hw: usize) -> usize { + CXZ_RIGHT + x * 4 + hw + } + + /// For byte `b` of the rotated_Cxz output, return Some(hw) if a Cxz_right + /// bit contributes (even b), else None (odd b → only Cxz_left contributes). + /// Spec d75944ee/9143370f: rotated_Cxz[z] = Cxz_left[z] + (1 - z%2) * + /// Cxz_right[(z/2 - 1) mod 4]. + #[inline] + pub const fn cxz_right_bit_for_byte(b: usize) -> Option { + if b.is_multiple_of(2) { + Some((b / 2 + 3) % 4) + } else { + None + } } /// Index into Dxz[x][byte] @@ -281,12 +296,12 @@ pub fn generate_keccak_rnd_trace( } // Rotate C left by 1 bit using HWSL decomposition. 
- // HWSL shifts each halfword (u16) independently and emits the carry - // as a u16 at bytes [2k, 2k+1] of Cxz_right, so the carry propagates - // by 2 bytes (not 1): - // rotated_Cxz[z] = Cxz_left[z] + Cxz_right[(z-2) mod 8] + // HWSL shifts each halfword (u16) independently. For shift=1, the + // carry is a single bit (top bit of the halfword); we store it in + // one column per halfword (Cxz_right[x][hw], spec d75944ee). + // rotated_Cxz[z] = Cxz_left[z] + (1 - z%2) * Cxz_right[(z/2 - 1) mod 4] let mut cxz_left_bytes = [[0u8; 8]; 5]; - let mut cxz_right_bytes = [[0u8; 8]; 5]; + let mut cxz_right_bits = [[0u8; 4]; 5]; let mut rotated_c = [[0u8; 8]; 5]; for x in 0..5 { for hw in 0..4 { @@ -296,21 +311,22 @@ pub fn generate_keccak_rnd_trace( let (shifted, carry) = hwsl(halfword, 1); cxz_left_bytes[x][hw * 2] = (shifted & 0xFF) as u8; cxz_left_bytes[x][hw * 2 + 1] = (shifted >> 8) as u8; - cxz_right_bytes[x][hw * 2] = (carry & 0xFF) as u8; - cxz_right_bytes[x][hw * 2 + 1] = (carry >> 8) as u8; + // For shift=1, carry ∈ {0, 1}. 
+ cxz_right_bits[x][hw] = carry as u8; data[base + cols::cxz_left(x, hw * 2)] = FE::from(cxz_left_bytes[x][hw * 2] as u64); data[base + cols::cxz_left(x, hw * 2 + 1)] = FE::from(cxz_left_bytes[x][hw * 2 + 1] as u64); - data[base + cols::cxz_right(x, hw * 2)] = - FE::from(cxz_right_bytes[x][hw * 2] as u64); - data[base + cols::cxz_right(x, hw * 2 + 1)] = - FE::from(cxz_right_bytes[x][hw * 2 + 1] as u64); + data[base + cols::cxz_right_bit(x, hw)] = + FE::from(cxz_right_bits[x][hw] as u64); } - // Reconstruct: left[z] + right[(z-2) mod 8] + // Reconstruct: left[b] + (1 - b%2) * right[(b/2 + 3) mod 4] for b in 0..8 { - rotated_c[x][b] = - cxz_left_bytes[x][b].wrapping_add(cxz_right_bytes[x][(b + 6) % 8]); + let right_contribution = match cols::cxz_right_bit_for_byte(b) { + Some(hw) => cxz_right_bits[x][hw], + None => 0, + }; + rotated_c[x][b] = cxz_left_bytes[x][b].wrapping_add(right_contribution); } } @@ -427,7 +443,7 @@ pub fn generate_keccak_rnd_trace( #[allow(clippy::needless_range_loop)] pub fn bus_interactions() -> Vec { - let mut interactions = Vec::with_capacity(1420); + let mut interactions = Vec::with_capacity(1380); // --- IO group (3) --- @@ -580,6 +596,7 @@ pub fn bus_interactions() -> Vec { // --- Theta: HWSL for rotated C (20) --- // HWSL(C[x] halfword[hw], 1) → (Cxz_left, Cxz_right) + // Cxz_right is a single carry bit zero-extended to a halfword (spec d75944ee). for x in 0..5 { for hw in 0..4 { interactions.push(BusInteraction::sender( @@ -610,23 +627,18 @@ pub fn bus_interactions() -> Vec { column: cols::cxz_left(x, hw * 2 + 1), }, ]), - // Output: carry - BusValue::linear(vec![ - LinearTerm::Column { - coefficient: 1, - column: cols::cxz_right(x, hw * 2), - }, - LinearTerm::Column { - coefficient: 256, - column: cols::cxz_right(x, hw * 2 + 1), - }, - ]), + // Output: carry (single bit cast to Half — high byte = 0). 
+ BusValue::Packed { + start_column: cols::cxz_right_bit(x, hw), + packing: Packing::Direct, + }, ], )); } } - // --- Theta: IS_BYTE range checks on Cxz_left + Cxz_right (80) --- + // --- Theta: IS_BYTE range checks on Cxz_left (40) --- + // Cxz_right uses IS_BIT polynomial constraints (see create_constraints). for x in 0..5 { for b in 0..8 { interactions.push(BusInteraction::sender( @@ -637,24 +649,25 @@ pub fn bus_interactions() -> Vec { packing: Packing::Direct, }], )); - interactions.push(BusInteraction::sender( - BusId::IsByte, - Multiplicity::Column(cols::MU), - vec![BusValue::Packed { - start_column: cols::cxz_right(x, b), - packing: Packing::Direct, - }], - )); } } // --- Theta: Dxz XOR_BYTE (40) --- // D[x][b] = C[(x-1)%5][b] XOR rotated_C[(x+1)%5][b] - // rotated_C[x'][b] = Cxz_left[x'][b] + Cxz_right[x'][(b-2)%8] (virtual). - // The 2-byte offset comes from HWSL emitting the carry as a u16 spanning - // bytes [2k, 2k+1] of Cxz_right. + // rotated_C[x'][b] = Cxz_left[x'][b] + (1 - b%2) * Cxz_right[x'][(b/2 - 1)%4] + // (spec d75944ee/9143370f). For odd b only Cxz_left contributes. 
for x in 0..5 { for b in 0..8 { + let mut rotated_c_terms = vec![LinearTerm::Column { + coefficient: 1, + column: cols::cxz_left((x + 1) % 5, b), + }]; + if let Some(hw) = cols::cxz_right_bit_for_byte(b) { + rotated_c_terms.push(LinearTerm::Column { + coefficient: 1, + column: cols::cxz_right_bit((x + 1) % 5, hw), + }); + } interactions.push(BusInteraction::sender( BusId::XorByte, Multiplicity::Column(cols::MU), @@ -663,16 +676,7 @@ pub fn bus_interactions() -> Vec { start_column: cols::cxz((x + 4) % 5, 3, b), packing: Packing::Direct, }, - BusValue::linear(vec![ - LinearTerm::Column { - coefficient: 1, - column: cols::cxz_left((x + 1) % 5, b), - }, - LinearTerm::Column { - coefficient: 1, - column: cols::cxz_right((x + 1) % 5, (b + 6) % 8), - }, - ]), + BusValue::linear(rotated_c_terms), BusValue::Packed { start_column: cols::dxz(x, b), packing: Packing::Direct, @@ -889,7 +893,11 @@ pub fn bus_interactions() -> Vec { // Constraints // ========================================================================= -/// KECCAK_RND has no main-trace polynomial constraints. +/// KECCAK_RND polynomial constraints: 20 IS_BIT(μ; Cxz_right) constraints. +/// +/// Per spec d75944ee, `Cxz_right` is typed `[Bit, 4], 5` and range-checked via +/// IS_BIT polynomial constraints (kind="template", cond="μ"), not lookups: +/// μ * Cxz_right[x][hw] * (1 - Cxz_right[x][hw]) = 0 /// /// - pi is a spec [[variables.virtual]] inlined in chi bus interactions. /// - rnc/rbc are spec [[variables.constant]] inlined as compile-time constants. 
@@ -902,7 +910,20 @@ pub fn create_constraints( Vec>>, usize, ) { - (Vec::new(), constraint_idx_start) + use crate::constraints::templates::IsBitConstraint; + + let mut constraints: Vec< + Box>, + > = Vec::with_capacity(20); + let mut idx = constraint_idx_start; + for x in 0..5 { + for hw in 0..4 { + constraints + .push(IsBitConstraint::new(cols::MU, cols::cxz_right_bit(x, hw), idx).boxed()); + idx += 1; + } + } + (constraints, idx) } #[cfg(test)] diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 224f6225e..d0641d7df 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -1694,14 +1694,15 @@ fn collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec> 15 == 1 { 1u16 } else { 0 }; ops.push(BitwiseOperation::new( BitwiseOperationType::Hwsl, (halfword & 0xFF) as u8, @@ -1717,30 +1718,24 @@ fn collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec> 8) & 0xFF) as u8, )); - // IS_BYTE for cxz_right bytes - ops.push(BitwiseOperation::single_byte( - BitwiseOperationType::IsByte, - (carry & 0xFF) as u8, - )); - ops.push(BitwiseOperation::single_byte( - BitwiseOperationType::IsByte, - ((carry >> 8) & 0xFF) as u8, - )); } - // Reconstruct rotated_c + // Reconstruct rotated_c using the bit-typed Cxz_right. 
let mut left_bytes = [0u8; 8]; - let mut right_bytes = [0u8; 8]; + let mut right_bits = [0u8; 4]; for hw in 0..4 { let halfword = (c[hw * 2] as u16) | ((c[hw * 2 + 1] as u16) << 8); let shifted = halfword << 1; - let carry = if halfword >> 15 == 1 { 1u16 } else { 0 }; left_bytes[hw * 2] = (shifted & 0xFF) as u8; left_bytes[hw * 2 + 1] = ((shifted >> 8) & 0xFF) as u8; - right_bytes[hw * 2] = (carry & 0xFF) as u8; - right_bytes[hw * 2 + 1] = ((carry >> 8) & 0xFF) as u8; + right_bits[hw] = (halfword >> 15) as u8; } - for b in 0..8 { - rotated_c[x][b] = left_bytes[b].wrapping_add(right_bytes[(b + 6) % 8]); + for b in 0usize..8 { + let right_contribution = if b.is_multiple_of(2) { + right_bits[(b / 2 + 3) % 4] + } else { + 0 + }; + rotated_c[x][b] = left_bytes[b].wrapping_add(right_contribution); } } @@ -2892,10 +2887,11 @@ mod keccak_tests { assert_eq!(xor, 24 * 608, "XorByte count"); assert_eq!(and, 24 * 200, "AndByte count"); - assert_eq!(is_byte, 24 * 480, "IsByte count"); + // Cxz_right Byte→Bit (spec d75944ee): drops 40 IS_BYTE per round. 
+ assert_eq!(is_byte, 24 * 440, "IsByte count"); assert_eq!(hwsl, 24 * 120, "Hwsl count"); assert_eq!(is_half, 100, "IsHalf count"); - assert_eq!(ops.len(), 100 + 24 * 1408, "Total bitwise ops"); + assert_eq!(ops.len(), 100 + 24 * 1368, "Total bitwise ops"); } #[test] @@ -3000,8 +2996,9 @@ mod keccak_tests { ); assert_eq!( keccak_rnd::bus_interactions().len(), - 1411, - "KECCAK_RND: 3 IO + 500 theta + 500 rho + 400 chi + 8 iota" + 1371, + "KECCAK_RND: 3 IO + 460 theta + 500 rho + 400 chi + 8 iota \ + (Cxz_right Byte→Bit drops 40 IS_BYTE per spec d75944ee)" ); assert_eq!( keccak_rc::bus_interactions().len(), @@ -3015,8 +3012,8 @@ mod keccak_tests { assert_eq!(core_cols::NUM_COLUMNS, 511, "KECCAK core columns"); assert_eq!( rnd_cols::NUM_COLUMNS, - 1500, - "KECCAK_RND columns (rnc/rbc inlined as constants; pi virtual)" + 1480, + "KECCAK_RND columns (rnc/rbc inlined; pi virtual; Cxz_right Bit-typed)" ); assert_eq!(keccak_rc::cols::NUM_COLUMNS, 10, "KECCAK_RC columns"); } @@ -3029,8 +3026,8 @@ mod keccak_tests { let (rnd_constraints, _) = keccak_rnd::create_constraints(0); assert_eq!( rnd_constraints.len(), - 0, - "KECCAK_RND: no polynomial constraints (pi virtual, rnc/rbc inlined)" + 20, + "KECCAK_RND: 20 IS_BIT(μ; Cxz_right_bit) per spec d75944ee" ); } } From 802faf42f7b5c00bec60872f6ad477a607cdf7ac Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 12 May 2026 10:21:45 -0300 Subject: [PATCH 13/14] Reject unaligned/overflowing Keccak state_addr in executor and AIR Mirror alignment + no-overflow in the AIR, dedup KECCAK_RC, fix chi indexer, and add a multi-call tiny-keccak cross-check test. 
--- executor/programs/asm/test_keccak_multi.s | 48 +++++++++++ executor/src/vm/instruction/execution.rs | 61 +++++++++++++- prover/Cargo.toml | 1 + prover/src/tables/keccak.rs | 97 +++++++++++++++++++++-- prover/src/tables/keccak_rc.rs | 32 +------- prover/src/tables/keccak_rnd.rs | 6 +- prover/src/tables/trace_builder.rs | 47 ++++++++--- prover/src/tests/prove_elfs_tests.rs | 40 ++++++++++ 8 files changed, 281 insertions(+), 51 deletions(-) create mode 100644 executor/programs/asm/test_keccak_multi.s diff --git a/executor/programs/asm/test_keccak_multi.s b/executor/programs/asm/test_keccak_multi.s new file mode 100644 index 000000000..fcd192de7 --- /dev/null +++ b/executor/programs/asm/test_keccak_multi.s @@ -0,0 +1,48 @@ + .attribute 5, "rv64i2p1_m2p0_zmmul1p0" +.Lfunc_end0: + .globl main +main: + # Allocate 200 bytes on the stack for the Keccak state (25 × u64). + addi sp, sp, -200 + + # Initialize a non-zero, deterministic state: lane[i] = i + 1. + # Used by the host test as the initial state for tiny-keccak::keccakf + # cross-checking. + mv t0, sp + li t1, 1 + li t2, 26 +.Linit_loop: + sd t1, 0(t0) + addi t0, t0, 8 + addi t1, t1, 1 + bne t1, t2, .Linit_loop + + # First keccak-f[1600] call. + mv a0, sp + li a7, -2 + ecall + + # Second keccak-f[1600] call on the result. + mv a0, sp + li a7, -2 + ecall + + # Third keccak-f[1600] call on the result. + mv a0, sp + li a7, -2 + ecall + + # Commit the final 200-byte state. + li a0, 1 + mv a1, sp + li a2, 200 + li a7, 64 + ecall + + # Restore stack and halt. + addi sp, sp, 200 + li a0, 0 + li a7, 93 + ecall +.Lfunc_end1: + .size main, .Lfunc_end1-main diff --git a/executor/src/vm/instruction/execution.rs b/executor/src/vm/instruction/execution.rs index 3796ca0a1..04502645b 100644 --- a/executor/src/vm/instruction/execution.rs +++ b/executor/src/vm/instruction/execution.rs @@ -8,17 +8,19 @@ use crate::vm::{ const REGULAR_PC_UPDATE: u64 = 4; pub enum SyscallNumbers { + // Placeholder discriminant. 
The actual syscall value is KECCAK_SYSCALL_NUMBER. + KeccakPermute = 0, Print = 1, Panic = 2, Commit = 64, Halt = 93, - KeccakPermute = 94, // Actual syscall number is KECCAK_SYSCALL_NUMBER (u64::MAX - 1) } /// Syscall number for KeccakPermute (u64::MAX - 1 = 0xFFFF_FFFF_FFFF_FFFE). /// /// Cannot be an enum discriminant because it exceeds isize::MAX. pub const KECCAK_SYSCALL_NUMBER: u64 = u64::MAX - 1; +const KECCAK_STATE_BYTES: u64 = 25 * 8; impl TryFrom for SyscallNumbers { type Error = (); @@ -334,13 +336,26 @@ impl Instruction { SyscallNumbers::KeccakPermute => { // keccak-f[1600] permutation on 200 bytes (25 × u64) at address in x10 let state_addr = registers.read(10)?; + if !state_addr.is_multiple_of(8) { + return Err(ExecutionError::UnalignedKeccakStateAddress(state_addr)); + } + state_addr + .checked_add(KECCAK_STATE_BYTES - 1) + .ok_or(ExecutionError::KeccakStateAddressOverflow(state_addr))?; + let mut state = [0u64; 25]; for (i, lane) in state.iter_mut().enumerate() { - *lane = memory.load_doubleword(state_addr + (i as u64) * 8)?; + let lane_addr = state_addr + .checked_add((i as u64) * 8) + .ok_or(ExecutionError::KeccakStateAddressOverflow(state_addr))?; + *lane = memory.load_doubleword(lane_addr)?; } keccak_f1600(&mut state); for (i, &lane) in state.iter().enumerate() { - memory.store_doubleword(state_addr + (i as u64) * 8, lane)?; + let lane_addr = state_addr + .checked_add((i as u64) * 8) + .ok_or(ExecutionError::KeccakStateAddressOverflow(state_addr))?; + memory.store_doubleword(lane_addr, lane)?; } src2_val = state_addr; } @@ -516,6 +531,10 @@ pub enum ExecutionError { InvalidWSuffixOperation(ArithOp), #[error("Invalid commit fd: expected 1 (stdout), got {0}")] InvalidCommitFd(u64), + #[error("Unaligned Keccak state address: {0:#018x}")] + UnalignedKeccakStateAddress(u64), + #[error("Keccak state address range overflows: {0:#018x}")] + KeccakStateAddressOverflow(u64), } // 
============================================================================= @@ -649,4 +668,40 @@ mod tests { assert_ne!(state, original); assert!(state.iter().any(|&x| x != 0)); } + + #[test] + fn test_keccak_syscall_rejects_unaligned_state_addr() { + let mut pc = 0; + let mut registers = Registers::default(); + let mut memory = Memory::default(); + + registers.write(17, KECCAK_SYSCALL_NUMBER).unwrap(); + registers.write(10, 0x1001).unwrap(); + + let err = Instruction::EcallEbreak + .run(&mut pc, &mut registers, &mut memory) + .unwrap_err(); + assert!(matches!( + err, + ExecutionError::UnalignedKeccakStateAddress(0x1001) + )); + } + + #[test] + fn test_keccak_syscall_rejects_overflowing_state_range() { + let mut pc = 0; + let mut registers = Registers::default(); + let mut memory = Memory::default(); + + registers.write(17, KECCAK_SYSCALL_NUMBER).unwrap(); + registers.write(10, u64::MAX - 191).unwrap(); + + let err = Instruction::EcallEbreak + .run(&mut pc, &mut registers, &mut memory) + .unwrap_err(); + assert!(matches!( + err, + ExecutionError::KeccakStateAddressOverflow(addr) if addr == u64::MAX - 191 + )); + } } diff --git a/prover/Cargo.toml b/prover/Cargo.toml index dac711002..60ed39c0c 100644 --- a/prover/Cargo.toml +++ b/prover/Cargo.toml @@ -20,6 +20,7 @@ rayon = { version = "1.8.0", optional = true } [dev-dependencies] env_logger = "*" criterion = { version = "0.5", default-features = false } +tiny-keccak = { version = "2.0", features = ["keccak"] } [[bench]] name = "vm_prover_benchmark" diff --git a/prover/src/tables/keccak.rs b/prover/src/tables/keccak.rs index f7e786c4f..4756b0e78 100644 --- a/prover/src/tables/keccak.rs +++ b/prover/src/tables/keccak.rs @@ -16,12 +16,15 @@ //! 
| mu | 1 | Multiplicity flag | use executor::vm::instruction::execution::KECCAK_SYSCALL_NUMBER; +use math::field::element::FieldElement; +use math::field::traits::{IsField, IsSubFieldOf}; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; +use stark::table::TableView; use stark::trace::TraceTable; use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; -use crate::constraints::templates::{AddConstraint, AddOperand}; +use crate::constraints::templates::{AddConstraint, AddOperand, INV_SHIFT_32}; // ========================================================================= // Column indices @@ -136,7 +139,10 @@ pub fn generate_keccak_trace( // State pointers: state_ptr[lane] = addr + 8 * lane_idx for lane_idx in 0..25 { - let ptr = op.state_addr.wrapping_add(lane_idx as u64 * 8); + let ptr = op + .state_addr + .checked_add(lane_idx as u64 * 8) + .expect("keccak state address range must be validated by the executor"); data[base + cols::state_ptr(lane_idx, 0)] = FE::from(ptr & 0xFFFF); data[base + cols::state_ptr(lane_idx, 1)] = FE::from((ptr >> 16) & 0xFFFF); data[base + cols::state_ptr(lane_idx, 2)] = FE::from((ptr >> 32) & 0xFFFF); @@ -346,7 +352,21 @@ pub fn bus_interactions() -> Vec { } } - // 5. MEMW interactions: 25 combined read+write per lane (per spec) + // 5. Alignment: addr[0] & 7 = 0, which enforces addr % 8 == 0. + interactions.push(BusInteraction::sender( + BusId::AndByte, + Multiplicity::Column(cols::MU), + vec![ + BusValue::Packed { + start_column: cols::addr(0), + packing: Packing::Direct, + }, + BusValue::constant(7), + BusValue::constant(0), + ], + )); + + // 6. 
MEMW interactions: 25 combined read+write per lane (per spec) // Format: [old[8], is_register, addr_lo32, addr_hi32, value[8], ts[2], w2, w4, w8] = 24 // old = input_state (read), value = output_state (write) for lane_idx in 0..25 { @@ -423,12 +443,76 @@ pub fn bus_interactions() -> Vec { // Constraints // ========================================================================= +struct KeccakAddressNoOverflowConstraint { + constraint_idx: usize, +} + +impl KeccakAddressNoOverflowConstraint { + fn new(constraint_idx: usize) -> Self { + Self { constraint_idx } + } + + fn compute(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + let addr_lo = step.get_main_evaluation_element(0, cols::addr(0)).clone() + + step.get_main_evaluation_element(0, cols::addr(1)) * FieldElement::::from(256) + + step.get_main_evaluation_element(0, cols::addr(2)) * FieldElement::::from(65536) + + step.get_main_evaluation_element(0, cols::addr(3)) + * FieldElement::::from(16777216); + let addr_hi = step.get_main_evaluation_element(0, cols::addr(4)).clone() + + step.get_main_evaluation_element(0, cols::addr(5)) * FieldElement::::from(256) + + step.get_main_evaluation_element(0, cols::addr(6)) * FieldElement::::from(65536) + + step.get_main_evaluation_element(0, cols::addr(7)) + * FieldElement::::from(16777216); + + let ptr_lo = step + .get_main_evaluation_element(0, cols::state_ptr(24, 0)) + .clone() + + step.get_main_evaluation_element(0, cols::state_ptr(24, 1)) + * FieldElement::::from(65536); + let ptr_hi = step + .get_main_evaluation_element(0, cols::state_ptr(24, 2)) + .clone() + + step.get_main_evaluation_element(0, cols::state_ptr(24, 3)) + * FieldElement::::from(65536); + + let inv_2_32 = FieldElement::::from(INV_SHIFT_32); + let carry_0 = (addr_lo + FieldElement::::from(192) - ptr_lo) * inv_2_32.clone(); + let carry_1 = (addr_hi + carry_0 - ptr_hi) * inv_2_32; + step.get_main_evaluation_element(0, cols::MU).clone() * carry_1 + } +} + +impl 
TransitionConstraint + for KeccakAddressNoOverflowConstraint +{ + fn degree(&self) -> usize { + 2 + } + + fn constraint_idx(&self) -> usize { + self.constraint_idx + } + + fn evaluate(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + self.compute(step) + } +} + /// Create constraints for the KECCAK core chip. /// /// Per spec (keccak:c:state_ptr): ADD template for each lane: /// state_ptr[lane] = addr + 8 * lane_idx /// -/// 25 lane pointers × 2 constraints per ADD = 50 constraints total. +/// 25 lane pointers × 2 constraints per ADD + 1 top-lane no-overflow +/// constraint = 51 constraints total. /// Conditional on mu (only real rows). pub fn create_constraints( constraint_idx_start: usize, @@ -438,7 +522,7 @@ pub fn create_constraints( ) { let mut constraints: Vec< Box>, - > = Vec::with_capacity(50); + > = Vec::with_capacity(51); let mut idx = constraint_idx_start; // state_ptr[lane] = addr + 8*lane_idx @@ -458,5 +542,8 @@ pub fn create_constraints( idx += 2; } + constraints.push(KeccakAddressNoOverflowConstraint::new(idx).boxed()); + idx += 1; + (constraints, idx) } diff --git a/prover/src/tables/keccak_rc.rs b/prover/src/tables/keccak_rc.rs index 0ba29297b..c2e14d643 100644 --- a/prover/src/tables/keccak_rc.rs +++ b/prover/src/tables/keccak_rc.rs @@ -18,6 +18,8 @@ use stark::proof::options::ProofOptions; use stark::prover::evaluate_polynomial_on_lde_domain; use stark::trace::{TraceTable, columns2rows}; +use executor::vm::instruction::execution::KECCAK_RC; + use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; // ========================================================================= @@ -45,34 +47,6 @@ pub const NUM_REAL_ROWS: usize = 24; /// Number of rows in the trace (padded to next power of 2). pub const NUM_ROWS: usize = 32; -/// Keccak-f[1600] round constants. 
-const RC: [u64; 24] = [ - 0x0000000000000001, - 0x0000000000008082, - 0x800000000000808A, - 0x8000000080008000, - 0x000000000000808B, - 0x0000000080000001, - 0x8000000080008081, - 0x8000000000008009, - 0x000000000000008A, - 0x0000000000000088, - 0x0000000080008009, - 0x000000008000000A, - 0x000000008000808B, - 0x800000000000008B, - 0x8000000000008089, - 0x8000000000008003, - 0x8000000000008002, - 0x8000000000000080, - 0x000000000000800A, - 0x800000008000000A, - 0x8000000080008081, - 0x8000000000008080, - 0x0000000080000001, - 0x8000000080008008, -]; - /// Whether this table is preprocessed. pub const fn is_preprocessed() -> bool { true @@ -80,7 +54,7 @@ pub const fn is_preprocessed() -> bool { /// Generate one precomputed row: [round, rc_byte0, ..., rc_byte7]. pub const fn generate_row(round: usize) -> [u64; NUM_PRECOMPUTED_COLS] { - let rc_val = if round < 24 { RC[round] } else { 0 }; + let rc_val = if round < 24 { KECCAK_RC[round] } else { 0 }; [ round as u64, rc_val & 0xFF, diff --git a/prover/src/tables/keccak_rnd.rs b/prover/src/tables/keccak_rnd.rs index 0405a1f83..277281583 100644 --- a/prover/src/tables/keccak_rnd.rs +++ b/prover/src/tables/keccak_rnd.rs @@ -183,7 +183,7 @@ pub mod cols { /// Index into chi[x][y][byte] #[inline] pub const fn chi(x: usize, y: usize, byte: usize) -> usize { - CHI + (x * 5 + y) * 8 + byte + CHI + (x + 5 * y) * 8 + byte } /// Index into rc[byte] @@ -438,12 +438,12 @@ pub fn generate_keccak_rnd_trace( } // ========================================================================= -// Bus interactions (1,411 total) +// Bus interactions (1,371 total) // ========================================================================= #[allow(clippy::needless_range_loop)] pub fn bus_interactions() -> Vec { - let mut interactions = Vec::with_capacity(1380); + let mut interactions = Vec::with_capacity(1371); // --- IO group (3) --- diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index d0641d7df..249c0cbc8 
100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -407,10 +407,15 @@ fn collect_ops_from_cpu( let state_addr = op.keccak_state_addr; let mut input = [0u64; 25]; for (i, lane) in input.iter_mut().enumerate() { - let addr = state_addr.wrapping_add(i as u64 * 8); + let addr = state_addr + .checked_add(i as u64 * 8) + .expect("keccak state address range must be validated by the executor"); let mut val = 0u64; for b in 0..8 { - let (byte_val, _ts) = memory_state.read_byte(addr + b as u64); + let byte_addr = addr + .checked_add(b as u64) + .expect("keccak state address range must be validated by the executor"); + let (byte_val, _ts) = memory_state.read_byte(byte_addr); val |= (byte_val as u64) << (b * 8); } *lane = val; @@ -845,13 +850,18 @@ fn collect_keccak_memw_ops( // input = [0, state_ptr, output_state, timestamp, 0, 0, 1], output = input_state // The MEMW table sees: old=input_state, value=output_state, is_read=true. for (lane_idx, (&in_lane, &out_lane)) in input.iter().zip(output.iter()).enumerate() { - let lane_addr = state_addr.wrapping_add(lane_idx as u64 * 8); + let lane_addr = state_addr + .checked_add(lane_idx as u64 * 8) + .expect("keccak state address range must be validated by the executor"); let mut old_bytes = [0u64; 8]; let mut old_timestamps = [0u64; 8]; for b in 0..8 { old_bytes[b] = (in_lane >> (b * 8)) & 0xFF; - let (_old_val, old_ts) = memory_state.read_byte(lane_addr + b as u64); + let byte_addr = lane_addr + .checked_add(b as u64) + .expect("keccak state address range must be validated by the executor"); + let (_old_val, old_ts) = memory_state.read_byte(byte_addr); old_timestamps[b] = old_ts; } @@ -866,7 +876,10 @@ fn collect_keccak_memw_ops( // Update memory state for (b, &val) in value_bytes.iter().enumerate() { - memory_state.write_byte(lane_addr + b as u64, val as u8, ts); + let byte_addr = lane_addr + .checked_add(b as u64) + .expect("keccak state address range must be validated by the executor"); + 
memory_state.write_byte(byte_addr, val as u8, ts); } } @@ -1650,9 +1663,17 @@ fn collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec> shift) & 0xFFFF) as u16; ops.push(BitwiseOperation::halfword( @@ -2886,12 +2907,12 @@ mod keccak_tests { .count(); assert_eq!(xor, 24 * 608, "XorByte count"); - assert_eq!(and, 24 * 200, "AndByte count"); + assert_eq!(and, 24 * 200 + 1, "AndByte count"); // Cxz_right Byte→Bit (spec d75944ee): drops 40 IS_BYTE per round. assert_eq!(is_byte, 24 * 440, "IsByte count"); assert_eq!(hwsl, 24 * 120, "Hwsl count"); assert_eq!(is_half, 100, "IsHalf count"); - assert_eq!(ops.len(), 100 + 24 * 1368, "Total bitwise ops"); + assert_eq!(ops.len(), 101 + 24 * 1368, "Total bitwise ops"); } #[test] @@ -2991,8 +3012,8 @@ mod keccak_tests { fn test_keccak_bus_interaction_counts() { assert_eq!( keccak::bus_interactions().len(), - 129, - "KECCAK core: 1 ECALL + 1 MEMW read_addr + 25 MEMW lanes + 100 IS_HALF + 1 Keccak send + 1 Keccak recv" + 130, + "KECCAK core: 1 ECALL + 1 MEMW read_addr + 25 MEMW lanes + 100 IS_HALF + 1 AND_BYTE alignment + 1 Keccak send + 1 Keccak recv" ); assert_eq!( keccak_rnd::bus_interactions().len(), @@ -3021,7 +3042,11 @@ mod keccak_tests { #[test] fn test_keccak_constraint_counts() { let (core_constraints, _) = keccak::create_constraints(0); - assert_eq!(core_constraints.len(), 50, "KECCAK core: 25 ADD pairs"); + assert_eq!( + core_constraints.len(), + 51, + "KECCAK core: 25 ADD pairs + no-overflow" + ); let (rnd_constraints, _) = keccak_rnd::create_constraints(0); assert_eq!( diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index aeae45f7d..56f6d3f18 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -731,6 +731,46 @@ fn test_prove_elfs_keccak() { ); } +#[test] +fn test_prove_elfs_keccak_multi_call() { + let _ = env_logger::builder().is_test(true).try_init(); + + let elf_bytes = 
crate::test_utils::asm_elf_bytes("test_keccak_multi"); + let elf = Elf::load(&elf_bytes).expect("Failed to load ELF"); + let executor = + executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor"); + let result = executor.run().expect("Failed to run program"); + + // The guest initializes lane[i] = i + 1 and applies keccak-f[1600] three times. + // Cross-check the committed output against tiny-keccak's independent + // implementation of the permutation. + let mut expected_state: [u64; 25] = core::array::from_fn(|i| (i + 1) as u64); + for _ in 0..3 { + tiny_keccak::keccakf(&mut expected_state); + } + let mut expected_bytes = Vec::with_capacity(200); + for lane in expected_state { + expected_bytes.extend_from_slice(&lane.to_le_bytes()); + } + + assert_eq!( + result.return_values.memory_values, expected_bytes, + "committed state must match tiny-keccak after 3 keccak-f[1600] calls" + ); + + let mut traces = + Traces::from_elf_and_logs(&elf, &result.logs, &Default::default(), &[]).unwrap(); + assert_eq!( + traces.public_output_bytes, + result.return_values.memory_values + ); + + assert!( + prove_and_verify_vm_minimal(&elf, &mut traces), + "keccak multi-call prove/verify failed" + ); +} + #[test] fn test_prove_elfs_test_commit_4() { let elf_bytes = crate::test_utils::asm_elf_bytes("test_commit_4"); From 7de95f9402f28f17a1b809ff7a3cac325aac3034 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 12 May 2026 13:18:01 -0300 Subject: [PATCH 14/14] Range-check Keccak addr bytes with IS_BYTE to close alignment bypass in the AIR --- prover/src/tables/keccak.rs | 20 +++++++++++++- prover/src/tables/trace_builder.rs | 20 +++++++++++--- prover/src/tests/prove_elfs_tests.rs | 39 ++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 5 deletions(-) diff --git a/prover/src/tables/keccak.rs b/prover/src/tables/keccak.rs index 4756b0e78..87e8dc122 100644 --- a/prover/src/tables/keccak.rs +++ b/prover/src/tables/keccak.rs @@ -366,7 +366,25 @@ 
pub fn bus_interactions() -> Vec { ], )); - // 6. MEMW interactions: 25 combined read+write per lane (per spec) + // 6. Range-check every addr byte. The addr columns are reconstructed as a + // linear combination (addr_lo = b0 + 256*b1 + 65536*b2 + 2^24*b3, etc.) + // for the MEMW lookup and the no-overflow / alignment constraints. Without + // an explicit byte range check on each cell, an attacker can keep the + // field-element value of that linear combination correct while encoding + // arbitrary non-byte values in the individual cells (e.g. addr[0]=0, + // addr[1]=V_lo * 256^{-1} mod p), bypassing the alignment check. + for b in 0..8 { + interactions.push(BusInteraction::sender( + BusId::IsByte, + Multiplicity::Column(cols::MU), + vec![BusValue::Packed { + start_column: cols::addr(b), + packing: Packing::Direct, + }], + )); + } + + // 7. MEMW interactions: 25 combined read+write per lane (per spec) // Format: [old[8], is_register, addr_lo32, addr_hi32, value[8], ts[2], w2, w4, w8] = 24 // old = input_state (read), value = output_state (write) for lane_idx in 0..25 { diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 249c0cbc8..e7a662502 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -1669,6 +1669,17 @@ fn collect_bitwise_from_keccak(keccak_ops: &[KeccakOperation]) -> Vec> (b * 8)) & 0xFF) as u8; + ops.push(BitwiseOperation::single_byte( + BitwiseOperationType::IsByte, + byte, + )); + } + // IS_HALF for state_ptr halfwords (100 per call) for lane_idx in 0..25 { let ptr = state_addr @@ -2909,10 +2920,11 @@ mod keccak_tests { assert_eq!(xor, 24 * 608, "XorByte count"); assert_eq!(and, 24 * 200 + 1, "AndByte count"); // Cxz_right Byte→Bit (spec d75944ee): drops 40 IS_BYTE per round. - assert_eq!(is_byte, 24 * 440, "IsByte count"); + // +8 per call to range-check the addr bytes used in alignment / no-overflow. 
+ assert_eq!(is_byte, 24 * 440 + 8, "IsByte count"); assert_eq!(hwsl, 24 * 120, "Hwsl count"); assert_eq!(is_half, 100, "IsHalf count"); - assert_eq!(ops.len(), 101 + 24 * 1368, "Total bitwise ops"); + assert_eq!(ops.len(), 109 + 24 * 1368, "Total bitwise ops"); } #[test] @@ -3012,8 +3024,8 @@ mod keccak_tests { fn test_keccak_bus_interaction_counts() { assert_eq!( keccak::bus_interactions().len(), - 130, - "KECCAK core: 1 ECALL + 1 MEMW read_addr + 25 MEMW lanes + 100 IS_HALF + 1 AND_BYTE alignment + 1 Keccak send + 1 Keccak recv" + 138, + "KECCAK core: 1 ECALL + 1 MEMW read_addr + 25 MEMW lanes + 100 IS_HALF + 1 AND_BYTE alignment + 8 IS_BYTE addr + 1 Keccak send + 1 Keccak recv" ); assert_eq!( keccak_rnd::bus_interactions().len(), diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 56f6d3f18..adbf02143 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -771,6 +771,45 @@ fn test_prove_elfs_keccak_multi_call() { ); } +/// Verifier REJECTS a forged trace where an addr byte cell is set to a +/// non-byte field element. +/// +/// Without the IS_BYTE range checks on addr(0..7), an attacker could keep +/// `addr_lo = b0 + 256·b1 + 65536·b2 + 2^24·b3` equal to an unaligned target +/// address as a field element while setting addr(0)=0 (passing the AndByte +/// alignment check) and folding the carry into addr(1) as a non-byte +/// FE-element. This test asserts that mutating addr(1) to a non-byte value +/// unbalances the verifier's bus checks and the proof is rejected. 
+#[test] +fn test_prove_elfs_keccak_unaligned_state_addr() { + use crate::tables::keccak::cols as keccak_cols; + + let _ = env_logger::builder().is_test(true).try_init(); + + let elf_bytes = crate::test_utils::asm_elf_bytes("test_keccak_multi"); + let elf = Elf::load(&elf_bytes).expect("Failed to load ELF"); + let executor = + executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor"); + let result = executor.run().expect("Failed to run program"); + let mut traces = + Traces::from_elf_and_logs(&elf, &result.logs, &Default::default(), &[]).unwrap(); + + // Tamper the first real keccak row: replace addr(1) (a byte cell) with a + // value outside [0, 256). The new IS_BYTE bus sender will emit this + // value with multiplicity MU=1; the IS_BYTE preprocessed table only + // contains 0..256, so the bus cannot balance. + traces.keccak.main_table.set( + 0, + keccak_cols::addr(1), + FieldElement::::from(257u64), + ); + + assert!( + !prove_and_verify_vm_minimal(&elf, &mut traces), + "Verifier must reject a keccak proof whose addr cells are not bytes" + ); +} + #[test] fn test_prove_elfs_test_commit_4() { let elf_bytes = crate::test_utils::asm_elf_bytes("test_commit_4");