From e6278fb4487d3fa0abb85fe5bc4bbd630015ccf4 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 5 May 2026 13:06:50 -0300 Subject: [PATCH 1/8] =?UTF-8?q?Add=20per-AIR=20trace=20cells=20(rows=20?= =?UTF-8?q?=C3=97=20cols)=20to=20the=20prover=20instruments=20report?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crypto/stark/src/instruments.rs | 17 +++++- crypto/stark/src/prover.rs | 19 +++--- prover/src/instruments.rs | 102 ++++++++++++++++++++++++++++---- 3 files changed, 113 insertions(+), 25 deletions(-) diff --git a/crypto/stark/src/instruments.rs b/crypto/stark/src/instruments.rs index 16ff95082..24c353d14 100644 --- a/crypto/stark/src/instruments.rs +++ b/crypto/stark/src/instruments.rs @@ -60,6 +60,19 @@ pub struct Round1SubOps { pub aux_merkle: Duration, } +/// Per-table accounting captured during rounds 2-4. Includes shape (rows × +/// main/aux cols) so callers can compute total main-trace cells (the +/// `sum(rows × main_cols)` headline metric used to compare against ZisK / SP1). +#[derive(Clone, Debug, Default)] +pub struct TableTiming { + pub name: String, + pub rows: usize, + pub main_cols: usize, + pub aux_cols: usize, + pub duration: Duration, + pub sub_ops: TableSubOps, +} + /// Timing data collected inside `multi_prove`. pub struct MultiProveTiming { pub prepass: Duration, @@ -69,8 +82,8 @@ pub struct MultiProveTiming { pub rounds_2_4: Duration, /// Sub-op breakdown for Round 1 (main + aux LDE vs Merkle). pub round1_sub: Round1SubOps, - /// (name, rows, duration, sub_ops) per table for rounds 2-4. - pub table_timings: Vec<(String, usize, Duration, TableSubOps)>, + /// Shape + timing for every AIR proven this run. 
+ pub table_timings: Vec, pub heap_snapshots: Vec, } diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index a5386017a..04871e53c 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1855,12 +1855,7 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let phase_start = Instant::now(); #[cfg(feature = "instruments")] - let mut table_timings: Vec<( - String, - usize, - std::time::Duration, - crate::instruments::TableSubOps, - )> = Vec::with_capacity(num_airs); + let mut table_timings: Vec = Vec::with_capacity(num_airs); let mut proofs = Vec::with_capacity(num_airs); let mut lde_drain = cached_ldes.into_iter(); @@ -1919,12 +1914,14 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let table_timing = { let sub_ops = crate::instruments::take_round_sub_ops().unwrap_or_default(); - ( - air.name().to_string(), - trace.num_rows(), - table_start.elapsed(), + crate::instruments::TableTiming { + name: air.name().to_string(), + rows: trace.num_rows(), + main_cols: trace.num_main_columns, + aux_cols: trace.num_aux_columns, + duration: table_start.elapsed(), sub_ops, - ) + } }; #[cfg(feature = "instruments")] diff --git a/prover/src/instruments.rs b/prover/src/instruments.rs index f15223e18..28f930f6d 100644 --- a/prover/src/instruments.rs +++ b/prover/src/instruments.rs @@ -11,6 +11,18 @@ fn fmt_rows(rows: usize) -> String { } } +fn fmt_cells(cells: u128) -> String { + if cells >= 1_000_000_000 { + format!("{:.2}G", cells as f64 / 1_000_000_000.0) + } else if cells >= 1_000_000 { + format!("{:.1}M", cells as f64 / 1_000_000.0) + } else if cells >= 1_000 { + format!("{}K", (cells + 500) / 1_000) + } else { + format!("{cells}") + } +} + fn pct(dur: Duration, total: Duration) -> f64 { if total > Duration::ZERO { dur.as_secs_f64() / total.as_secs_f64() * 100.0 @@ -94,25 +106,25 @@ pub fn print_report( // Merge split tables: MEMW[0..4] → MEMW x5 let mut merged: BTreeMap = BTreeMap::new(); - for (name, rows, dur, sub_ops) in 
&mp.table_timings { - let base = base_name(name).to_string(); + for tt in &mp.table_timings { + let base = base_name(&tt.name).to_string(); let entry = merged.entry(base).or_insert(MergedTable { total_dur: Duration::ZERO, total_rows: 0, count: 0, sub_ops: stark::instruments::TableSubOps::default(), }); - entry.total_dur += *dur; - entry.total_rows += rows; + entry.total_dur += tt.duration; + entry.total_rows += tt.rows; entry.count += 1; - entry.sub_ops.constraints += sub_ops.constraints; - entry.sub_ops.comp_decompose += sub_ops.comp_decompose; - entry.sub_ops.comp_commit += sub_ops.comp_commit; - entry.sub_ops.ood += sub_ops.ood; - entry.sub_ops.deep_comp += sub_ops.deep_comp; - entry.sub_ops.deep_extend += sub_ops.deep_extend; - entry.sub_ops.fri_commit += sub_ops.fri_commit; - entry.sub_ops.queries += sub_ops.queries; + entry.sub_ops.constraints += tt.sub_ops.constraints; + entry.sub_ops.comp_decompose += tt.sub_ops.comp_decompose; + entry.sub_ops.comp_commit += tt.sub_ops.comp_commit; + entry.sub_ops.ood += tt.sub_ops.ood; + entry.sub_ops.deep_comp += tt.sub_ops.deep_comp; + entry.sub_ops.deep_extend += tt.sub_ops.deep_extend; + entry.sub_ops.fri_commit += tt.sub_ops.fri_commit; + entry.sub_ops.queries += tt.sub_ops.queries; } let mut sorted: Vec<_> = merged.into_iter().collect(); @@ -212,6 +224,72 @@ pub fn print_report( total_merkle.as_secs_f64(), pct(total_merkle, total) ); + + // Per-AIR main-trace cell breakdown. The headline metric used to + // compare against ZisK / SP1 is the sum of `rows × main_cols` across + // all AIRs. Aux cells are printed separately because they reflect + // LogUp/permutation overhead and don't contribute to the ZisK-style + // "main-trace cells" baseline. 
+ let mut by_table: BTreeMap = BTreeMap::new(); + for tt in &mp.table_timings { + let base = base_name(&tt.name).to_string(); + let entry = by_table.entry(base).or_insert((0, tt.main_cols, 0, 0, 0)); + entry.0 += tt.rows; + entry.2 += (tt.rows as u128) * (tt.main_cols as u128); + entry.3 += (tt.rows as u128) * (tt.aux_cols as u128); + entry.4 += 1; + // Keep the last main_cols value seen for this table; instances of a + // split table are expected to share the same main column count. + entry.1 = tt.main_cols; + } + let mut cell_rows: Vec<(String, usize, usize, u128, u128, usize)> = by_table + .into_iter() + .map(|(name, (rows, main_cols, main_cells, aux_cells, count))| { + (name, rows, main_cols, main_cells, aux_cells, count) + }) + .collect(); + cell_rows.sort_by(|a, b| b.3.cmp(&a.3)); + + let mut total_main_cells: u128 = 0; + let mut total_aux_cells: u128 = 0; + let mut total_table_rows: usize = 0; + for r in &cell_rows { + total_main_cells += r.3; + total_aux_cells += r.4; + total_table_rows += r.1; + } + + eprintln!(); + eprintln!("=== TRACE CELLS (rows × main_cols) ==="); + eprintln!( + " {:<22} {:>6} {:>5} {:>10} {:>10}", + "Table", "rows", "cols", "main", "aux", + ); + eprintln!(" {}", "─".repeat(58)); + for (name, rows, main_cols, main_cells, aux_cells, count) in &cell_rows { + let display_name = if *count > 1 { + format!("{name} x{count}") + } else { + name.clone() + }; + eprintln!( + " {:<22} {:>6} {:>5} {:>10} {:>10}", + display_name, + fmt_rows(*rows), + main_cols, + fmt_cells(*main_cells), + fmt_cells(*aux_cells), + ); + } + eprintln!(" {}", "─".repeat(58)); + eprintln!( + " {:<22} {:>6} {:>5} {:>10} {:>10}", + "TOTAL", + fmt_rows(total_table_rows), + "", + fmt_cells(total_main_cells), + fmt_cells(total_aux_cells), + ); } eprintln!(" {}", "─".repeat(58)); From 22a2e1f95b5eb2972075bb4beafce2be898fec4e Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 5 May 2026 13:32:32 -0300 Subject: [PATCH 2/8] Add empty byte_ops table skeleton wired
through VmAirs and Traces --- prover/src/lib.rs | 21 ++- prover/src/tables/byte_ops.rs | 279 +++++++++++++++++++++++++++++ prover/src/tables/mod.rs | 1 + prover/src/tables/trace_builder.rs | 14 ++ prover/src/test_utils.rs | 22 +++ 5 files changed, 333 insertions(+), 4 deletions(-) create mode 100644 prover/src/tables/byte_ops.rs diff --git a/prover/src/lib.rs b/prover/src/lib.rs index bd42b9948..00cacf9cb 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -33,16 +33,17 @@ use stark::verifier::{IsStarkVerifier, Verifier}; pub use crate::tables::MaxRowsConfig; use crate::tables::bitwise; +use crate::tables::byte_ops; use crate::tables::decode; use crate::tables::page; use crate::tables::register; use crate::tables::trace_builder::Traces; use crate::tables::types::BusId; use crate::test_utils::{ - E, F, VmAir, create_bitwise_air, create_branch_air, create_commit_air, create_cpu_air, - create_decode_air, create_dvrm_air, create_halt_air, create_load_air, create_lt_air, - create_memw_air, create_memw_aligned_air, create_memw_register_air, create_mul_air, - create_page_air, create_register_air, create_shift_air, + E, F, VmAir, create_bitwise_air, create_branch_air, create_byte_ops_air, create_commit_air, + create_cpu_air, create_decode_air, create_dvrm_air, create_halt_air, create_load_air, + create_lt_air, create_memw_air, create_memw_aligned_air, create_memw_register_air, + create_mul_air, create_page_air, create_register_air, create_shift_air, }; use stark::proof::options::{GoldilocksCubicProofOptions, ProofOptions}; @@ -189,6 +190,7 @@ type AirTracePair<'a> = ( pub(crate) struct VmAirs { pub cpus: Vec, pub bitwise: VmAir, + pub byte_ops: VmAir, pub lts: Vec, pub shifts: Vec, pub memws: Vec, @@ -210,6 +212,7 @@ impl VmAirs { pub fn air_trace_pairs<'a>(&'a self, traces: &'a mut Traces) -> Vec> { let mut pairs: Vec> = vec![ (&self.bitwise, &mut traces.bitwise, &()), + (&self.byte_ops, &mut traces.byte_ops, &()), (&self.decode, &mut traces.decode, &()), 
(&self.halt, &mut traces.halt, &()), (&self.commit, &mut traces.commit, &()), @@ -265,6 +268,7 @@ impl VmAirs { pub fn air_refs(&self) -> Vec<&dyn AIR> { let mut refs: Vec<&dyn AIR> = vec![ &self.bitwise, + &self.byte_ops, &self.decode, &self.halt, &self.commit, @@ -332,6 +336,14 @@ impl VmAirs { bitwise::NUM_PRECOMPUTED_COLS, ) }; + let byte_ops = if minimal_bitwise { + create_byte_ops_air(proof_options) + } else { + create_byte_ops_air(proof_options).with_preprocessed( + byte_ops::preprocessed_commitment(proof_options), + byte_ops::NUM_PRECOMPUTED_COLS, + ) + }; let lts: Vec<_> = (0..table_counts.lt) .map(|i| create_lt_air(proof_options).with_name(&format!("LT[{}]", i))) .collect(); @@ -395,6 +407,7 @@ impl VmAirs { Self { cpus, bitwise, + byte_ops, lts, shifts, memws, diff --git a/prover/src/tables/byte_ops.rs b/prover/src/tables/byte_ops.rs new file mode 100644 index 000000000..4eba6c323 --- /dev/null +++ b/prover/src/tables/byte_ops.rs @@ -0,0 +1,279 @@ +//! BYTE_OPS precomputed lookup table for byte-pair operations. +//! +//! Holds every `(X, Y) ∈ [0, 256)²` and the precomputed result of the +//! byte-pair lookups that BITWISE used to multiplex into its 2²⁰ rows. The +//! 16× factor was driven only by 20-bit ops (HWSL/IS_B20/ZERO); pulling these +//! out into a dedicated 2¹⁶ table cuts ~12M cells. +//! +//! ## Operations served (Step 2 will wire the receivers) +//! - `AND_BYTE[X, Y]` -> X & Y +//! - `OR_BYTE[X, Y]` -> X | Y +//! - `XOR_BYTE[X, Y]` -> X ^ Y +//! - `MSB8[X]` -> most significant bit of byte (Y = 0) +//! - `MSB16[X + 256*Y]` -> most significant bit of halfword +//! - `IS_BYTE[X, Y]` -> range check on a byte pair +//! - `IS_HALF[X + 256*Y]` -> range check on a halfword +//! +//! ## Table Structure +//! +//! 2¹⁶ = 65,536 rows indexed by `(X: Byte, Y: Byte)`. All lookups are received +//! with negative multiplicity (other tables send to this one). 
+ +use std::sync::OnceLock; + +use math::fft::cpu::bit_reversing::in_place_bit_reverse_permute; +use math::polynomial::Polynomial; +use stark::config::{BatchedMerkleTree, Commitment}; +use stark::lookup::BusInteraction; +use stark::proof::options::ProofOptions; +use stark::prover::evaluate_polynomial_on_lde_domain; +use stark::trace::{TraceTable, columns2rows}; + +#[cfg(feature = "parallel")] +use rayon::prelude::*; + +use super::bitwise::{BitwiseOperation, BitwiseOperationType}; +use super::types::{FE, GoldilocksExtension, GoldilocksField}; + +// ========================================================================= +// Column indices for BYTE_OPS table +// ========================================================================= + +pub mod cols { + /// X: Byte input (0-255) + pub const X: usize = 0; + /// Y: Byte input (0-255) + pub const Y: usize = 1; + /// AND result: X & Y + pub const AND: usize = 2; + /// OR result: X | Y + pub const OR: usize = 3; + /// XOR result: X ^ Y + pub const XOR: usize = 4; + /// MSB of byte X: (X >> 7) & 1 + pub const MSB8: usize = 5; + /// MSB of halfword X + 256*Y: ((X + 256*Y) >> 15) & 1 + pub const MSB16: usize = 6; + + /// Multiplicity for AND_BYTE lookups + pub const MU_AND: usize = 7; + /// Multiplicity for OR_BYTE lookups + pub const MU_OR: usize = 8; + /// Multiplicity for XOR_BYTE lookups + pub const MU_XOR: usize = 9; + /// Multiplicity for MSB8 lookups + pub const MU_MSB8: usize = 10; + /// Multiplicity for MSB16 lookups + pub const MU_MSB16: usize = 11; + /// Multiplicity for IS_BYTE lookups + pub const MU_IS_BYTE: usize = 12; + /// Multiplicity for IS_HALF lookups + pub const MU_IS_HALF: usize = 13; + + /// Total number of columns + pub const NUM_COLUMNS: usize = 14; +} + +/// 2¹⁶ rows = 65,536. +pub const NUM_ROWS: usize = 256 * 256; + +/// Number of precomputed (non-multiplicity) columns. 
+pub const NUM_PRECOMPUTED_COLS: usize = 7; + +// ========================================================================= +// Compile-time row generation +// ========================================================================= + +/// Generate one row of the byte_ops table. +/// +/// Index encoding: `index = x + y * 256` with `x, y ∈ [0, 255]`. +#[inline] +pub const fn generate_byte_ops_row(index: usize) -> [u64; NUM_PRECOMPUTED_COLS] { + let x = (index & 0xFF) as u32; + let y = ((index >> 8) & 0xFF) as u32; + + let and_val = x & y; + let or_val = x | y; + let xor_val = x ^ y; + + let msb8 = (x >> 7) & 1; + let halfword = x + y * 256; + let msb16 = (halfword >> 15) & 1; + + [ + x as u64, + y as u64, + and_val as u64, + or_val as u64, + xor_val as u64, + msb8 as u64, + msb16 as u64, + ] +} + +/// Whether this table is preprocessed (commitment is hardcoded). +pub const fn is_preprocessed() -> bool { + true +} + +// ========================================================================= +// Preprocessed commitment (computed once, cached) +// ========================================================================= + +static BYTE_OPS_COMMITMENT: OnceLock = OnceLock::new(); + +/// Computes the Merkle commitment over the precomputed byte_ops columns. +/// +/// Mirrors [`bitwise::compute_preprocessed_commitment`] — see that for the +/// rationale (LDE-rooted commitment is required so FRI queries at any +/// blow-up index can be opened against this precomputed table). 
+fn compute_preprocessed_commitment(options: &ProofOptions) -> Commitment { + #[cfg(feature = "parallel")] + let columns: Vec> = (0..NUM_PRECOMPUTED_COLS) + .into_par_iter() + .map(|col_idx| { + (0..NUM_ROWS) + .map(|idx| { + let row = generate_byte_ops_row(idx); + FE::from(row[col_idx]) + }) + .collect() + }) + .collect(); + + #[cfg(not(feature = "parallel"))] + let columns: Vec> = { + let mut cols: Vec> = (0..NUM_PRECOMPUTED_COLS) + .map(|_| Vec::with_capacity(NUM_ROWS)) + .collect(); + for idx in 0..NUM_ROWS { + let row = generate_byte_ops_row(idx); + for (col_idx, &value) in row.iter().enumerate() { + cols[col_idx].push(FE::from(value)); + } + } + cols + }; + + #[cfg(feature = "parallel")] + let polys: Vec> = columns + .par_iter() + .map(|col| { + Polynomial::interpolate_fft::(col) + .expect("FFT interpolation failed for byte_ops column") + }) + .collect(); + + #[cfg(not(feature = "parallel"))] + let polys: Vec> = columns + .iter() + .map(|col| { + Polynomial::interpolate_fft::(col) + .expect("FFT interpolation failed for byte_ops column") + }) + .collect(); + + let blowup_factor = options.blowup_factor as usize; + let coset_offset = FE::from(options.coset_offset); + + #[cfg(feature = "parallel")] + let mut lde_columns: Vec> = polys + .par_iter() + .map(|poly| { + evaluate_polynomial_on_lde_domain(poly, blowup_factor, NUM_ROWS, &coset_offset) + .expect("LDE evaluation failed for byte_ops polynomial") + }) + .collect(); + + #[cfg(not(feature = "parallel"))] + let mut lde_columns: Vec> = polys + .iter() + .map(|poly| { + evaluate_polynomial_on_lde_domain(poly, blowup_factor, NUM_ROWS, &coset_offset) + .expect("LDE evaluation failed for byte_ops polynomial") + }) + .collect(); + + #[cfg(feature = "parallel")] + lde_columns.par_iter_mut().for_each(|col| { + in_place_bit_reverse_permute(col); + }); + + #[cfg(not(feature = "parallel"))] + for col in lde_columns.iter_mut() { + in_place_bit_reverse_permute(col); + } + + let lde_rows = columns2rows(lde_columns); + + let 
tree = BatchedMerkleTree::::build(&lde_rows) + .expect("Failed to build Merkle tree for byte_ops LDE"); + + tree.root +} + +#[inline] +pub fn preprocessed_commitment(options: &ProofOptions) -> Commitment { + *BYTE_OPS_COMMITMENT.get_or_init(|| compute_preprocessed_commitment(options)) +} + +// ========================================================================= +// Trace generation +// ========================================================================= + +/// Generate the precomputed BYTE_OPS trace table (multiplicities zeroed). +pub fn generate_byte_ops_trace() -> TraceTable { + let mut data = vec![FE::zero(); NUM_ROWS * cols::NUM_COLUMNS]; + + for x in 0u32..256 { + for y in 0u32..256 { + let row_idx = (x as usize) + (y as usize) * 256; + let base = row_idx * cols::NUM_COLUMNS; + + data[base + cols::X] = FE::from(x as u64); + data[base + cols::Y] = FE::from(y as u64); + data[base + cols::AND] = FE::from((x & y) as u64); + data[base + cols::OR] = FE::from((x | y) as u64); + data[base + cols::XOR] = FE::from((x ^ y) as u64); + + let msb8 = (x >> 7) & 1; + let halfword = x + y * 256; + let msb16 = (halfword >> 15) & 1; + data[base + cols::MSB8] = FE::from(msb8 as u64); + data[base + cols::MSB16] = FE::from(msb16 as u64); + + // Multiplicity columns initialized to zero by the vec! above. + } + } + + TraceTable::new_main(data, cols::NUM_COLUMNS, 1) +} + +#[inline] +pub fn row_index(x: u8, y: u8) -> usize { + (x as usize) + (y as usize) * 256 +} + +/// Apply lookups to multiplicity columns. +/// +/// Step 1 leaves this as a no-op — BITWISE still receives every byte-pair +/// bus, so byte_ops's multiplicity columns stay zeroed. Step 2 will route +/// AndByte/OrByte/XorByte/Msb8/Msb16/IsByte/IsHalf events here using the +/// same `BitwiseOperation` stream the BITWISE generator already produces. +pub fn update_multiplicities( + _trace: &mut TraceTable, + _ops: &[BitwiseOperation], +) { + // No-op until Step 2. 
+ let _ = BitwiseOperationType::AndByte; // keep import live +} + +// ========================================================================= +// Bus interactions (empty in Step 1; populated in Step 2) +// ========================================================================= + +/// Receivers for byte-pair lookups. Step 1 returns an empty list — BITWISE +/// keeps all receivers; Step 2 moves them here. +pub fn bus_interactions() -> Vec { + Vec::new() +} diff --git a/prover/src/tables/mod.rs b/prover/src/tables/mod.rs index 19d14411d..c904a4fd6 100644 --- a/prover/src/tables/mod.rs +++ b/prover/src/tables/mod.rs @@ -23,6 +23,7 @@ pub mod types; pub mod bitwise; pub mod branch; +pub mod byte_ops; pub mod commit; pub mod cpu; pub mod decode; diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index d2743a1e5..95764c687 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -1619,6 +1619,9 @@ pub struct Traces { /// BITWISE precomputed lookup table (2^20 rows) pub bitwise: TraceTable, + /// BYTE_OPS precomputed lookup table for byte-pair ops (2^16 rows) + pub byte_ops: TraceTable, + /// LT comparison traces (split into chunks of max_rows::LT) pub lts: Vec>, @@ -1906,6 +1909,9 @@ fn build_traces( let mut bitwise = bitwise::generate_bitwise_trace(); bitwise::update_multiplicities(&mut bitwise, &bitwise_ops); + let mut byte_ops = super::byte_ops::generate_byte_ops_trace(); + super::byte_ops::update_multiplicities(&mut byte_ops, &bitwise_ops); + // Update DECODE multiplicities // Each CPU operation looks up the DECODE table once // Padding rows also look up pc=1 (the CPU padding entry) @@ -1962,6 +1968,7 @@ fn build_traces( Ok(Traces { cpus, bitwise, + byte_ops, lts, shifts, memws, @@ -1993,6 +2000,8 @@ impl Traces { use super::bitwise::NUM_PRECOMPUTED_COLS as BITWISE_PRECOMPUTED; use super::bitwise::cols::NUM_COLUMNS as BITWISE_COLS; use super::branch::cols::NUM_COLUMNS as BRANCH_COLS; + use 
super::byte_ops::NUM_PRECOMPUTED_COLS as BYTE_OPS_PRECOMPUTED; + use super::byte_ops::cols::NUM_COLUMNS as BYTE_OPS_COLS; use super::commit::cols::NUM_COLUMNS as COMMIT_COLS; use super::cpu::cols::NUM_COLUMNS as CPU_COLS; use super::decode::NUM_PRECOMPUTED_COLS as DECODE_PRECOMPUTED; @@ -2014,6 +2023,7 @@ impl Traces { let Traces { cpus, bitwise, + byte_ops, lts, shifts, memws, @@ -2037,6 +2047,7 @@ impl Traces { total += (t.num_rows() * CPU_COLS) as u64; } total += (bitwise.num_rows() * (BITWISE_COLS - BITWISE_PRECOMPUTED)) as u64; + total += (byte_ops.num_rows() * (BYTE_OPS_COLS - BYTE_OPS_PRECOMPUTED)) as u64; for t in lts { total += (t.num_rows() * LT_COLS) as u64; } @@ -2088,6 +2099,7 @@ impl Traces { let n_cpu = aux_cols(super::cpu::bus_interactions().len()); let n_bitwise = aux_cols(super::bitwise::bus_interactions().len()); + let n_byte_ops = aux_cols(super::byte_ops::bus_interactions().len()); let n_lt = aux_cols(super::lt::bus_interactions().len()); let n_shift = aux_cols(super::shift::bus_interactions().len()); let n_memw = aux_cols(super::memw::bus_interactions().len()); @@ -2107,6 +2119,7 @@ impl Traces { let Traces { cpus, bitwise, + byte_ops, lts, shifts, memws, @@ -2130,6 +2143,7 @@ impl Traces { total += (t.num_rows() * n_cpu) as u64; } total += (bitwise.num_rows() * n_bitwise) as u64; + total += (byte_ops.num_rows() * n_byte_ops) as u64; for t in lts { total += (t.num_rows() * n_lt) as u64; } diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index b47554857..d55801625 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -31,6 +31,9 @@ use crate::tables::bitwise::{ use crate::tables::branch::{ branch_constraints, bus_interactions as branch_bus_interactions, cols as branch_cols, }; +use crate::tables::byte_ops::{ + bus_interactions as byte_ops_bus_interactions, cols as byte_ops_cols, +}; use crate::tables::commit::{ bus_interactions as commit_bus_interactions, cols as commit_cols, create_constraints as 
commit_constraints, @@ -506,6 +509,25 @@ pub fn create_bitwise_air(proof_options: &ProofOptions) -> VmAir { .with_name("BITWISE") } +/// Create BYTE_OPS AIR. Step 1 wires it through with no bus interactions +/// (BITWISE still owns every receiver); Step 2 will move them here. +pub fn create_byte_ops_air(proof_options: &ProofOptions) -> VmAir { + let transition_constraints: Vec>> = vec![]; + + let auxiliary_trace_build_data = AuxiliaryTraceBuildData { + interactions: byte_ops_bus_interactions(), + }; + + AirWithBuses::new( + byte_ops_cols::NUM_COLUMNS, + auxiliary_trace_build_data, + proof_options, + 1, + transition_constraints, + ) + .with_name("BYTE_OPS") +} + /// Create LT AIR with bus interactions. pub fn create_lt_air(proof_options: &ProofOptions) -> VmAir { let transition_constraints: Vec>> = vec![]; From a9bc73dad6d21ed82b83f2bb8e5f70d0a214a00c Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 5 May 2026 13:40:32 -0300 Subject: [PATCH 3/8] Move 7 byte-pair bus receivers from bitwise to byte_ops --- prover/src/tables/bitwise.rs | 150 +++---------------------------- prover/src/tables/byte_ops.rs | 165 +++++++++++++++++++++++++++++++--- 2 files changed, 164 insertions(+), 151 deletions(-) diff --git a/prover/src/tables/bitwise.rs b/prover/src/tables/bitwise.rs index 455f696f2..a614e60c4 100644 --- a/prover/src/tables/bitwise.rs +++ b/prover/src/tables/bitwise.rs @@ -370,21 +370,21 @@ pub fn update_multiplicities( ops: &[BitwiseOperation], ) { for op in ops { - let row = row_index(op.x, op.y, op.z); let mu_col = match op.lookup_type { - BitwiseOperationType::AndByte => cols::MU_AND, - BitwiseOperationType::OrByte => cols::MU_OR, - BitwiseOperationType::XorByte => cols::MU_XOR, - BitwiseOperationType::Msb8 => cols::MU_MSB8, - BitwiseOperationType::Msb16 => cols::MU_MSB16, BitwiseOperationType::Zero => cols::MU_ZERO, - BitwiseOperationType::IsByte => cols::MU_IS_BYTE, - BitwiseOperationType::IsHalf => cols::MU_IS_HALF, BitwiseOperationType::IsB20 => 
cols::MU_IS_B20, BitwiseOperationType::Hwsl => cols::MU_HWSL, + // Byte-pair ops are received by `byte_ops::update_multiplicities`. + BitwiseOperationType::AndByte + | BitwiseOperationType::OrByte + | BitwiseOperationType::XorByte + | BitwiseOperationType::Msb8 + | BitwiseOperationType::Msb16 + | BitwiseOperationType::IsByte + | BitwiseOperationType::IsHalf => continue, }; - // Increment multiplicity + let row = row_index(op.x, op.y, op.z); let current = trace.main_table.get_row(row)[mu_col]; trace.set_main(row, mu_col, current + FE::one()); } @@ -543,106 +543,11 @@ impl BitwiseOperation { /// Creates all bus interactions for the BITWISE table. /// -/// The BITWISE table is a **receiver** for all lookups (negative multiplicity -/// in the spec corresponds to receiving lookups from other tables). +/// BITWISE keeps only the 20-bit-space lookups (Zero / IsB20 / Hwsl) that +/// genuinely need the Z dimension. Byte-pair lookups (And/Or/Xor/Msb8/Msb16/ +/// IsByte/IsHalfword) live in [`super::byte_ops`] now. 
pub fn bus_interactions() -> Vec { vec![ - // AND_BYTE[X, Y] -> AND - BusInteraction::receiver( - BusId::AndByte, - Multiplicity::Column(cols::MU_AND), - vec![ - BusValue::Packed { - start_column: cols::X, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::Y, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::AND, - packing: Packing::Direct, - }, - ], - ), - // OR_BYTE[X, Y] -> OR - BusInteraction::receiver( - BusId::OrByte, - Multiplicity::Column(cols::MU_OR), - vec![ - BusValue::Packed { - start_column: cols::X, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::Y, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::OR, - packing: Packing::Direct, - }, - ], - ), - // XOR_BYTE[X, Y] -> XOR - BusInteraction::receiver( - BusId::XorByte, - Multiplicity::Column(cols::MU_XOR), - vec![ - BusValue::Packed { - start_column: cols::X, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::Y, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::XOR, - packing: Packing::Direct, - }, - ], - ), - // MSB8[X] -> MSB8 - BusInteraction::receiver( - BusId::Msb8, - Multiplicity::Column(cols::MU_MSB8), - vec![ - BusValue::Packed { - start_column: cols::X, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::MSB8, - packing: Packing::Direct, - }, - ], - ), - // MSB16[X + 256*Y] -> MSB16 - // Input is packed as Word2L (X + 2^8 * Y would need custom, but spec says X + 256*Y) - // Since X and Y are bytes, we use a linear combination - BusInteraction::receiver( - BusId::Msb16, - Multiplicity::Column(cols::MU_MSB16), - vec![ - // X + 256*Y as linear combination - BusValue::linear(vec![ - stark::lookup::LinearTerm::Column { - coefficient: 1, - column: cols::X, - }, - stark::lookup::LinearTerm::Column { - coefficient: 256, - column: cols::Y, - }, - ]), - BusValue::Packed { - start_column: cols::MSB16, - packing: Packing::Direct, 
- }, - ], - ), // ZERO[X + 256*Y + 65536*Z] -> ZERO BusInteraction::receiver( BusId::Zero, @@ -668,37 +573,6 @@ pub fn bus_interactions() -> Vec { }, ], ), - // IS_BYTE[X, Y] - range check two byte values, no output. - // Single-byte checks send the second argument as 0. - BusInteraction::receiver( - BusId::IsByte, - Multiplicity::Column(cols::MU_IS_BYTE), - vec![ - BusValue::Packed { - start_column: cols::X, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::Y, - packing: Packing::Direct, - }, - ], - ), - // IS_HALF[X + 256*Y] - range check for halfword - BusInteraction::receiver( - BusId::IsHalfword, - Multiplicity::Column(cols::MU_IS_HALF), - vec![BusValue::linear(vec![ - stark::lookup::LinearTerm::Column { - coefficient: 1, - column: cols::X, - }, - stark::lookup::LinearTerm::Column { - coefficient: 256, - column: cols::Y, - }, - ])], - ), // IS_B20[X + 256*Y + 65536*Z] - range check for 20-bit BusInteraction::receiver( BusId::IsB20, diff --git a/prover/src/tables/byte_ops.rs b/prover/src/tables/byte_ops.rs index 4eba6c323..bc02a0ff0 100644 --- a/prover/src/tables/byte_ops.rs +++ b/prover/src/tables/byte_ops.rs @@ -24,7 +24,7 @@ use std::sync::OnceLock; use math::fft::cpu::bit_reversing::in_place_bit_reverse_permute; use math::polynomial::Polynomial; use stark::config::{BatchedMerkleTree, Commitment}; -use stark::lookup::BusInteraction; +use stark::lookup::{BusInteraction, BusValue, Multiplicity, Packing}; use stark::proof::options::ProofOptions; use stark::prover::evaluate_polynomial_on_lde_domain; use stark::trace::{TraceTable, columns2rows}; @@ -33,7 +33,7 @@ use stark::trace::{TraceTable, columns2rows}; use rayon::prelude::*; use super::bitwise::{BitwiseOperation, BitwiseOperationType}; -use super::types::{FE, GoldilocksExtension, GoldilocksField}; +use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; // ========================================================================= // Column indices for BYTE_OPS table @@ 
-256,24 +256,163 @@ pub fn row_index(x: u8, y: u8) -> usize { /// Apply lookups to multiplicity columns. /// -/// Step 1 leaves this as a no-op — BITWISE still receives every byte-pair -/// bus, so byte_ops's multiplicity columns stay zeroed. Step 2 will route -/// AndByte/OrByte/XorByte/Msb8/Msb16/IsByte/IsHalf events here using the -/// same `BitwiseOperation` stream the BITWISE generator already produces. +/// Routes byte-pair events (AndByte/OrByte/XorByte/Msb8/Msb16/IsByte/IsHalf) +/// into byte_ops; events for the 20-bit ops (Zero/IsB20/Hwsl) stay in +/// `bitwise::update_multiplicities` and are skipped here. pub fn update_multiplicities( - _trace: &mut TraceTable, - _ops: &[BitwiseOperation], + trace: &mut TraceTable, + ops: &[BitwiseOperation], ) { - // No-op until Step 2. - let _ = BitwiseOperationType::AndByte; // keep import live + for op in ops { + let mu_col = match op.lookup_type { + BitwiseOperationType::AndByte => cols::MU_AND, + BitwiseOperationType::OrByte => cols::MU_OR, + BitwiseOperationType::XorByte => cols::MU_XOR, + BitwiseOperationType::Msb8 => cols::MU_MSB8, + BitwiseOperationType::Msb16 => cols::MU_MSB16, + BitwiseOperationType::IsByte => cols::MU_IS_BYTE, + BitwiseOperationType::IsHalf => cols::MU_IS_HALF, + BitwiseOperationType::Zero + | BitwiseOperationType::IsB20 + | BitwiseOperationType::Hwsl => continue, + }; + let row = row_index(op.x, op.y); + let current = trace.main_table.get_row(row)[mu_col]; + trace.set_main(row, mu_col, current + FE::one()); + } } // ========================================================================= // Bus interactions (empty in Step 1; populated in Step 2) // ========================================================================= -/// Receivers for byte-pair lookups. Step 1 returns an empty list — BITWISE -/// keeps all receivers; Step 2 moves them here. +/// Receivers for byte-pair lookups. The 20-bit ops (Zero/IsB20/Hwsl) stay +/// on the BITWISE table. 
pub fn bus_interactions() -> Vec { - Vec::new() + vec![ + // AND_BYTE[X, Y] -> AND + BusInteraction::receiver( + BusId::AndByte, + Multiplicity::Column(cols::MU_AND), + vec![ + BusValue::Packed { + start_column: cols::X, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::Y, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::AND, + packing: Packing::Direct, + }, + ], + ), + // OR_BYTE[X, Y] -> OR + BusInteraction::receiver( + BusId::OrByte, + Multiplicity::Column(cols::MU_OR), + vec![ + BusValue::Packed { + start_column: cols::X, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::Y, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::OR, + packing: Packing::Direct, + }, + ], + ), + // XOR_BYTE[X, Y] -> XOR + BusInteraction::receiver( + BusId::XorByte, + Multiplicity::Column(cols::MU_XOR), + vec![ + BusValue::Packed { + start_column: cols::X, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::Y, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::XOR, + packing: Packing::Direct, + }, + ], + ), + // MSB8[X] -> MSB8 + BusInteraction::receiver( + BusId::Msb8, + Multiplicity::Column(cols::MU_MSB8), + vec![ + BusValue::Packed { + start_column: cols::X, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::MSB8, + packing: Packing::Direct, + }, + ], + ), + // MSB16[X + 256*Y] -> MSB16 + BusInteraction::receiver( + BusId::Msb16, + Multiplicity::Column(cols::MU_MSB16), + vec![ + BusValue::linear(vec![ + stark::lookup::LinearTerm::Column { + coefficient: 1, + column: cols::X, + }, + stark::lookup::LinearTerm::Column { + coefficient: 256, + column: cols::Y, + }, + ]), + BusValue::Packed { + start_column: cols::MSB16, + packing: Packing::Direct, + }, + ], + ), + // IS_BYTE[X, Y] - range check two byte values, no output. + // Single-byte checks send the second argument as 0. 
+ BusInteraction::receiver( + BusId::IsByte, + Multiplicity::Column(cols::MU_IS_BYTE), + vec![ + BusValue::Packed { + start_column: cols::X, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::Y, + packing: Packing::Direct, + }, + ], + ), + // IS_HALF[X + 256*Y] - range check for halfword + BusInteraction::receiver( + BusId::IsHalfword, + Multiplicity::Column(cols::MU_IS_HALF), + vec![BusValue::linear(vec![ + stark::lookup::LinearTerm::Column { + coefficient: 1, + column: cols::X, + }, + stark::lookup::LinearTerm::Column { + coefficient: 256, + column: cols::Y, + }, + ])], + ), + ] } From bd6187ef076cc48f908eb5c54e504b32a88e7d12 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 5 May 2026 13:54:58 -0300 Subject: [PATCH 4/8] Drop unused byte-pair cols from bitwise after byte_ops split --- prover/src/tables/bitwise.rs | 89 ++++-------------- prover/src/test_utils.rs | 94 ------------------- prover/src/tests/bitwise_tests.rs | 114 +++--------------------- prover/src/tests/prove_elfs_tests.rs | 14 +-- prover/src/tests/trace_builder_tests.rs | 32 ++++--- 5 files changed, 56 insertions(+), 287 deletions(-) diff --git a/prover/src/tables/bitwise.rs b/prover/src/tables/bitwise.rs index a614e60c4..8bacc000e 100644 --- a/prover/src/tables/bitwise.rs +++ b/prover/src/tables/bitwise.rs @@ -45,7 +45,9 @@ use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; // Column indices for BITWISE table // ========================================================================= -/// Input columns (precomputed) +/// Input columns (precomputed). After splitting byte-pair ops into +/// [`super::byte_ops`], BITWISE only retains the 20-bit-space data needed +/// by the ZERO / IS_B20 / HWSL receivers. 
pub mod cols { /// X: Byte input (0-255) pub const X: usize = 0; @@ -53,55 +55,29 @@ pub mod cols { pub const Y: usize = 1; /// Z: 4-bit input (0-15) for shift amount pub const Z: usize = 2; - - /// AND result: X & Y - pub const AND: usize = 3; - /// OR result: X | Y - pub const OR: usize = 4; - /// XOR result: X ^ Y - pub const XOR: usize = 5; - /// MSB of byte X: (X >> 7) & 1 - pub const MSB8: usize = 6; - /// MSB of halfword (X + 256*Y): ((X + 256*Y) >> 15) & 1 - pub const MSB16: usize = 7; - /// Zero check: (X == 0 && Y == 0) ? 1 : 0 - pub const ZERO: usize = 8; + /// Zero check: (X == 0 && Y == 0 && Z == 0) ? 1 : 0 + pub const ZERO: usize = 3; /// Shift left result: ((X + 256*Y) << Z) & 0xFFFF - pub const SLL: usize = 9; + pub const SLL: usize = 4; /// Shift left carry: (X + 256*Y) >> (16 - Z) - pub const SLLC: usize = 10; - - // Multiplicity columns for each lookup type - /// Multiplicity for AND_BYTE lookups - pub const MU_AND: usize = 11; - /// Multiplicity for OR_BYTE lookups - pub const MU_OR: usize = 12; - /// Multiplicity for XOR_BYTE lookups - pub const MU_XOR: usize = 13; - /// Multiplicity for MSB8 lookups - pub const MU_MSB8: usize = 14; - /// Multiplicity for MSB16 lookups - pub const MU_MSB16: usize = 15; + pub const SLLC: usize = 5; + /// Multiplicity for ZERO lookups - pub const MU_ZERO: usize = 16; - /// Multiplicity for IS_BYTE lookups. Each lookup checks X and Y; pass Y=0 - /// for a single-byte range check. 
- pub const MU_IS_BYTE: usize = 17; - /// Multiplicity for IS_HALF lookups - pub const MU_IS_HALF: usize = 18; + pub const MU_ZERO: usize = 6; /// Multiplicity for IS_B20 lookups - pub const MU_IS_B20: usize = 19; + pub const MU_IS_B20: usize = 7; /// Multiplicity for HWSL lookups - pub const MU_HWSL: usize = 20; + pub const MU_HWSL: usize = 8; + /// Total number of columns - pub const NUM_COLUMNS: usize = 21; + pub const NUM_COLUMNS: usize = 9; } /// Number of rows in the BITWISE table: 256 * 256 * 16 = 2^20 pub const NUM_ROWS: usize = 256 * 256 * 16; /// Number of precomputed (non-multiplicity) columns -pub const NUM_PRECOMPUTED_COLS: usize = 11; +pub const NUM_PRECOMPUTED_COLS: usize = 6; // ========================================================================= // Compile-time row generation @@ -115,22 +91,14 @@ pub const NUM_PRECOMPUTED_COLS: usize = 11; /// Index encoding: `index = x + y * 256 + z * 65536` /// where x, y ∈ [0, 255] and z ∈ [0, 15] /// -/// Returns the 11 precomputed columns: [X, Y, Z, AND, OR, XOR, MSB8, MSB16, ZERO, SLL, SLLC] +/// Returns the 6 precomputed columns: [X, Y, Z, ZERO, SLL, SLLC] #[inline] pub const fn generate_bitwise_row(index: usize) -> [u64; NUM_PRECOMPUTED_COLS] { let x = (index & 0xFF) as u32; let y = ((index >> 8) & 0xFF) as u32; let z = ((index >> 16) & 0xF) as u32; - // Bitwise operations on bytes - let and_val = x & y; - let or_val = x | y; - let xor_val = x ^ y; - - // MSB extractions - let msb8 = (x >> 7) & 1; let halfword = x + y * 256; - let msb16 = (halfword >> 15) & 1; // Zero check (X + 256*Y + 65536*Z must be zero) let is_zero = if x == 0 && y == 0 && z == 0 { 1 } else { 0 }; @@ -147,11 +115,6 @@ pub const fn generate_bitwise_row(index: usize) -> [u64; NUM_PRECOMPUTED_COLS] { x as u64, // X y as u64, // Y z as u64, // Z - and_val as u64, // AND - or_val as u64, // OR - xor_val as u64, // XOR - msb8 as u64, // MSB8 - msb16 as u64, // MSB16 is_zero as u64, // ZERO sll as u64, // SLL sllc as u64, // SLLC @@ 
-306,25 +269,12 @@ pub fn generate_bitwise_trace() -> TraceTable> 7) & 1; - let halfword = x + y * 256; - let msb16 = (halfword >> 15) & 1; - data[base + cols::MSB8] = FE::from(msb8 as u64); - data[base + cols::MSB16] = FE::from(msb16 as u64); - - // Zero check (X + 256*Y + 65536*Z must be zero) let is_zero = if x == 0 && y == 0 && z == 0 { 1u64 } else { @@ -332,7 +282,6 @@ pub fn generate_bitwise_trace() -> TraceTable TraceTable = (0..num_rows) .filter(|&row| { let row_data = trace.main_table.get_row(row); - // Check all multiplicity columns (indices 11-20) - (cols::MU_AND..=cols::MU_HWSL).any(|col| row_data[col] != FE::zero()) + (cols::MU_ZERO..=cols::MU_HWSL).any(|col| row_data[col] != FE::zero()) }) .collect(); diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index d55801625..567e6ee52 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -21,7 +21,6 @@ use math::field::element::FieldElement; use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; use stark::lookup::{AirWithBuses, AuxiliaryTraceBuildData, NullBoundaryConstraintBuilder}; use stark::proof::options::ProofOptions; -use stark::trace::TraceTable; use crate::constraints::cpu::create_all_cpu_constraints; use crate::tables::bitwise::{ @@ -363,99 +362,6 @@ pub fn collect_bitwise_ops_from_load( .collect() } -// ============================================================================= -// Minimal Trace Generation (for testing/benchmarking only) -// ============================================================================= - -/// Generate a minimal bitwise trace containing only the rows needed for the given lookups. -/// -/// This is much faster than the full 2^20 row table for benchmarking/testing. -/// -/// **WARNING: FOR TESTING/BENCHMARKING ONLY - NOT PRODUCTION SAFE!** -/// The verifier expects the full deterministic 2^20 row public table. 
-pub fn generate_minimal_bitwise_trace(ops: &[BitwiseOperation]) -> TraceTable { - use std::collections::HashMap; - - // Collect unique (lo_byte, hi_byte, shift) tuples and count multiplicities per lookup type - let mut row_data: HashMap<(u8, u8, u8), [u64; 10]> = HashMap::new(); - - for op in ops { - let key = (op.x, op.y, op.z); - let mu_idx = match op.lookup_type { - BitwiseOperationType::AndByte => 0, - BitwiseOperationType::OrByte => 1, - BitwiseOperationType::XorByte => 2, - BitwiseOperationType::Msb8 => 3, - BitwiseOperationType::Msb16 => 4, - BitwiseOperationType::Zero => 5, - BitwiseOperationType::IsByte => 6, - BitwiseOperationType::IsHalf => 7, - BitwiseOperationType::IsB20 => 8, - BitwiseOperationType::Hwsl => 9, - }; - row_data.entry(key).or_insert([0; 10])[mu_idx] += 1; - } - - // Need at least 4 rows for FRI, pad to power of 2 - let unique_rows: Vec<_> = row_data.keys().cloned().collect(); - let num_rows = unique_rows.len().max(4).next_power_of_two(); - - let mut data = vec![FE::zero(); num_rows * bitwise_cols::NUM_COLUMNS]; - - for (row_idx, (x, y, z)) in unique_rows.iter().enumerate() { - let base = row_idx * bitwise_cols::NUM_COLUMNS; - let x = *x as u32; - let y = *y as u32; - let z = *z as u32; - - // Input columns - data[base + bitwise_cols::X] = FE::from(x as u64); - data[base + bitwise_cols::Y] = FE::from(y as u64); - data[base + bitwise_cols::Z] = FE::from(z as u64); - - // Bitwise operation results - data[base + bitwise_cols::AND] = FE::from((x & y) as u64); - data[base + bitwise_cols::OR] = FE::from((x | y) as u64); - data[base + bitwise_cols::XOR] = FE::from((x ^ y) as u64); - - // MSB extractions - let msb8 = (x >> 7) & 1; - let halfword = x + y * 256; - let msb16 = (halfword >> 15) & 1; - data[base + bitwise_cols::MSB8] = FE::from(msb8 as u64); - data[base + bitwise_cols::MSB16] = FE::from(msb16 as u64); - - // Zero check - let is_zero = if x == 0 && y == 0 { 1u64 } else { 0u64 }; - data[base + bitwise_cols::ZERO] = FE::from(is_zero); - 
- // Shift operations - let sll = if z == 0 { - halfword - } else { - (halfword << z) & 0xFFFF - }; - let sllc = if z == 0 { 0 } else { halfword >> (16 - z) }; - data[base + bitwise_cols::SLL] = FE::from(sll as u64); - data[base + bitwise_cols::SLLC] = FE::from(sllc as u64); - - // Multiplicity columns - let mus = &row_data[&(x as u8, y as u8, z as u8)]; - data[base + bitwise_cols::MU_AND] = FE::from(mus[0]); - data[base + bitwise_cols::MU_OR] = FE::from(mus[1]); - data[base + bitwise_cols::MU_XOR] = FE::from(mus[2]); - data[base + bitwise_cols::MU_MSB8] = FE::from(mus[3]); - data[base + bitwise_cols::MU_MSB16] = FE::from(mus[4]); - data[base + bitwise_cols::MU_ZERO] = FE::from(mus[5]); - data[base + bitwise_cols::MU_IS_BYTE] = FE::from(mus[6]); - data[base + bitwise_cols::MU_IS_HALF] = FE::from(mus[7]); - data[base + bitwise_cols::MU_IS_B20] = FE::from(mus[8]); - data[base + bitwise_cols::MU_HWSL] = FE::from(mus[9]); - } - - TraceTable::new_main(data, bitwise_cols::NUM_COLUMNS, 1) -} - // ============================================================================= // AIR Creation Helpers // ============================================================================= diff --git a/prover/src/tests/bitwise_tests.rs b/prover/src/tests/bitwise_tests.rs index 2848edef4..a6273aebc 100644 --- a/prover/src/tests/bitwise_tests.rs +++ b/prover/src/tests/bitwise_tests.rs @@ -41,25 +41,7 @@ fn test_generate_bitwise_trace() { assert_eq!(row_data[cols::X], FE::from(5u64)); assert_eq!(row_data[cols::Y], FE::from(3u64)); assert_eq!(row_data[cols::Z], FE::from(0u64)); - assert_eq!(row_data[cols::AND], FE::from(1u64)); // 5 & 3 = 1 - assert_eq!(row_data[cols::OR], FE::from(7u64)); // 5 | 3 = 7 - assert_eq!(row_data[cols::XOR], FE::from(6u64)); // 5 ^ 3 = 6 - - // Check MSB8 for x=128 (MSB set) - let row = row_index(128, 0, 0); - let row_data = trace.main_table.get_row(row); - assert_eq!(row_data[cols::MSB8], FE::from(1u64)); - - // Check MSB8 for x=127 (MSB not set) - let row = 
row_index(127, 0, 0); - let row_data = trace.main_table.get_row(row); - assert_eq!(row_data[cols::MSB8], FE::from(0u64)); - - // Check MSB16 for halfword = 32768 (0x8000) - // 32768 = 0 + 256 * 128 - let row = row_index(0, 128, 0); - let row_data = trace.main_table.get_row(row); - assert_eq!(row_data[cols::MSB16], FE::from(1u64)); + // AND/OR/XOR/MSB8/MSB16 live in byte_ops now; covered by byte_ops_tests. // Check shift: x=1, y=0, z=4 -> SLL = 16, SLLC = 0 let row = row_index(1, 0, 4); @@ -94,8 +76,9 @@ fn test_zero_check() { #[test] fn test_bus_interactions_count() { let interactions = bus_interactions(); - // Should have 10 interactions (one per lookup type; HWSLC merged into HWSL) - assert_eq!(interactions.len(), 10); + // BITWISE keeps only ZERO / IS_B20 / HWSL receivers; byte-pair lookups + // moved to byte_ops. + assert_eq!(interactions.len(), 3); } #[test] @@ -108,14 +91,9 @@ fn test_first_row() { assert_eq!(row_data[cols::X], FE::from(0u64)); assert_eq!(row_data[cols::Y], FE::from(0u64)); assert_eq!(row_data[cols::Z], FE::from(0u64)); - assert_eq!(row_data[cols::AND], FE::from(0u64)); // 0 & 0 = 0 - assert_eq!(row_data[cols::OR], FE::from(0u64)); // 0 | 0 = 0 - assert_eq!(row_data[cols::XOR], FE::from(0u64)); // 0 ^ 0 = 0 - assert_eq!(row_data[cols::MSB8], FE::from(0u64)); // MSB of 0 = 0 - assert_eq!(row_data[cols::MSB16], FE::from(0u64)); // MSB of 0 = 0 - assert_eq!(row_data[cols::ZERO], FE::from(1u64)); // 0 and 0 are both zero - assert_eq!(row_data[cols::SLL], FE::from(0u64)); // 0 << 0 = 0 - assert_eq!(row_data[cols::SLLC], FE::from(0u64)); // 0 >> 16 = 0 + assert_eq!(row_data[cols::ZERO], FE::from(1u64)); + assert_eq!(row_data[cols::SLL], FE::from(0u64)); + assert_eq!(row_data[cols::SLLC], FE::from(0u64)); } #[test] @@ -128,34 +106,13 @@ fn test_last_row() { assert_eq!(row_data[cols::X], FE::from(255u64)); assert_eq!(row_data[cols::Y], FE::from(255u64)); assert_eq!(row_data[cols::Z], FE::from(15u64)); - assert_eq!(row_data[cols::AND], 
FE::from(255u64)); // 255 & 255 = 255 - assert_eq!(row_data[cols::OR], FE::from(255u64)); // 255 | 255 = 255 - assert_eq!(row_data[cols::XOR], FE::from(0u64)); // 255 ^ 255 = 0 - assert_eq!(row_data[cols::MSB8], FE::from(1u64)); // MSB of 255 = 1 - // halfword = 255 + 256*255 = 65535 = 0xFFFF, MSB is bit 15 = 1 - assert_eq!(row_data[cols::MSB16], FE::from(1u64)); - assert_eq!(row_data[cols::ZERO], FE::from(0u64)); // not zero + assert_eq!(row_data[cols::ZERO], FE::from(0u64)); // SLL: (65535 << 15) & 0xFFFF = 0x8000 = 32768 assert_eq!(row_data[cols::SLL], FE::from(32768u64)); // SLLC: 65535 >> (16 - 15) = 65535 >> 1 = 32767 assert_eq!(row_data[cols::SLLC], FE::from(32767u64)); } -#[test] -fn test_boundary_msb16() { - let trace = generate_bitwise_trace(); - - // halfword = 32767 (0x7FFF): MSB16 should be 0 - // 32767 = 255 + 256*127, so x=255, y=127 - let row = row_index(255, 127, 0); - assert_eq!(trace.main_table.get_row(row)[cols::MSB16], FE::from(0u64)); - - // halfword = 32768 (0x8000): MSB16 should be 1 - // 32768 = 0 + 256*128, so x=0, y=128 - let row = row_index(0, 128, 0); - assert_eq!(trace.main_table.get_row(row)[cols::MSB16], FE::from(1u64)); -} - #[test] fn test_shift_boundaries() { let trace = generate_bitwise_trace(); @@ -186,28 +143,8 @@ fn test_shift_boundaries() { assert_eq!(row_data[cols::SLLC], FE::from(1u64)); } -#[test] -fn test_all_bitwise_operations() { - let trace = generate_bitwise_trace(); - - // Test with x=0xAA, y=0x55 (alternating bits) - let row = row_index(0xAA, 0x55, 0); - let row_data = trace.main_table.get_row(row); - - assert_eq!(row_data[cols::AND], FE::from(0u64)); // 0xAA & 0x55 = 0 - assert_eq!(row_data[cols::OR], FE::from(0xFFu64)); // 0xAA | 0x55 = 0xFF - assert_eq!(row_data[cols::XOR], FE::from(0xFFu64)); // 0xAA ^ 0x55 = 0xFF - assert_eq!(row_data[cols::MSB8], FE::from(1u64)); // MSB of 0xAA = 1 - - // Test with x=0x55, y=0xAA - let row = row_index(0x55, 0xAA, 0); - let row_data = trace.main_table.get_row(row); - - 
assert_eq!(row_data[cols::AND], FE::from(0u64)); // 0x55 & 0xAA = 0 - assert_eq!(row_data[cols::OR], FE::from(0xFFu64)); // 0x55 | 0xAA = 0xFF - assert_eq!(row_data[cols::XOR], FE::from(0xFFu64)); // 0x55 ^ 0xAA = 0xFF - assert_eq!(row_data[cols::MSB8], FE::from(0u64)); // MSB of 0x55 = 0 -} +// `test_all_bitwise_operations` was removed: AND/OR/XOR/MSB8 are byte-pair +// ops that moved to byte_ops; coverage lives there. #[test] fn test_row_count() { @@ -249,7 +186,7 @@ fn test_generate_bitwise_row_matches_trace() { let const_row = generate_bitwise_row(idx); let trace_row = trace.main_table.get_row(idx); - // Verify all 11 precomputed columns match + // Verify all precomputed columns match (X, Y, Z, ZERO, SLL, SLLC). assert_eq!(const_row.len(), NUM_PRECOMPUTED_COLS); assert_eq!( @@ -269,41 +206,16 @@ fn test_generate_bitwise_row_matches_trace() { ); assert_eq!( const_row[3], - trace_row[cols::AND].canonical_u64(), - "AND mismatch at index {idx}" - ); - assert_eq!( - const_row[4], - trace_row[cols::OR].canonical_u64(), - "OR mismatch at index {idx}" - ); - assert_eq!( - const_row[5], - trace_row[cols::XOR].canonical_u64(), - "XOR mismatch at index {idx}" - ); - assert_eq!( - const_row[6], - trace_row[cols::MSB8].canonical_u64(), - "MSB8 mismatch at index {idx}" - ); - assert_eq!( - const_row[7], - trace_row[cols::MSB16].canonical_u64(), - "MSB16 mismatch at index {idx}" - ); - assert_eq!( - const_row[8], trace_row[cols::ZERO].canonical_u64(), "ZERO mismatch at index {idx}" ); assert_eq!( - const_row[9], + const_row[4], trace_row[cols::SLL].canonical_u64(), "SLL mismatch at index {idx}" ); assert_eq!( - const_row[10], + const_row[5], trace_row[cols::SLLC].canonical_u64(), "SLLC mismatch at index {idx}" ); diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 7e0fbc181..592ee3b1c 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -1411,21 +1411,21 @@ fn 
test_debug_memory_tokens_sb_sh() { // (e.g. CPU's paired IS_BYTE checks) also bump this same MU_IS_BYTE // column and may hit the same (X, Y) rows, so this is a coarse sanity // check (BITWISE mult >= PAGE's contribution), not an exact balance. - use crate::tables::bitwise::cols as bitwise_cols; - let bitwise_is_byte_mult_over_page_pairs: u64 = page_pair_counts + use crate::tables::byte_ops::cols as byte_ops_cols; + let byte_ops_is_byte_mult_over_page_pairs: u64 = page_pair_counts .keys() .map(|&(x, y)| { let row = x as usize + 256 * y as usize; traces - .bitwise + .byte_ops .main_table - .get(row, bitwise_cols::MU_IS_BYTE) + .get(row, byte_ops_cols::MU_IS_BYTE) .to_raw() }) .sum(); println!( - "Bitwise IS_BYTE mult summed over PAGE (init, fini) rows: {}", - bitwise_is_byte_mult_over_page_pairs + "byte_ops IS_BYTE mult summed over PAGE (init, fini) rows: {}", + byte_ops_is_byte_mult_over_page_pairs ); println!( "Total IS_BYTE lookups from PAGE (counted): {}", @@ -1435,7 +1435,7 @@ fn test_debug_memory_tokens_sb_sh() { // hit some of the same (init, fini) rows. It should never be negative. println!( "Difference: {} (>= 0 expected; PAGE pairs may also receive from CPU)", - bitwise_is_byte_mult_over_page_pairs as i64 - page_is_byte_total as i64 + byte_ops_is_byte_mult_over_page_pairs as i64 - page_is_byte_total as i64 ); // === Verify PAGE AIR uses correct page_base === diff --git a/prover/src/tests/trace_builder_tests.rs b/prover/src/tests/trace_builder_tests.rs index 0e54f353c..013f68ac2 100644 --- a/prover/src/tests/trace_builder_tests.rs +++ b/prover/src/tests/trace_builder_tests.rs @@ -1,6 +1,6 @@ //! Tests for the trace builder module. 
-use crate::tables::bitwise; +use crate::tables::byte_ops; use crate::tables::cpu::cols; use crate::tables::lt; use crate::tables::memw_register; @@ -268,10 +268,11 @@ fn test_bitwise_lookups_collected() { let traces = Traces::from_logs(&logs, instructions, &Default::default()).unwrap(); - // Check AND multiplicity was updated for (0x12, 0x34, 0) - let row_idx = bitwise::row_index(0x12, 0x34, 0); - let row = traces.bitwise.main_table.get_row(row_idx); - assert_eq!(row[bitwise::cols::MU_AND], FE::one()); + // Check AND multiplicity was updated for (0x12, 0x34) in BYTE_OPS + // (byte-pair receivers moved out of BITWISE). + let row_idx = byte_ops::row_index(0x12, 0x34); + let row = traces.byte_ops.main_table.get_row(row_idx); + assert_eq!(row[byte_ops::cols::MU_AND], FE::one()); } #[test] @@ -344,7 +345,10 @@ fn test_mixed_instructions() { // 5 ops (4 + ecall) padded to 8 assert_eq!(traces.cpus[0].main_table.height, 8); - assert_eq!(traces.bitwise.main_table.height, bitwise::NUM_ROWS); + assert_eq!( + traces.bitwise.main_table.height, + crate::tables::bitwise::NUM_ROWS + ); // 1 SLT + 1 BLT = 2 LT ops assert!(traces.lts[0].main_table.height >= 2); } @@ -482,13 +486,13 @@ fn test_memw_generates_lt_for_timestamp_ordering() { ); // Register ops use IS_HALF for timestamp ordering instead of LT. - // Verify the bitwise table has at least one IS_HALF entry with non-zero + // Verify the byte_ops table has at least one IS_HALF entry with non-zero // multiplicity, proving that MEMW_R's IS_HALF lookups were emitted. 
- let has_is_half_entry = (0..traces.bitwise.main_table.height) - .any(|i| traces.bitwise.main_table.get_row(i)[bitwise::cols::MU_IS_HALF] != FE::zero()); + let has_is_half_entry = (0..traces.byte_ops.main_table.height) + .any(|i| traces.byte_ops.main_table.get_row(i)[byte_ops::cols::MU_IS_HALF] != FE::zero()); assert!( has_is_half_entry, - "MEMW_R register ops should produce IS_HALF bitwise entries" + "MEMW_R register ops should produce IS_HALF byte_ops entries" ); // The LT table should still have ops from non-register MEMW accesses @@ -553,11 +557,11 @@ fn test_lt_generates_bitwise_lookups() { let lhs_sub_rhs = 0x1234u64.wrapping_sub(0x5678); let sub_0 = (lhs_sub_rhs & 0xFFFF) as u16; // 0xBBBC - // Check IS_HALF multiplicity for lhs_sub_rhs[0] - let row_idx = bitwise::row_index((sub_0 & 0xFF) as u8, (sub_0 >> 8) as u8, 0); - let row = traces.bitwise.main_table.get_row(row_idx); + // Check IS_HALF multiplicity for lhs_sub_rhs[0] (now lives in BYTE_OPS). + let row_idx = byte_ops::row_index((sub_0 & 0xFF) as u8, (sub_0 >> 8) as u8); + let row = traces.byte_ops.main_table.get_row(row_idx); assert_ne!( - row[bitwise::cols::MU_IS_HALF], + row[byte_ops::cols::MU_IS_HALF], FE::zero(), "IS_HALF lookup for lhs_sub_rhs[0] should have non-zero multiplicity" ); From 95e4d4b366b0ddfb18920cd8f002f6f8cbd8bc5e Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 5 May 2026 16:01:30 -0300 Subject: [PATCH 5/8] Collapse AndByte/OrByte/XorByte into unified BusId::Bitwise --- prover/src/tables/branch.rs | 5 +- prover/src/tables/byte_ops.rs | 263 ++++++++++++------------ prover/src/tables/cpu.rs | 73 ++----- prover/src/tables/shift.rs | 15 +- prover/src/tables/types.rs | 8 + prover/src/tests/trace_builder_tests.rs | 8 +- 6 files changed, 181 insertions(+), 191 deletions(-) diff --git a/prover/src/tables/branch.rs b/prover/src/tables/branch.rs index d505ee60b..b2bf49c71 100644 --- a/prover/src/tables/branch.rs +++ b/prover/src/tables/branch.rs @@ -247,12 +247,13 @@ pub fn 
bus_interactions() -> Vec { BusValue::constant(0), ], ), - // AND_BYTE[next_pc_low[0]; unmasked_low_byte, 254] + // Bitwise[op_id=1, unmasked_low_byte, 254, next_pc_low[0]] // Verifies: next_pc_low[0] = unmasked_low_byte & 0xFE BusInteraction::sender( - BusId::AndByte, + BusId::Bitwise, Multiplicity::Column(cols::MU), vec![ + BusValue::constant(1), BusValue::Packed { start_column: cols::UNMASKED_LOW_BYTE, packing: Packing::Direct, diff --git a/prover/src/tables/byte_ops.rs b/prover/src/tables/byte_ops.rs index bc02a0ff0..0e4b967e1 100644 --- a/prover/src/tables/byte_ops.rs +++ b/prover/src/tables/byte_ops.rs @@ -1,23 +1,24 @@ //! BYTE_OPS precomputed lookup table for byte-pair operations. //! -//! Holds every `(X, Y) ∈ [0, 256)²` and the precomputed result of the -//! byte-pair lookups that BITWISE used to multiplex into its 2²⁰ rows. The -//! 16× factor was driven only by 20-bit ops (HWSL/IS_B20/ZERO); pulling these -//! out into a dedicated 2¹⁶ table cuts ~12M cells. +//! Holds every `(X, Y) ∈ [0, 256)²` across four `OP_ID` slices and serves +//! the byte-pair lookups that BITWISE used to multiplex into its 2²⁰ rows. //! -//! ## Operations served (Step 2 will wire the receivers) -//! - `AND_BYTE[X, Y]` -> X & Y -//! - `OR_BYTE[X, Y]` -> X | Y -//! - `XOR_BYTE[X, Y]` -> X ^ Y -//! - `MSB8[X]` -> most significant bit of byte (Y = 0) -//! - `MSB16[X + 256*Y]` -> most significant bit of halfword -//! - `IS_BYTE[X, Y]` -> range check on a byte pair -//! - `IS_HALF[X + 256*Y]` -> range check on a halfword +//! ## Layout (4 slices × 256² = 2¹⁸ rows, power of two) //! -//! ## Table Structure +//! | Slice | OP_ID | Role | RESULT col holds | +//! |-------|------:|--------------------------------|------------------| +//! | 0 | 0 | non-bitwise (MSB8/MSB16/range) | 0 (unused) | +//! | 1 | 1 | AND | X & Y | +//! | 2 | 2 | OR | X \| Y | +//! | 3 | 4 | XOR | X ^ Y | //! -//! 2¹⁶ = 65,536 rows indexed by `(X: Byte, Y: Byte)`. All lookups are received -//! 
with negative multiplicity (other tables send to this one). +//! `OP_ID = 1 / 2 / 4` is the disjoint-bit encoding `AND + 2*OR + 4*XOR`. +//! Senders emit `(op_id, X, Y, RESULT)` against `BusId::Bitwise`; the row +//! at the matching slice has the right RESULT precomputed. +//! +//! Non-bitwise multiplicities (MU_IS_BYTE, MU_IS_HALF, MU_MSB8, MU_MSB16) +//! live exclusively on the slice-0 rows so each (X, Y) has a single +//! canonical home; the senders for those buses don't include an op_id. use std::sync::OnceLock; @@ -44,41 +45,59 @@ pub mod cols { pub const X: usize = 0; /// Y: Byte input (0-255) pub const Y: usize = 1; - /// AND result: X & Y - pub const AND: usize = 2; - /// OR result: X | Y - pub const OR: usize = 3; - /// XOR result: X ^ Y - pub const XOR: usize = 4; + /// OP_ID: ∈ {0, 1, 2, 4}. 0 = non-bitwise slice, 1/2/4 = AND/OR/XOR. + pub const OP_ID: usize = 2; + /// RESULT: precomputed result for the slice's op (0 on slice 0). + pub const RESULT: usize = 3; /// MSB of byte X: (X >> 7) & 1 - pub const MSB8: usize = 5; - /// MSB of halfword X + 256*Y: ((X + 256*Y) >> 15) & 1 - pub const MSB16: usize = 6; - - /// Multiplicity for AND_BYTE lookups - pub const MU_AND: usize = 7; - /// Multiplicity for OR_BYTE lookups - pub const MU_OR: usize = 8; - /// Multiplicity for XOR_BYTE lookups - pub const MU_XOR: usize = 9; - /// Multiplicity for MSB8 lookups - pub const MU_MSB8: usize = 10; - /// Multiplicity for MSB16 lookups - pub const MU_MSB16: usize = 11; - /// Multiplicity for IS_BYTE lookups - pub const MU_IS_BYTE: usize = 12; - /// Multiplicity for IS_HALF lookups - pub const MU_IS_HALF: usize = 13; + pub const MSB8: usize = 4; + /// MSB of halfword (X + 256*Y): ((X + 256*Y) >> 15) & 1 + pub const MSB16: usize = 5; + + /// Multiplicity for the unified Bitwise lookup (AND/OR/XOR). + pub const MU_BITWISE: usize = 6; + /// Multiplicity for IS_BYTE lookups (only fired on slice 0). 
+ pub const MU_IS_BYTE: usize = 7; + /// Multiplicity for IS_HALF lookups (only fired on slice 0). + pub const MU_IS_HALF: usize = 8; + /// Multiplicity for MSB8 lookups (only fired on slice 0). + pub const MU_MSB8: usize = 9; + /// Multiplicity for MSB16 lookups (only fired on slice 0). + pub const MU_MSB16: usize = 10; /// Total number of columns - pub const NUM_COLUMNS: usize = 14; + pub const NUM_COLUMNS: usize = 11; } -/// 2¹⁶ rows = 65,536. -pub const NUM_ROWS: usize = 256 * 256; +/// 256² × 4 slices = 262,144 rows (2¹⁸). +pub const NUM_ROWS: usize = 256 * 256 * 4; /// Number of precomputed (non-multiplicity) columns. -pub const NUM_PRECOMPUTED_COLS: usize = 7; +pub const NUM_PRECOMPUTED_COLS: usize = 6; + +/// Number of slices in the table (op_id discriminator). +pub const NUM_SLICES: usize = 4; + +/// Maps a slice index `s ∈ [0, 4)` to its `OP_ID` value. +const fn slice_to_op_id(s: usize) -> u32 { + match s { + 0 => 0, + 1 => 1, + 2 => 2, + 3 => 4, + _ => 0, + } +} + +/// Inverse: given an op_id ∈ {1, 2, 4}, return its slice index. +const fn op_id_to_slice(op_id: u8) -> usize { + match op_id { + 1 => 1, + 2 => 2, + 4 => 3, + _ => 0, + } +} // ========================================================================= // Compile-time row generation @@ -86,15 +105,21 @@ pub const NUM_PRECOMPUTED_COLS: usize = 7; /// Generate one row of the byte_ops table. /// -/// Index encoding: `index = x + y * 256` with `x, y ∈ [0, 255]`. +/// Index encoding: `index = x + 256 * y + 65536 * slice` where +/// `x, y ∈ [0, 255]` and `slice ∈ [0, 4)`. 
#[inline] pub const fn generate_byte_ops_row(index: usize) -> [u64; NUM_PRECOMPUTED_COLS] { let x = (index & 0xFF) as u32; let y = ((index >> 8) & 0xFF) as u32; - - let and_val = x & y; - let or_val = x | y; - let xor_val = x ^ y; + let slice = (index >> 16) & 0x3; + let op_id = slice_to_op_id(slice); + + let result = match op_id { + 1 => x & y, + 2 => x | y, + 4 => x ^ y, + _ => 0, + }; let msb8 = (x >> 7) & 1; let halfword = x + y * 256; @@ -103,15 +128,13 @@ pub const fn generate_byte_ops_row(index: usize) -> [u64; NUM_PRECOMPUTED_COLS] [ x as u64, y as u64, - and_val as u64, - or_val as u64, - xor_val as u64, + op_id as u64, + result as u64, msb8 as u64, msb16 as u64, ] } -/// Whether this table is preprocessed (commitment is hardcoded). pub const fn is_preprocessed() -> bool { true } @@ -124,9 +147,9 @@ static BYTE_OPS_COMMITMENT: OnceLock = OnceLock::new(); /// Computes the Merkle commitment over the precomputed byte_ops columns. /// -/// Mirrors [`bitwise::compute_preprocessed_commitment`] — see that for the -/// rationale (LDE-rooted commitment is required so FRI queries at any -/// blow-up index can be opened against this precomputed table). +/// Mirrors [`bitwise::compute_preprocessed_commitment`] — the LDE-rooted +/// commitment is required so FRI queries at any blow-up index can be opened +/// against this preprocessed table. 
fn compute_preprocessed_commitment(options: &ProofOptions) -> Commitment { #[cfg(feature = "parallel")] let columns: Vec> = (0..NUM_PRECOMPUTED_COLS) @@ -225,114 +248,99 @@ pub fn preprocessed_commitment(options: &ProofOptions) -> Commitment { pub fn generate_byte_ops_trace() -> TraceTable { let mut data = vec![FE::zero(); NUM_ROWS * cols::NUM_COLUMNS]; - for x in 0u32..256 { - for y in 0u32..256 { - let row_idx = (x as usize) + (y as usize) * 256; - let base = row_idx * cols::NUM_COLUMNS; - - data[base + cols::X] = FE::from(x as u64); - data[base + cols::Y] = FE::from(y as u64); - data[base + cols::AND] = FE::from((x & y) as u64); - data[base + cols::OR] = FE::from((x | y) as u64); - data[base + cols::XOR] = FE::from((x ^ y) as u64); - - let msb8 = (x >> 7) & 1; - let halfword = x + y * 256; - let msb16 = (halfword >> 15) & 1; - data[base + cols::MSB8] = FE::from(msb8 as u64); - data[base + cols::MSB16] = FE::from(msb16 as u64); - - // Multiplicity columns initialized to zero by the vec! above. + for slice in 0..NUM_SLICES { + let op_id = slice_to_op_id(slice); + for x in 0u32..256 { + for y in 0u32..256 { + let row_idx = (x as usize) + (y as usize) * 256 + slice * 65536; + let base = row_idx * cols::NUM_COLUMNS; + + data[base + cols::X] = FE::from(x as u64); + data[base + cols::Y] = FE::from(y as u64); + data[base + cols::OP_ID] = FE::from(op_id as u64); + + let result = match op_id { + 1 => x & y, + 2 => x | y, + 4 => x ^ y, + _ => 0, + }; + data[base + cols::RESULT] = FE::from(result as u64); + + let msb8 = (x >> 7) & 1; + let halfword = x + y * 256; + let msb16 = (halfword >> 15) & 1; + data[base + cols::MSB8] = FE::from(msb8 as u64); + data[base + cols::MSB16] = FE::from(msb16 as u64); + + // Multiplicity columns are zero-initialized by `vec!` above. + } } } TraceTable::new_main(data, cols::NUM_COLUMNS, 1) } +/// Slice-0 row index for a given (x, y), used by non-bitwise senders that +/// don't include an op_id. 
#[inline] pub fn row_index(x: u8, y: u8) -> usize { (x as usize) + (y as usize) * 256 } +/// Row index for a bitwise op (AND/OR/XOR) at the matching op_id slice. +#[inline] +pub fn bitwise_row_index(x: u8, y: u8, op_id: u8) -> usize { + let slice = op_id_to_slice(op_id); + (x as usize) + (y as usize) * 256 + slice * 65536 +} + /// Apply lookups to multiplicity columns. /// -/// Routes byte-pair events (AndByte/OrByte/XorByte/Msb8/Msb16/IsByte/IsHalf) -/// into byte_ops; events for the 20-bit ops (Zero/IsB20/Hwsl) stay in -/// `bitwise::update_multiplicities` and are skipped here. +/// Routes the byte-pair operations into their canonical rows: +/// - AndByte/OrByte/XorByte → MU_BITWISE on the matching op_id slice +/// - Msb8/Msb16/IsByte/IsHalf → MU_* on slice 0 (single canonical home per (X, Y)) +/// - 20-bit ops (Zero/IsB20/Hwsl) stay on the BITWISE table. pub fn update_multiplicities( trace: &mut TraceTable, ops: &[BitwiseOperation], ) { for op in ops { - let mu_col = match op.lookup_type { - BitwiseOperationType::AndByte => cols::MU_AND, - BitwiseOperationType::OrByte => cols::MU_OR, - BitwiseOperationType::XorByte => cols::MU_XOR, - BitwiseOperationType::Msb8 => cols::MU_MSB8, - BitwiseOperationType::Msb16 => cols::MU_MSB16, - BitwiseOperationType::IsByte => cols::MU_IS_BYTE, - BitwiseOperationType::IsHalf => cols::MU_IS_HALF, + let (mu_col, row) = match op.lookup_type { + BitwiseOperationType::AndByte => (cols::MU_BITWISE, bitwise_row_index(op.x, op.y, 1)), + BitwiseOperationType::OrByte => (cols::MU_BITWISE, bitwise_row_index(op.x, op.y, 2)), + BitwiseOperationType::XorByte => (cols::MU_BITWISE, bitwise_row_index(op.x, op.y, 4)), + BitwiseOperationType::Msb8 => (cols::MU_MSB8, row_index(op.x, op.y)), + BitwiseOperationType::Msb16 => (cols::MU_MSB16, row_index(op.x, op.y)), + BitwiseOperationType::IsByte => (cols::MU_IS_BYTE, row_index(op.x, op.y)), + BitwiseOperationType::IsHalf => (cols::MU_IS_HALF, row_index(op.x, op.y)), BitwiseOperationType::Zero | 
BitwiseOperationType::IsB20 | BitwiseOperationType::Hwsl => continue, }; - let row = row_index(op.x, op.y); let current = trace.main_table.get_row(row)[mu_col]; trace.set_main(row, mu_col, current + FE::one()); } } // ========================================================================= -// Bus interactions (empty in Step 1; populated in Step 2) +// Bus interactions // ========================================================================= -/// Receivers for byte-pair lookups. The 20-bit ops (Zero/IsB20/Hwsl) stay -/// on the BITWISE table. +/// Receivers for byte-pair lookups. The unified Bitwise receiver matches +/// `(op_id, X, Y, RESULT)`; non-bitwise receivers stay separate but only +/// receive on slice-0 rows where their multiplicity is non-zero. pub fn bus_interactions() -> Vec { vec![ - // AND_BYTE[X, Y] -> AND - BusInteraction::receiver( - BusId::AndByte, - Multiplicity::Column(cols::MU_AND), - vec![ - BusValue::Packed { - start_column: cols::X, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::Y, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::AND, - packing: Packing::Direct, - }, - ], - ), - // OR_BYTE[X, Y] -> OR + // Bitwise[op_id, X, Y, RESULT] BusInteraction::receiver( - BusId::OrByte, - Multiplicity::Column(cols::MU_OR), + BusId::Bitwise, + Multiplicity::Column(cols::MU_BITWISE), vec![ BusValue::Packed { - start_column: cols::X, + start_column: cols::OP_ID, packing: Packing::Direct, }, - BusValue::Packed { - start_column: cols::Y, - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::OR, - packing: Packing::Direct, - }, - ], - ), - // XOR_BYTE[X, Y] -> XOR - BusInteraction::receiver( - BusId::XorByte, - Multiplicity::Column(cols::MU_XOR), - vec![ BusValue::Packed { start_column: cols::X, packing: Packing::Direct, @@ -342,7 +350,7 @@ pub fn bus_interactions() -> Vec { packing: Packing::Direct, }, BusValue::Packed { - start_column: cols::XOR, + start_column: 
cols::RESULT, packing: Packing::Direct, }, ], @@ -384,7 +392,6 @@ pub fn bus_interactions() -> Vec { ], ), // IS_BYTE[X, Y] - range check two byte values, no output. - // Single-byte checks send the second argument as 0. BusInteraction::receiver( BusId::IsByte, Multiplicity::Column(cols::MU_IS_BYTE), diff --git a/prover/src/tables/cpu.rs b/prover/src/tables/cpu.rs index 70ae8c501..dec8e403b 100644 --- a/prover/src/tables/cpu.rs +++ b/prover/src/tables/cpu.rs @@ -1038,61 +1038,32 @@ pub fn bus_interactions() -> Vec { // )); // ------------------------------------------------------------------------- - // AND_BYTE interactions (×8 for each byte) - // ------------------------------------------------------------------------- - for i in 0..8 { - interactions.push(BusInteraction::sender( - BusId::AndByte, - Multiplicity::Column(cols::AND), - vec![ - BusValue::Packed { - start_column: cols::ARG1[i], - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::ARG2[i], - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::RES[i], - packing: Packing::Direct, - }, - ], - )); - } - - // ------------------------------------------------------------------------- - // OR_BYTE interactions (×8) - // ------------------------------------------------------------------------- - for i in 0..8 { - interactions.push(BusInteraction::sender( - BusId::OrByte, - Multiplicity::Column(cols::OR), - vec![ - BusValue::Packed { - start_column: cols::ARG1[i], - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::ARG2[i], - packing: Packing::Direct, - }, - BusValue::Packed { - start_column: cols::RES[i], - packing: Packing::Direct, - }, - ], - )); - } - - // ------------------------------------------------------------------------- - // XOR_BYTE interactions (×8) + // Unified Bitwise interaction (×8 for each byte) + // + // Replaces 24 sends (AndByte/OrByte/XorByte ×8 each) with 8 sends to + // BusId::Bitwise. 
Token: (op_id, X, Y, RESULT) with disjoint-bit op_id + // = AND + 2*OR + 4*XOR. Multiplicity = AND + OR + XOR (at-most-one). // ------------------------------------------------------------------------- for i in 0..8 { interactions.push(BusInteraction::sender( - BusId::XorByte, - Multiplicity::Column(cols::XOR), + BusId::Bitwise, + Multiplicity::Sum3(cols::AND, cols::OR, cols::XOR), vec![ + // op_id = AND + 2*OR + 4*XOR + BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::AND, + }, + LinearTerm::Column { + coefficient: 2, + column: cols::OR, + }, + LinearTerm::Column { + coefficient: 4, + column: cols::XOR, + }, + ]), BusValue::Packed { start_column: cols::ARG1[i], packing: Packing::Direct, diff --git a/prover/src/tables/shift.rs b/prover/src/tables/shift.rs index 9014799e5..2abc499b2 100644 --- a/prover/src/tables/shift.rs +++ b/prover/src/tables/shift.rs @@ -396,11 +396,12 @@ pub fn bus_interactions() -> Vec { ], )); - // SHIFT-C1: AND_BYTE[shift, 15] → bit_shift | left (= μ - direction) + // SHIFT-C1: Bitwise[op_id=1, shift, 15] → bit_shift | left (= μ - direction) interactions.push(BusInteraction::sender( - BusId::AndByte, + BusId::Bitwise, Multiplicity::Diff(cols::MU, cols::DIRECTION), vec![ + BusValue::constant(1), BusValue::Packed { start_column: cols::SHIFT_AMOUNT, packing: Packing::Direct, @@ -413,15 +414,16 @@ pub fn bus_interactions() -> Vec { ], )); - // SHIFT-C2: AND_BYTE[256 - zbs * 16 - shift, 15] → bit_shift | right (= direction) + // SHIFT-C2: Bitwise[op_id=1, 256 - zbs*16 - shift, 15] → bit_shift | right (= direction) // 256 - shift would overflow a byte when shift = 0. Subtracting zbs * 16 keeps it in // [0,255]. // When zbs = 1, shift is a multiple of 16 (i.e. shift ∈ [0, 240]), so // 256 - 16 - shift ∈ [0,255]. 
interactions.push(BusInteraction::sender( - BusId::AndByte, + BusId::Bitwise, Multiplicity::Column(cols::DIRECTION), vec![ + BusValue::constant(1), BusValue::linear(vec![ LinearTerm::Constant(256), LinearTerm::Column { @@ -519,13 +521,14 @@ pub fn bus_interactions() -> Vec { ], )); - // SHIFT-C11: AND_BYTE[encoded_limb; shift, mask] | μ + // SHIFT-C11: Bitwise[op_id=1, shift, mask] → encoded_limb | μ // encoded = (1 - ls[0]) + 15*ls[1] + 31*ls[2] + 47*ls[3] // mask = 48 - 32 * word_instr interactions.push(BusInteraction::sender( - BusId::AndByte, + BusId::Bitwise, Multiplicity::Column(cols::MU), vec![ + BusValue::constant(1), // first input: shift BusValue::Packed { start_column: cols::SHIFT_AMOUNT, diff --git a/prover/src/tables/types.rs b/prover/src/tables/types.rs index a1dcd043a..7dc2e053d 100644 --- a/prover/src/tables/types.rs +++ b/prover/src/tables/types.rs @@ -54,6 +54,12 @@ pub enum BusId { // ========================================================================= // Bitwise operations (BITWISE table provides) // ========================================================================= + /// Unified byte-bitwise lookup: BITWISE[op_id, X, Y] -> RESULT. + /// `op_id ∈ {1, 2, 4}` selects AND, OR, XOR respectively (disjoint-bit + /// encoding so `op_id = AND + 2*OR + 4*XOR`). Replaces AndByte/OrByte/ + /// XorByte. The receiver carries one row per (X, Y, op_id) so the + /// `RESULT` column varies by op_id. 
+ Bitwise, /// Bitwise AND of two bytes: AND_BYTE[X, Y] -> X & Y AndByte, /// Bitwise OR of two bytes: OR_BYTE[X, Y] -> X | Y @@ -119,6 +125,7 @@ impl BusId { BusId::IsByte => "IsByte", BusId::IsHalfword => "IsHalfword", BusId::IsB20 => "IsB20", + BusId::Bitwise => "Bitwise", BusId::AndByte => "AndByte", BusId::OrByte => "OrByte", BusId::XorByte => "XorByte", @@ -169,6 +176,7 @@ impl TryFrom for BusId { 19 => Ok(BusId::Ecall), 20 => Ok(BusId::CommitNextByte), 21 => Ok(BusId::Commit), + 22 => Ok(BusId::Bitwise), other => Err(other), } } diff --git a/prover/src/tests/trace_builder_tests.rs b/prover/src/tests/trace_builder_tests.rs index 013f68ac2..719a70a90 100644 --- a/prover/src/tests/trace_builder_tests.rs +++ b/prover/src/tests/trace_builder_tests.rs @@ -268,11 +268,11 @@ fn test_bitwise_lookups_collected() { let traces = Traces::from_logs(&logs, instructions, &Default::default()).unwrap(); - // Check AND multiplicity was updated for (0x12, 0x34) in BYTE_OPS - // (byte-pair receivers moved out of BITWISE). - let row_idx = byte_ops::row_index(0x12, 0x34); + // Check unified Bitwise multiplicity at (op_id=AND, X=0x12, Y=0x34). + // The byte_ops table now has 4 op_id slices; AND lookups land on slice 1. 
+ let row_idx = byte_ops::bitwise_row_index(0x12, 0x34, 1); let row = traces.byte_ops.main_table.get_row(row_idx); - assert_eq!(row[byte_ops::cols::MU_AND], FE::one()); + assert_eq!(row[byte_ops::cols::MU_BITWISE], FE::one()); } #[test] From 874510d219a67c72459550ea24f3ac903782c0a4 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 5 May 2026 17:18:22 -0300 Subject: [PATCH 6/8] Split AND/OR/XOR rows into a dedicated CPU_BITWISE chip --- prover/src/lib.rs | 21 ++++++++++----- prover/src/tables/cpu.rs | 36 +++++++++++++++++++++++++ prover/src/tables/mod.rs | 1 + prover/src/tables/trace_builder.rs | 32 ++++++++++++++++++---- prover/src/test_utils.rs | 24 ++++++++++++++++- prover/src/tests/prove_elfs_tests.rs | 5 +++- prover/src/tests/trace_builder_tests.rs | 6 +++-- 7 files changed, 110 insertions(+), 15 deletions(-) diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 00cacf9cb..9fff54ab1 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -41,9 +41,10 @@ use crate::tables::trace_builder::Traces; use crate::tables::types::BusId; use crate::test_utils::{ E, F, VmAir, create_bitwise_air, create_branch_air, create_byte_ops_air, create_commit_air, - create_cpu_air, create_decode_air, create_dvrm_air, create_halt_air, create_load_air, - create_lt_air, create_memw_air, create_memw_aligned_air, create_memw_register_air, - create_mul_air, create_page_air, create_register_air, create_shift_air, + create_cpu_air, create_cpu_bitwise_air, create_decode_air, create_dvrm_air, create_halt_air, + create_load_air, create_lt_air, create_memw_air, create_memw_aligned_air, + create_memw_register_air, create_mul_air, create_page_air, create_register_air, + create_shift_air, }; use stark::proof::options::{GoldilocksCubicProofOptions, ProofOptions}; @@ -189,6 +190,7 @@ type AirTracePair<'a> = ( /// All VM AIR instances, grouped by table. 
pub(crate) struct VmAirs { pub cpus: Vec, + pub cpu_bitwise: VmAir, pub bitwise: VmAir, pub byte_ops: VmAir, pub lts: Vec, @@ -213,6 +215,7 @@ impl VmAirs { let mut pairs: Vec> = vec![ (&self.bitwise, &mut traces.bitwise, &()), (&self.byte_ops, &mut traces.byte_ops, &()), + (&self.cpu_bitwise, &mut traces.cpu_bitwise, &()), (&self.decode, &mut traces.decode, &()), (&self.halt, &mut traces.halt, &()), (&self.commit, &mut traces.commit, &()), @@ -269,6 +272,7 @@ impl VmAirs { let mut refs: Vec<&dyn AIR> = vec![ &self.bitwise, &self.byte_ops, + &self.cpu_bitwise, &self.decode, &self.halt, &self.commit, @@ -328,6 +332,7 @@ impl VmAirs { let cpus: Vec<_> = (0..table_counts.cpu) .map(|i| create_cpu_air(proof_options).with_name(&format!("CPU[{}]", i))) .collect(); + let cpu_bitwise = create_cpu_bitwise_air(proof_options); let bitwise = if minimal_bitwise { create_bitwise_air(proof_options) } else { @@ -406,6 +411,7 @@ impl VmAirs { Self { cpus, + cpu_bitwise, bitwise, byte_ops, lts, @@ -703,11 +709,14 @@ pub fn verify_with_options( ); // Cross-check: table_counts must match the number of sub-proofs. - // Fixed tables (bitwise, decode, halt, commit, register) = 5, plus page tables. - let expected_proof_count = vm_proof.table_counts.total() + 5 + page_configs.len(); + // Fixed tables = 7: bitwise, byte_ops, cpu_bitwise, decode, halt, commit, register. + // Plus one page table per page_config. 
+ const NUM_FIXED_TABLES: usize = 7; + let expected_proof_count = + vm_proof.table_counts.total() + NUM_FIXED_TABLES + page_configs.len(); if expected_proof_count != vm_proof.proof.proofs.len() { return Err(Error::InvalidTableCounts(format!( - "table_counts total ({}) + 5 fixed + {} pages = {}, but proof contains {} sub-proofs", + "table_counts total ({}) + {NUM_FIXED_TABLES} fixed + {} pages = {}, but proof contains {} sub-proofs", vm_proof.table_counts.total(), page_configs.len(), expected_proof_count, diff --git a/prover/src/tables/cpu.rs b/prover/src/tables/cpu.rs index dec8e403b..9dbd66244 100644 --- a/prover/src/tables/cpu.rs +++ b/prover/src/tables/cpu.rs @@ -2043,6 +2043,42 @@ pub fn bus_interactions() -> Vec { interactions } +// ========================================================================= +// Bus interaction filters for the CPU / CPU_BITWISE chip split +// ========================================================================= + +/// Bus interactions for the main CPU chip after the AND/OR/XOR rows are +/// routed to a dedicated `CPU_BITWISE` chip. +/// +/// Filters out the 8 unified `BusId::Bitwise` sender declarations (the +/// collapsed bitwise bus). Non-bitwise CPU rows then no longer pay aux EF +/// cells for those declarations. +pub fn bus_interactions_without_bitwise() -> Vec { + let bitwise_id: u64 = BusId::Bitwise.into(); + bus_interactions() + .into_iter() + .filter(|i| i.bus_id != bitwise_id) + .collect() +} + +/// Bus interactions for the CPU_BITWISE chip — only the buses an AND/OR/XOR +/// row actually uses. Drops every other class's declarations so the +/// bitwise chip's effective width stays as small as possible. 
+pub fn bus_interactions_bitwise_chip() -> Vec { + let keep: [u64; 6] = [ + BusId::Decode.into(), + BusId::IsByte.into(), + BusId::Bitwise.into(), + BusId::Msb16.into(), // ANDW/ORW/XORW set word_instr=1, triggering MSB16 lookups + BusId::Memw.into(), + BusId::Memory.into(), // inline PC read/write + ]; + bus_interactions() + .into_iter() + .filter(|i| keep.contains(&i.bus_id)) + .collect() +} + // ========================================================================= // Constraints (placeholder - will be implemented in constraints/) // ========================================================================= diff --git a/prover/src/tables/mod.rs b/prover/src/tables/mod.rs index c904a4fd6..4007de336 100644 --- a/prover/src/tables/mod.rs +++ b/prover/src/tables/mod.rs @@ -26,6 +26,7 @@ pub mod branch; pub mod byte_ops; pub mod commit; pub mod cpu; +pub mod cpu_bitwise; pub mod decode; pub mod dvrm; pub mod halt; diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 95764c687..d49af6c89 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -1613,9 +1613,13 @@ fn generate_page_tables( /// All generated trace tables. pub struct Traces { - /// CPU execution traces (split into chunks of max_rows::CPU) + /// CPU execution traces (split into chunks of max_rows::CPU). + /// Excludes AND/OR/XOR rows after Phase 4 — those go to `cpu_bitwise`. pub cpus: Vec>, + /// CPU_BITWISE chip — holds AND/OR/XOR rows peeled from the main CPU. + pub cpu_bitwise: TraceTable, + /// BITWISE precomputed lookup table (2^20 rows) pub bitwise: TraceTable, @@ -1867,12 +1871,23 @@ fn build_traces( // COMMIT table sends IsByte and IsHalfword lookups bitwise_ops.extend(collect_bitwise_from_commit(&commit_ops)); - // CPU padding rows send IS_BYTE with all-zero values. - // Add corresponding ops so the bitwise table multiplicities balance. 
- let num_padding_rows: usize = cpu_ops + // Phase 4: split cpu_ops by op_and/op_or/op_xor so AND/OR/XOR rows live + // on the dedicated CPU_BITWISE chip; the main CPU chunks hold everything + // else. + let (bitwise_cpu_ops, non_bitwise_cpu_ops): (Vec, Vec) = cpu_ops + .iter() + .cloned() + .partition(|op| op.decode.op_and || op.decode.op_or || op.decode.op_xor); + + // CPU + CPU_BITWISE padding rows both emit IS_BYTE with all-zero values. + // Sum padding across both tables so byte_ops multiplicities balance. + let cpu_padding: usize = non_bitwise_cpu_ops .chunks(max_rows.cpu) .map(|chunk| chunk.len().next_power_of_two().max(4) - chunk.len()) .sum(); + let cpu_bitwise_padding: usize = + bitwise_cpu_ops.len().next_power_of_two().max(4) - bitwise_cpu_ops.len(); + let num_padding_rows = cpu_padding + cpu_bitwise_padding; bitwise_ops.extend(collect_byte_check_ops_for_padding(num_padding_rows)); // ===================================================================== @@ -1887,7 +1902,8 @@ fn build_traces( .ok_or(Error::MissingHaltEcall)?; let halt_timestamp = halt_op.timestamp; - let cpus = chunk_and_generate(&cpu_ops, max_rows.cpu, cpu::generate_cpu_trace); + let cpus = chunk_and_generate(&non_bitwise_cpu_ops, max_rows.cpu, cpu::generate_cpu_trace); + let cpu_bitwise = cpu::generate_cpu_trace(&bitwise_cpu_ops); let memws = chunk_and_generate(&memw_ops, max_rows.memw, memw::generate_memw_trace); let memw_aligneds = chunk_and_generate( &memw_aligned_ops, @@ -1967,6 +1983,7 @@ fn build_traces( Ok(Traces { cpus, + cpu_bitwise, bitwise, byte_ops, lts, @@ -2022,6 +2039,7 @@ impl Traces { let Traces { cpus, + cpu_bitwise, bitwise, byte_ops, lts, @@ -2046,6 +2064,7 @@ impl Traces { for t in cpus { total += (t.num_rows() * CPU_COLS) as u64; } + total += (cpu_bitwise.num_rows() * CPU_COLS) as u64; total += (bitwise.num_rows() * (BITWISE_COLS - BITWISE_PRECOMPUTED)) as u64; total += (byte_ops.num_rows() * (BYTE_OPS_COLS - BYTE_OPS_PRECOMPUTED)) as u64; for t in lts { @@ -2098,6 
+2117,7 @@ impl Traces { } let n_cpu = aux_cols(super::cpu::bus_interactions().len()); + let n_cpu_bitwise = aux_cols(super::cpu_bitwise::bus_interactions().len()); let n_bitwise = aux_cols(super::bitwise::bus_interactions().len()); let n_byte_ops = aux_cols(super::byte_ops::bus_interactions().len()); let n_lt = aux_cols(super::lt::bus_interactions().len()); @@ -2118,6 +2138,7 @@ impl Traces { let Traces { cpus, + cpu_bitwise, bitwise, byte_ops, lts, @@ -2142,6 +2163,7 @@ impl Traces { for t in cpus { total += (t.num_rows() * n_cpu) as u64; } + total += (cpu_bitwise.num_rows() * n_cpu_bitwise) as u64; total += (bitwise.num_rows() * n_bitwise) as u64; total += (byte_ops.num_rows() * n_byte_ops) as u64; for t in lts { diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index 567e6ee52..efaead356 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -38,8 +38,9 @@ use crate::tables::commit::{ create_constraints as commit_constraints, }; use crate::tables::cpu::{ - CpuOperation, bus_interactions as cpu_bus_interactions, cols as cpu_cols, + CpuOperation, bus_interactions_without_bitwise as cpu_bus_interactions, cols as cpu_cols, }; +use crate::tables::cpu_bitwise::bus_interactions as cpu_bitwise_bus_interactions; use crate::tables::decode::{bus_interactions as decode_bus_interactions, cols as decode_cols}; use crate::tables::dvrm::{ bus_interactions as dvrm_bus_interactions, cols as dvrm_cols, dvrm_constraints, @@ -415,6 +416,27 @@ pub fn create_bitwise_air(proof_options: &ProofOptions) -> VmAir { .with_name("BITWISE") } +/// Create CPU_BITWISE AIR. Phase 4 — handles AND/OR/XOR rows (and `*W` +/// 32-bit variants). Reuses CPU's column layout but declares only the +/// buses an AND/OR/XOR row actually fires (Decode, IsByte, Bitwise, Msb16, +/// Memw, Memory). 
+pub fn create_cpu_bitwise_air(proof_options: &ProofOptions) -> VmAir { + let transition_constraints: Vec>> = vec![]; + + let auxiliary_trace_build_data = AuxiliaryTraceBuildData { + interactions: cpu_bitwise_bus_interactions(), + }; + + AirWithBuses::new( + cpu_cols::NUM_COLUMNS, + auxiliary_trace_build_data, + proof_options, + 1, + transition_constraints, + ) + .with_name("CPU_BITWISE") +} + /// Create BYTE_OPS AIR. Step 1 wires it through with no bus interactions /// (BITWISE still owns every receiver); Step 2 will move them here. pub fn create_byte_ops_air(proof_options: &ProofOptions) -> VmAir { diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 592ee3b1c..21ff173eb 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -1796,7 +1796,10 @@ fn test_crafted_zero_count_proof_must_not_verify() { let airs = VmAirs::new(&elf, &proof_options, true, &[], &zero_counts); let verifier_air_refs = airs.air_refs(); - assert_eq!(verifier_air_refs.len(), 5); + // Always-present singletons: BITWISE, BYTE_OPS, CPU_BITWISE, DECODE, + // HALT, COMMIT, REGISTER. Vec'd tables (CPU/LT/etc) are empty under + // zero-counts. + assert_eq!(verifier_air_refs.len(), 7); let mut bitwise_trace = crate::tables::bitwise::generate_bitwise_trace(); diff --git a/prover/src/tests/trace_builder_tests.rs b/prover/src/tests/trace_builder_tests.rs index 719a70a90..4820e963a 100644 --- a/prover/src/tests/trace_builder_tests.rs +++ b/prover/src/tests/trace_builder_tests.rs @@ -343,8 +343,10 @@ fn test_mixed_instructions() { let traces = Traces::from_logs(&logs, instructions, &Default::default()).unwrap(); - // 5 ops (4 + ecall) padded to 8 - assert_eq!(traces.cpus[0].main_table.height, 8); + // After Phase 4 the AND row is routed to CPU_BITWISE; CPU keeps the + // remaining 4 (ADD, SLT, BLT, ECALL) padded to 4. 
+ assert_eq!(traces.cpus[0].main_table.height, 4); + assert_eq!(traces.cpu_bitwise.main_table.height, 4); // 1 AND padded to min 4 assert_eq!( traces.bitwise.main_table.height, crate::tables::bitwise::NUM_ROWS From 3bc82ed0df658a53d42e97c58ec844ded5e28bb6 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 5 May 2026 17:26:40 -0300 Subject: [PATCH 7/8] add missing file --- prover/src/tables/cpu_bitwise.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 prover/src/tables/cpu_bitwise.rs diff --git a/prover/src/tables/cpu_bitwise.rs b/prover/src/tables/cpu_bitwise.rs new file mode 100644 index 000000000..2d977c5c8 --- /dev/null +++ b/prover/src/tables/cpu_bitwise.rs @@ -0,0 +1,18 @@ +//! CPU_BITWISE chip — handles AND, OR, XOR (and their `*W` 32-bit variants). +//! +//! Phase 4 of the CPU-width reduction plan: peel the bitwise rows out of +//! the unified CPU table so the main CPU table stops paying aux EF cells +//! for the unified `BusId::Bitwise` declarations on every non-bitwise row. +//! +//! The chip reuses the CPU column layout and trace generator; the only +//! thing it changes is the `bus_interactions` filter — only buses an +//! AND/OR/XOR row actually fires are declared here. + +pub use super::cpu::{CpuOperation, cols, generate_cpu_trace}; + +use stark::lookup::BusInteraction; + +/// Bus interactions for the CPU_BITWISE chip. 
+pub fn bus_interactions() -> Vec { + super::cpu::bus_interactions_bitwise_chip() +} From 2d6a26af6249910e45f35afa29ef40e2adcf123c Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Wed, 6 May 2026 16:48:23 -0300 Subject: [PATCH 8/8] Update CPU bus_interactions test from 58 to 42 after AndByte/OrByte/XorByte unification into BusId::Bitwise --- prover/src/tests/cpu_tests.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/prover/src/tests/cpu_tests.rs b/prover/src/tests/cpu_tests.rs index 6b3239f43..b3cb254bf 100644 --- a/prover/src/tests/cpu_tests.rs +++ b/prover/src/tests/cpu_tests.rs @@ -310,9 +310,7 @@ fn test_bus_interactions_count() { let interactions = bus_interactions(); // Expected interactions: - // - 8 AND_BYTE - // - 8 OR_BYTE - // - 8 XOR_BYTE + // - 8 BITWISE (unified bus: 8 byte pairs × 1 send each, op_id ∈ {1,2,4}) // - 2 MSB16 (rv1_sign_bit, arg2_sign_bit) // - 1 MSB8 (res_sign_bit) // - 1 ZERO (is_equal for BEQ) @@ -332,9 +330,10 @@ fn test_bus_interactions_count() { // - 1 IS_BYTE for (RS1, RS2) paired // - 1 IS_BYTE for (RD, 0) // - 12 IS_BYTE (ARG1/ARG2/RES byte pairs: 4 pairs × 3 arrays) - // Inline PC replaces CM54: -1 CM54, +4 inline PC → net +3 vs pre-PR main. - // Total: 8 + 8 + 8 + 2 + 1 + 1 + 1 + 1 + 5 + 4 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 12 = 58 - assert_eq!(interactions.len(), 58); + // AndByte/OrByte/XorByte (24 sends pre-split) collapsed into 8 unified + // BusId::Bitwise sends in commit 95e4d4b3. + // Total: 8 + 2 + 1 + 1 + 1 + 1 + 5 + 4 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 12 = 42 + assert_eq!(interactions.len(), 42); } #[test]