From 293dd24f0e813a360e1256053eb579c529ce440c Mon Sep 17 00:00:00 2001 From: Nicole Date: Thu, 23 Apr 2026 16:58:54 -0300 Subject: [PATCH 1/5] compute LogUp fingerprints in parallel chunks of 1024 rows --- crypto/stark/src/lookup.rs | 283 +++++++++++++++++++++++++++++++++++-- 1 file changed, 273 insertions(+), 10 deletions(-) diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs index 770193020..0a5e0473d 100644 --- a/crypto/stark/src/lookup.rs +++ b/crypto/stark/src/lookup.rs @@ -19,7 +19,7 @@ use math::field::{ traits::{IsFFTField, IsField, IsPrimeField, IsSubFieldOf}, }; #[cfg(feature = "parallel")] -use rayon::prelude::{IntoParallelIterator, ParallelIterator}; +use rayon::prelude::{IndexedParallelIterator, ParallelIterator, ParallelSliceMut}; // ============================================================================= // Shift Constants for Type Combining @@ -100,6 +100,11 @@ pub const LOGUP_CHALLENGE_ALPHA: usize = 1; /// Number of challenges required by the LogUp protocol. pub const LOGUP_NUM_CHALLENGES: usize = 2; +/// Chunk size for fused chunk-local LogUp processing. +/// Each chunk processes all interactions for CHUNK_SIZE rows, fitting in L2 cache. +#[cfg(feature = "parallel")] +const LOGUP_CHUNK_SIZE: usize = 1024; + /// Split N interactions into committed batched pairs and absorbed remainder. 
/// /// Returns `(num_committed_pairs, absorbed_count)` where: @@ -1028,23 +1033,24 @@ where // Clone main columns once (shared across all interactions) let main_segment_cols = trace.columns_main(); let trace_len = trace.num_rows(); - let table_name = self.name.as_deref().unwrap_or("UNKNOWN"); + let _table_name = self.name.as_deref().unwrap_or("UNKNOWN"); // Split interactions: committed pairs get term columns, last 1-2 are absorbed (virtual) let (num_committed_pairs, absorbed_count) = split_interactions(num_interactions); - // Compute committed term columns in parallel (batched pairs only) + // Compute committed term columns (batched pairs only). + // With `parallel`: sequential over pairs, each using chunk-local parallelism + // (parallel across row-chunks, not across pairs) for better cache locality. + // Without `parallel`: sequential over pairs, sequential over rows. #[cfg(feature = "parallel")] let committed_columns: Vec>> = (0..num_committed_pairs) - .into_par_iter() .map(|i| { - compute_logup_batched_term_column( + compute_logup_batched_term_column_chunked( &self.auxiliary_trace_build_data.interactions[i * 2], &self.auxiliary_trace_build_data.interactions[i * 2 + 1], &main_segment_cols, trace_len, challenges, - table_name, ) }) .collect(); @@ -1057,12 +1063,30 @@ where &main_segment_cols, trace_len, challenges, - table_name, + _table_name, ) }) .collect(); // Compute virtual column for absorbed interactions (NOT written to trace) + #[cfg(feature = "parallel")] + let virtual_column = if absorbed_count == 2 { + compute_logup_batched_term_column_chunked( + &self.auxiliary_trace_build_data.interactions[num_interactions - 2], + &self.auxiliary_trace_build_data.interactions[num_interactions - 1], + &main_segment_cols, + trace_len, + challenges, + ) + } else { + compute_logup_term_column_chunked( + &self.auxiliary_trace_build_data.interactions[num_interactions - 1], + &main_segment_cols, + trace_len, + challenges, + ) + }; + #[cfg(not(feature = "parallel"))] let 
virtual_column = if absorbed_count == 2 { compute_logup_batched_term_column( &self.auxiliary_trace_build_data.interactions[num_interactions - 2], @@ -1070,7 +1094,7 @@ where &main_segment_cols, trace_len, challenges, - table_name, + _table_name, ) } else { compute_logup_term_column( @@ -1078,7 +1102,7 @@ where &main_segment_cols, trace_len, challenges, - table_name, + _table_name, ) }; @@ -1096,7 +1120,7 @@ where &main_segment_cols, trace_len, challenges, - table_name, + _table_name, ); // Build accumulated from all columns (committed + virtual) @@ -1373,6 +1397,7 @@ where /// This is a pure function that takes shared main columns and returns the computed column, /// enabling parallel computation across interactions within a table. #[allow(clippy::needless_range_loop)] +#[cfg_attr(feature = "parallel", allow(dead_code))] fn compute_logup_term_column( table_interaction: &BusInteraction, main_segment_cols: &[Vec>], @@ -1537,6 +1562,7 @@ where /// /// Uses a single batch inversion for both fingerprint vectors (2*N elements). #[allow(clippy::needless_range_loop)] +#[cfg_attr(feature = "parallel", allow(dead_code))] fn compute_logup_batched_term_column( interaction_a: &BusInteraction, interaction_b: &BusInteraction, @@ -1680,6 +1706,243 @@ where .collect() } +/// Computes the multiplicity for a single row of an interaction. +/// +/// This avoids materializing a full Vec<FieldElement<F>> of length trace_len +/// when processing rows in chunks.
+#[cfg(feature = "parallel")] +#[inline] +fn compute_multiplicity_for_row( + multiplicity: &Multiplicity, + main_segment_cols: &[Vec>], + row: usize, +) -> FieldElement { + match multiplicity { + Multiplicity::One => FieldElement::one(), + Multiplicity::Column(col) => main_segment_cols[*col][row].clone(), + Multiplicity::Sum(col_a, col_b) => { + &main_segment_cols[*col_a][row] + &main_segment_cols[*col_b][row] + } + Multiplicity::Negated(col) => FieldElement::::one() - &main_segment_cols[*col][row], + Multiplicity::Diff(col_a, col_b) => { + &main_segment_cols[*col_a][row] - &main_segment_cols[*col_b][row] + } + Multiplicity::Sum3(col_a, col_b, col_c) => { + &main_segment_cols[*col_a][row] + + &main_segment_cols[*col_b][row] + + &main_segment_cols[*col_c][row] + } + Multiplicity::Linear(terms) => { + let mut result = FieldElement::::zero(); + for term in terms { + match *term { + LinearTerm::Column { + coefficient, + column, + } => { + let coeff = FieldElement::::from(coefficient); + result += &main_segment_cols[column][row] * coeff; + } + LinearTerm::ColumnUnsigned { + coefficient, + column, + } => { + let coeff = FieldElement::::from(coefficient); + result += &main_segment_cols[column][row] * coeff; + } + LinearTerm::Constant(value) => { + result += FieldElement::::from(value); + } + } + } + result + } + } +} + +/// Chunk-local batched term column computation for two interactions. +/// +/// Processes rows in chunks of `LOGUP_CHUNK_SIZE`. Per chunk: +/// 1. Compute 2*CHUNK fingerprints (interaction_a and interaction_b) +/// 2. Batch-invert locally (one Montgomery inverse per chunk) +/// 3. Compute terms: m_a/fp_a +/- m_b/fp_b +/// +/// Parallelism is across row-chunks (not across interaction pairs), giving +/// much better cache locality: each thread touches only CHUNK_SIZE rows of +/// main trace data before moving to the next phase. 
+#[cfg(feature = "parallel")] +fn compute_logup_batched_term_column_chunked( + interaction_a: &BusInteraction, + interaction_b: &BusInteraction, + main_segment_cols: &[Vec>], + trace_len: usize, + challenges: &[FieldElement], +) -> Vec> +where + F: IsFFTField + IsSubFieldOf + IsPrimeField + Send + Sync, + E: IsField + Send + Sync, +{ + let z = &challenges[0]; + let alpha = &challenges[LOGUP_CHALLENGE_ALPHA]; + + let max_bus_elements = interaction_a + .num_bus_elements() + .max(interaction_b.num_bus_elements()); + let alpha_powers = compute_alpha_powers(alpha, max_bus_elements); + + let negate_a = !interaction_a.is_sender; + let negate_b = !interaction_b.is_sender; + + let bus_id_a = FieldElement::::from(interaction_a.bus_id); + let bus_id_b = FieldElement::::from(interaction_b.bus_id); + let shifts = PackingShifts::::new(); + + // Output: one FieldElement per row + let mut result = vec![FieldElement::::zero(); trace_len]; + + result + .par_chunks_mut(LOGUP_CHUNK_SIZE) + .enumerate() + .for_each(|(chunk_idx, result_chunk)| { + let chunk_start = chunk_idx * LOGUP_CHUNK_SIZE; + let chunk_len = result_chunk.len(); + + // Phase 1: Compute fingerprints for both interactions in this chunk. 
+ // Layout: [fp_a[0..chunk_len], fp_b[0..chunk_len]] + let compute_chunk_fingerprints = + |interaction: &BusInteraction, + bus_id_f: &FieldElement, + fps: &mut Vec>| { + for row in chunk_start..chunk_start + chunk_len { + let mut lc = bus_id_f * &alpha_powers[0]; + let mut alpha_offset = 1; + for bv in &interaction.values { + let consumed = bv.accumulate_fingerprint( + main_segment_cols, + row, + &alpha_powers, + alpha_offset, + &mut lc, + &shifts, + ); + alpha_offset += consumed; + } + fps.push(z - &lc); + } + }; + + let mut fingerprints: Vec> = Vec::with_capacity(2 * chunk_len); + compute_chunk_fingerprints(interaction_a, &bus_id_a, &mut fingerprints); + compute_chunk_fingerprints(interaction_b, &bus_id_b, &mut fingerprints); + + // Phase 2: Batch-invert all fingerprints in this chunk + FieldElement::inplace_batch_inverse(&mut fingerprints) + .expect("fingerprint is zero - probability of sampling zero is negligible"); + + // Phase 3: Compute terms: m_a/fp_a +/- m_b/fp_b + for (i, result_elem) in result_chunk.iter_mut().enumerate() { + let row = chunk_start + i; + let fp_a_inv = &fingerprints[i]; + let fp_b_inv = &fingerprints[chunk_len + i]; + + let m_a = compute_multiplicity_for_row( + &interaction_a.multiplicity, + main_segment_cols, + row, + ); + let m_b = compute_multiplicity_for_row( + &interaction_b.multiplicity, + main_segment_cols, + row, + ); + + let term_a = &m_a * fp_a_inv; + let term_b = &m_b * fp_b_inv; + let term_a = if negate_a { -term_a } else { term_a }; + let term_b = if negate_b { -term_b } else { term_b }; + *result_elem = term_a + term_b; + } + }); + + result +} + +/// Chunk-local single-interaction term column computation. +/// +/// Same cache-locality benefits as `compute_logup_batched_term_column_chunked` +/// but for a single interaction (used for the virtual absorbed column when +/// `absorbed_count == 1`). 
+#[cfg(feature = "parallel")] +fn compute_logup_term_column_chunked( + interaction: &BusInteraction, + main_segment_cols: &[Vec>], + trace_len: usize, + challenges: &[FieldElement], +) -> Vec> +where + F: IsFFTField + IsSubFieldOf + IsPrimeField + Send + Sync, + E: IsField + Send + Sync, +{ + let z = &challenges[0]; + let alpha = &challenges[LOGUP_CHALLENGE_ALPHA]; + + let num_bus_elements = interaction.num_bus_elements(); + let alpha_powers = compute_alpha_powers(alpha, num_bus_elements); + + let negate = !interaction.is_sender; + + let bus_id_f = FieldElement::::from(interaction.bus_id); + let shifts = PackingShifts::::new(); + + let mut result = vec![FieldElement::::zero(); trace_len]; + + result + .par_chunks_mut(LOGUP_CHUNK_SIZE) + .enumerate() + .for_each(|(chunk_idx, result_chunk)| { + let chunk_start = chunk_idx * LOGUP_CHUNK_SIZE; + let chunk_len = result_chunk.len(); + + // Phase 1: Compute fingerprints for this chunk + let mut fingerprints: Vec> = Vec::with_capacity(chunk_len); + + for row in chunk_start..chunk_start + chunk_len { + let mut lc = &bus_id_f * &alpha_powers[0]; + let mut alpha_offset = 1; + for bv in &interaction.values { + let consumed = bv.accumulate_fingerprint( + main_segment_cols, + row, + &alpha_powers, + alpha_offset, + &mut lc, + &shifts, + ); + alpha_offset += consumed; + } + fingerprints.push(z - &lc); + } + + // Phase 2: Batch-invert fingerprints + FieldElement::inplace_batch_inverse(&mut fingerprints) + .expect("fingerprint is zero - probability of sampling zero is negligible"); + + // Phase 3: Compute terms: +/- m / fp + for (i, result_elem) in result_chunk.iter_mut().enumerate() { + let row = chunk_start + i; + let fp_inv = &fingerprints[i]; + + let m = + compute_multiplicity_for_row(&interaction.multiplicity, main_segment_cols, row); + + let term = &m * fp_inv; + *result_elem = if negate { -term } else { term }; + } + }); + + result +} + /// Builds the circular accumulated column from pre-computed term columns. 
/// /// For the circular constraint: acc[(i+1) mod N] - acc[i] - terms[(i+1) mod N] + L/N = 0 From 3d712cc62d9551709c4beae8d9b8ac281a7ea98a Mon Sep 17 00:00:00 2001 From: Nicole Date: Thu, 23 Apr 2026 18:35:03 -0300 Subject: [PATCH 2/5] add differential test verifying compute_logup_batched_term_column_chunked matches sequential output on 2048-row trace --- crypto/stark/src/lookup.rs | 87 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs index 0a5e0473d..af2eacd93 100644 --- a/crypto/stark/src/lookup.rs +++ b/crypto/stark/src/lookup.rs @@ -2420,3 +2420,90 @@ where } } } + +#[cfg(test)] +mod tests { + use math::field::{ + element::FieldElement, extensions_goldilocks::Degree3GoldilocksExtensionField, + goldilocks::GoldilocksField, + }; + + use super::*; + + /// Builds a synthetic main-segment trace with `num_cols` columns and `trace_len` rows. + /// Column `i` is filled with `(row * num_cols + i + 1)` as a simple pattern. 
+ fn make_trace_cols( + num_cols: usize, + trace_len: usize, + ) -> Vec>> { + (0..num_cols) + .map(|col| { + (0..trace_len) + .map(|row| { + FieldElement::::from((row * num_cols + col + 1) as u64) + }) + .collect() + }) + .collect() + } + + #[cfg(feature = "parallel")] + #[test] + fn batched_term_column_chunked_matches_non_chunked() { + type F = GoldilocksField; + type E = Degree3GoldilocksExtensionField; + + let trace_len: usize = 2048; + let num_cols: usize = 4; + + // Build synthetic main trace columns + let main_cols = make_trace_cols(num_cols, trace_len); + + // interaction_a: sender on bus 1, multiplicity from column 0, two BusValue columns (1, 2) + let interaction_a = BusInteraction::sender( + 1u64, + Multiplicity::Column(0), + vec![BusValue::column(1), BusValue::column(2)], + ); + + // interaction_b: receiver on bus 1, multiplicity One, one BusValue column (3) + let interaction_b = + BusInteraction::receiver(1u64, Multiplicity::One, vec![BusValue::column(3)]); + + // Construct challenges: [z, alpha] — two extension-field elements + let challenges: Vec> = vec![ + FieldElement::::from(7u64), + FieldElement::::from(13u64), + ]; + + let result_standard = compute_logup_batched_term_column::( + &interaction_a, + &interaction_b, + &main_cols, + trace_len, + &challenges, + "test_table", + ); + + let result_chunked = compute_logup_batched_term_column_chunked::( + &interaction_a, + &interaction_b, + &main_cols, + trace_len, + &challenges, + ); + + assert_eq!( + result_standard.len(), + result_chunked.len(), + "output lengths differ" + ); + for (row, (a, b)) in result_standard + .iter() + .zip(result_chunked.iter()) + .enumerate() + { + assert_eq!(a, b, "mismatch at row {row}: standard={a:?}, chunked={b:?}"); + } + } +} From 34acc74d3c5e542107d10f92a245a66bd0669ea3 Mon Sep 17 00:00:00 2001 From: Nicole Date: Fri, 24 Apr 2026 11:56:56 -0300 Subject: [PATCH 3/5] Emit per-row bus_debug::log_interaction inside compute_logup_term_column_chunked and add test --- 
crypto/stark/src/lookup.rs | 78 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs index af2eacd93..467d54e94 100644 --- a/crypto/stark/src/lookup.rs +++ b/crypto/stark/src/lookup.rs @@ -1084,6 +1084,7 @@ where &main_segment_cols, trace_len, challenges, + _table_name, ) }; #[cfg(not(feature = "parallel"))] @@ -1878,6 +1879,7 @@ fn compute_logup_term_column_chunked( main_segment_cols: &[Vec>], trace_len: usize, challenges: &[FieldElement], + #[cfg_attr(not(feature = "debug-checks"), allow(unused))] _table_name: &str, ) -> Vec> where F: IsFFTField + IsSubFieldOf + IsPrimeField + Send + Sync, @@ -1921,6 +1923,30 @@ where alpha_offset += consumed; } fingerprints.push(z - &lc); + + #[cfg(feature = "debug-checks")] + { + let mut base_elements: Vec> = vec![bus_id_f.clone()]; + base_elements.extend( + interaction.values.iter().flat_map(|bv| { + bv.combine_from(|col| main_segment_cols[col][row].clone()) + }), + ); + let multiplicity = compute_multiplicity_for_row( + &interaction.multiplicity, + main_segment_cols, + row, + ); + crate::bus_debug::log_interaction( + _table_name, + row, + interaction.bus_id, + interaction.is_sender, + &multiplicity.canonical(), + &base_elements, + fingerprints.last().unwrap(), + ); + } } // Phase 2: Batch-invert fingerprints @@ -2506,4 +2532,56 @@ mod tests { assert_eq!(a, b, "mismatch at row {row}: standard={a:?}, chunked={b:?}"); } } + + #[cfg(feature = "parallel")] + #[test] + fn term_column_chunked_matches_non_chunked() { + type F = GoldilocksField; + type E = Degree3GoldilocksExtensionField; + + let trace_len: usize = 2048; + let num_cols: usize = 4; + + let main_cols = make_trace_cols(num_cols, trace_len); + + let interaction = BusInteraction::sender( + 1u64, + Multiplicity::Column(0), + vec![BusValue::column(1), BusValue::column(2)], + ); + + let challenges: Vec> = vec![ + FieldElement::::from(7u64), + FieldElement::::from(13u64), + ]; + + let 
result_standard = compute_logup_term_column::( + &interaction, + &main_cols, + trace_len, + &challenges, + "test_table", + ); + + let result_chunked = compute_logup_term_column_chunked::( + &interaction, + &main_cols, + trace_len, + &challenges, + "test_table", + ); + + assert_eq!( + result_standard.len(), + result_chunked.len(), + "output lengths differ" + ); + for (row, (a, b)) in result_standard + .iter() + .zip(result_chunked.iter()) + .enumerate() + { + assert_eq!(a, b, "mismatch at row {row}: standard={a:?}, chunked={b:?}"); + } + } } From db916083005a595bbeac1cf9b360c60316dc7cb6 Mon Sep 17 00:00:00 2001 From: Nicole Date: Fri, 24 Apr 2026 12:01:15 -0300 Subject: [PATCH 4/5] Dispatch committed LogUp term columns --- crypto/stark/src/lookup.rs | 49 +++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs index 467d54e94..39ea71f42 100644 --- a/crypto/stark/src/lookup.rs +++ b/crypto/stark/src/lookup.rs @@ -19,7 +19,9 @@ use math::field::{ traits::{IsFFTField, IsField, IsPrimeField, IsSubFieldOf}, }; #[cfg(feature = "parallel")] -use rayon::prelude::{IndexedParallelIterator, ParallelIterator, ParallelSliceMut}; +use rayon::prelude::{ + IndexedParallelIterator, IntoParallelIterator, ParallelIterator, ParallelSliceMut, +}; // ============================================================================= // Shift Constants for Type Combining @@ -1039,21 +1041,40 @@ where let (num_committed_pairs, absorbed_count) = split_interactions(num_interactions); // Compute committed term columns (batched pairs only). - // With `parallel`: sequential over pairs, each using chunk-local parallelism - // (parallel across row-chunks, not across pairs) for better cache locality. + // With `parallel`: when `trace_len > LOGUP_CHUNK_SIZE` the chunk-internal + // parallelism inside each pair already saturates Rayon, so iterate pairs + // sequentially to keep cache locality. 
When `trace_len <= LOGUP_CHUNK_SIZE` + // each pair yields a single chunk, so parallelize across pairs to recover + // the throughput the per-pair dispatch used to provide for small-trace + // tables with many interactions. // Without `parallel`: sequential over pairs, sequential over rows. #[cfg(feature = "parallel")] - let committed_columns: Vec>> = (0..num_committed_pairs) - .map(|i| { - compute_logup_batched_term_column_chunked( - &self.auxiliary_trace_build_data.interactions[i * 2], - &self.auxiliary_trace_build_data.interactions[i * 2 + 1], - &main_segment_cols, - trace_len, - challenges, - ) - }) - .collect(); + let committed_columns: Vec>> = if trace_len <= LOGUP_CHUNK_SIZE { + (0..num_committed_pairs) + .into_par_iter() + .map(|i| { + compute_logup_batched_term_column_chunked( + &self.auxiliary_trace_build_data.interactions[i * 2], + &self.auxiliary_trace_build_data.interactions[i * 2 + 1], + &main_segment_cols, + trace_len, + challenges, + ) + }) + .collect() + } else { + (0..num_committed_pairs) + .map(|i| { + compute_logup_batched_term_column_chunked( + &self.auxiliary_trace_build_data.interactions[i * 2], + &self.auxiliary_trace_build_data.interactions[i * 2 + 1], + &main_segment_cols, + trace_len, + challenges, + ) + }) + .collect() + }; #[cfg(not(feature = "parallel"))] let committed_columns: Vec>> = (0..num_committed_pairs) .map(|i| { From e55d12a97478998780b11cff3034dfe4efbff3c9 Mon Sep 17 00:00:00 2001 From: MauroFab Date: Fri, 24 Apr 2026 14:33:10 -0300 Subject: [PATCH 5/5] simplify code --- crypto/stark/src/lookup.rs | 825 ++++++++----------------------------- 1 file changed, 170 insertions(+), 655 deletions(-) diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs index 39ea71f42..17ba7c5ec 100644 --- a/crypto/stark/src/lookup.rs +++ b/crypto/stark/src/lookup.rs @@ -1053,7 +1053,7 @@ where (0..num_committed_pairs) .into_par_iter() .map(|i| { - compute_logup_batched_term_column_chunked( + compute_logup_batched_term_column( 
&self.auxiliary_trace_build_data.interactions[i * 2], &self.auxiliary_trace_build_data.interactions[i * 2 + 1], &main_segment_cols, @@ -1065,7 +1065,7 @@ where } else { (0..num_committed_pairs) .map(|i| { - compute_logup_batched_term_column_chunked( + compute_logup_batched_term_column( &self.auxiliary_trace_build_data.interactions[i * 2], &self.auxiliary_trace_build_data.interactions[i * 2 + 1], &main_segment_cols, @@ -1084,31 +1084,11 @@ where &main_segment_cols, trace_len, challenges, - _table_name, ) }) .collect(); // Compute virtual column for absorbed interactions (NOT written to trace) - #[cfg(feature = "parallel")] - let virtual_column = if absorbed_count == 2 { - compute_logup_batched_term_column_chunked( - &self.auxiliary_trace_build_data.interactions[num_interactions - 2], - &self.auxiliary_trace_build_data.interactions[num_interactions - 1], - &main_segment_cols, - trace_len, - challenges, - ) - } else { - compute_logup_term_column_chunked( - &self.auxiliary_trace_build_data.interactions[num_interactions - 1], - &main_segment_cols, - trace_len, - challenges, - _table_name, - ) - }; - #[cfg(not(feature = "parallel"))] let virtual_column = if absorbed_count == 2 { compute_logup_batched_term_column( &self.auxiliary_trace_build_data.interactions[num_interactions - 2], @@ -1116,7 +1096,6 @@ where &main_segment_cols, trace_len, challenges, - _table_name, ) } else { compute_logup_term_column( @@ -1262,6 +1241,58 @@ pub enum Multiplicity { Linear(Vec), } +impl Multiplicity { + /// Evaluate the multiplicity for a single row. 
+ #[inline] + fn evaluate_at_row( + &self, + main_segment_cols: &[Vec>], + row: usize, + ) -> FieldElement { + match self { + Multiplicity::One => FieldElement::one(), + Multiplicity::Column(col) => main_segment_cols[*col][row].clone(), + Multiplicity::Sum(col_a, col_b) => { + &main_segment_cols[*col_a][row] + &main_segment_cols[*col_b][row] + } + Multiplicity::Negated(col) => FieldElement::::one() - &main_segment_cols[*col][row], + Multiplicity::Diff(col_a, col_b) => { + &main_segment_cols[*col_a][row] - &main_segment_cols[*col_b][row] + } + Multiplicity::Sum3(col_a, col_b, col_c) => { + &main_segment_cols[*col_a][row] + + &main_segment_cols[*col_b][row] + + &main_segment_cols[*col_c][row] + } + Multiplicity::Linear(terms) => { + let mut result = FieldElement::::zero(); + for term in terms { + match *term { + LinearTerm::Column { + coefficient, + column, + } => { + let coeff = FieldElement::::from(coefficient); + result += &main_segment_cols[column][row] * coeff; + } + LinearTerm::ColumnUnsigned { + coefficient, + column, + } => { + let coeff = FieldElement::::from(coefficient); + result += &main_segment_cols[column][row] * coeff; + } + LinearTerm::Constant(value) => { + result += FieldElement::::from(value); + } + } + } + result + } + } + } +} + /// Struct representing a lookup interaction for a given table. /// Contains the multiplicity and bus values involved in said interaction. /// @@ -1418,8 +1449,10 @@ where /// /// This is a pure function that takes shared main columns and returns the computed column, /// enabling parallel computation across interactions within a table. -#[allow(clippy::needless_range_loop)] -#[cfg_attr(feature = "parallel", allow(dead_code))] +/// +/// With `parallel`: processes rows in chunks of `LOGUP_CHUNK_SIZE` via `par_chunks_mut`, +/// giving good cache locality (each thread touches only CHUNK_SIZE rows before moving on). +/// Without `parallel`: processes all rows as a single chunk (equivalent to the old sequential path). 
fn compute_logup_term_column( table_interaction: &BusInteraction, main_segment_cols: &[Vec>], @@ -1431,369 +1464,98 @@ where F: IsFFTField + IsSubFieldOf + IsPrimeField + Send + Sync, E: IsField + Send + Sync, { - // Handle multiplicity column(s) - let multiplicities_owned: Vec>; - let multiplicities: &[FieldElement] = match table_interaction.multiplicity { - Multiplicity::One => { - multiplicities_owned = vec![FieldElement::one(); trace_len]; - &multiplicities_owned - } - Multiplicity::Column(col) => &main_segment_cols[col], - Multiplicity::Sum(col_a, col_b) => { - multiplicities_owned = main_segment_cols[col_a] - .iter() - .zip(main_segment_cols[col_b].iter()) - .map(|(a, b)| a + b) - .collect(); - &multiplicities_owned - } - Multiplicity::Negated(col) => { - multiplicities_owned = main_segment_cols[col] - .iter() - .map(|elem| FieldElement::::one() - elem) - .collect(); - &multiplicities_owned - } - Multiplicity::Diff(col_a, col_b) => { - multiplicities_owned = main_segment_cols[col_a] - .iter() - .zip(main_segment_cols[col_b].iter()) - .map(|(a, b)| a - b) - .collect(); - &multiplicities_owned - } - Multiplicity::Sum3(col_a, col_b, col_c) => { - multiplicities_owned = (0..trace_len) - .map(|row| { - &main_segment_cols[col_a][row] - + &main_segment_cols[col_b][row] - + &main_segment_cols[col_c][row] - }) - .collect(); - &multiplicities_owned - } - Multiplicity::Linear(ref terms) => { - multiplicities_owned = (0..trace_len) - .map(|row| { - let mut result = FieldElement::::zero(); - for term in terms { - match *term { - LinearTerm::Column { - coefficient, - column, - } => { - let coeff = FieldElement::::from(coefficient); - result += &main_segment_cols[column][row] * coeff; - } - LinearTerm::ColumnUnsigned { - coefficient, - column, - } => { - let coeff = FieldElement::::from(coefficient); - result += &main_segment_cols[column][row] * coeff; - } - LinearTerm::Constant(value) => { - result += FieldElement::::from(value); - } - } - } - result - }) - .collect(); - 
&multiplicities_owned - } - }; - - // LogUp challenges (must be shared across all tables for bus to balance) let z = &challenges[0]; let alpha = &challenges[LOGUP_CHALLENGE_ALPHA]; - - // Precompute powers of alpha for all bus elements (using incremental multiplication) let num_bus_elements = table_interaction.num_bus_elements(); let alpha_powers = compute_alpha_powers(alpha, num_bus_elements); - let negate = !table_interaction.is_sender; - - // Batch inversion: collect all fingerprints, invert once, then multiply back. - // Compute fingerprint = z - (bus_id*α^0 + v0*α^1 + v1*α^2 + ...) using - // base-field × extension-field multiplication (F×E→E) to avoid to_extension(). - // - // Zero-allocation inner loop: accumulate the linear combination directly - // into the fingerprint without collecting bus elements into intermediate Vecs. let bus_id_f = FieldElement::::from(table_interaction.bus_id); let shifts = PackingShifts::::new(); - let mut fingerprints: Vec> = Vec::with_capacity(trace_len); - for row in 0..trace_len { - // Accumulate fingerprint directly: bus_id * α^0 + Σ element_i * α^(1+i) - let mut linear_combination = &bus_id_f * &alpha_powers[0]; - let mut alpha_offset = 1; - for bv in &table_interaction.values { - let consumed = bv.accumulate_fingerprint( - main_segment_cols, - row, - &alpha_powers, - alpha_offset, - &mut linear_combination, - &shifts, - ); - alpha_offset += consumed; - } - fingerprints.push(z - &linear_combination); - - #[cfg(feature = "debug-checks")] - { - // Reconstruct base_elements for debug logging - let mut base_elements: Vec> = vec![bus_id_f.clone()]; - base_elements.extend( - table_interaction - .values - .iter() - .flat_map(|bv| bv.combine_from(|col| main_segment_cols[col][row].clone())), - ); - crate::bus_debug::log_interaction( - _table_name, - row, - table_interaction.bus_id, - table_interaction.is_sender, - &multiplicities[row].canonical(), - &base_elements, - fingerprints.last().unwrap(), - ); - } - } - - 
FieldElement::inplace_batch_inverse(&mut fingerprints) - .expect("fingerprint is zero - probability of sampling zero is negligible"); - - // Compute terms: term[i] = ±(multiplicity[i] * fingerprint_inv[i]) - // Use conditional negation instead of E×E sign multiplication - multiplicities - .iter() - .zip(fingerprints.iter()) - .map(|(multiplicity, fingerprint_inv)| { - let term = multiplicity * fingerprint_inv; - if negate { -term } else { term } - }) - .collect() -} + let mut result = vec![FieldElement::::zero(); trace_len]; -/// Computes a batched term column for two interactions sharing one aux column. -/// -/// Each row contains: `term[i] = sign_a * m_a[i] / fp_a[i] + sign_b * m_b[i] / fp_b[i]` -/// -/// Uses a single batch inversion for both fingerprint vectors (2*N elements). -#[allow(clippy::needless_range_loop)] -#[cfg_attr(feature = "parallel", allow(dead_code))] -fn compute_logup_batched_term_column( - interaction_a: &BusInteraction, - interaction_b: &BusInteraction, - main_segment_cols: &[Vec>], - trace_len: usize, - challenges: &[FieldElement], - #[cfg_attr(not(feature = "debug-checks"), allow(unused))] _table_name: &str, -) -> Vec> -where - F: IsFFTField + IsSubFieldOf + IsPrimeField + Send + Sync, - E: IsField + Send + Sync, -{ - let z = &challenges[0]; - let alpha = &challenges[LOGUP_CHALLENGE_ALPHA]; + let process_chunk = |chunk_start: usize, result_chunk: &mut [FieldElement]| { + let chunk_len = result_chunk.len(); - let max_bus_elements = interaction_a - .num_bus_elements() - .max(interaction_b.num_bus_elements()); - let alpha_powers = compute_alpha_powers(alpha, max_bus_elements); + // Phase 1: Compute fingerprints + let mut fingerprints: Vec> = Vec::with_capacity(chunk_len); + for row in chunk_start..chunk_start + chunk_len { + let mut lc = &bus_id_f * &alpha_powers[0]; + let mut alpha_offset = 1; + for bv in &table_interaction.values { + let consumed = bv.accumulate_fingerprint( + main_segment_cols, + row, + &alpha_powers, + alpha_offset, + &mut 
lc, + &shifts, + ); + alpha_offset += consumed; + } + fingerprints.push(z - &lc); - let negate_a = !interaction_a.is_sender; - let negate_b = !interaction_b.is_sender; + #[cfg(feature = "debug-checks")] + { + let mut base_elements: Vec> = vec![bus_id_f.clone()]; + base_elements.extend( + table_interaction + .values + .iter() + .flat_map(|bv| bv.combine_from(|col| main_segment_cols[col][row].clone())), + ); + let multiplicity = table_interaction + .multiplicity + .evaluate_at_row(main_segment_cols, row); + crate::bus_debug::log_interaction( + _table_name, + row, + table_interaction.bus_id, + table_interaction.is_sender, + &multiplicity.canonical(), + &base_elements, + fingerprints.last().unwrap(), + ); + } + } - // Helper to compute multiplicities for an interaction - let compute_multiplicities = |interaction: &BusInteraction| -> Vec> { - match &interaction.multiplicity { - Multiplicity::One => vec![FieldElement::one(); trace_len], - Multiplicity::Column(col) => main_segment_cols[*col].clone(), - Multiplicity::Sum(col_a, col_b) => main_segment_cols[*col_a] - .iter() - .zip(main_segment_cols[*col_b].iter()) - .map(|(a, b)| a + b) - .collect(), - Multiplicity::Negated(col) => main_segment_cols[*col] - .iter() - .map(|elem| FieldElement::::one() - elem) - .collect(), - Multiplicity::Diff(col_a, col_b) => main_segment_cols[*col_a] - .iter() - .zip(main_segment_cols[*col_b].iter()) - .map(|(a, b)| a - b) - .collect(), - Multiplicity::Sum3(col_a, col_b, col_c) => (0..trace_len) - .map(|row| { - &main_segment_cols[*col_a][row] - + &main_segment_cols[*col_b][row] - + &main_segment_cols[*col_c][row] - }) - .collect(), - Multiplicity::Linear(terms) => (0..trace_len) - .map(|row| { - let mut result = FieldElement::::zero(); - for term in terms { - match *term { - LinearTerm::Column { - coefficient, - column, - } => { - let coeff = FieldElement::::from(coefficient); - result += &main_segment_cols[column][row] * coeff; - } - LinearTerm::ColumnUnsigned { - coefficient, - column, 
- } => { - let coeff = FieldElement::::from(coefficient); - result += &main_segment_cols[column][row] * coeff; - } - LinearTerm::Constant(value) => { - result += FieldElement::::from(value); - } - } - } - result - }) - .collect(), + // Phase 2: Batch-invert + FieldElement::inplace_batch_inverse(&mut fingerprints) + .expect("fingerprint is zero - probability of sampling zero is negligible"); + + // Phase 3: Compute terms + for (i, result_elem) in result_chunk.iter_mut().enumerate() { + let row = chunk_start + i; + let m = table_interaction + .multiplicity + .evaluate_at_row(main_segment_cols, row); + let term = &m * &fingerprints[i]; + *result_elem = if negate { -term } else { term }; } }; - let multiplicities_a = compute_multiplicities(interaction_a); - let multiplicities_b = compute_multiplicities(interaction_b); - - // Compute fingerprints for both interactions using accumulate_fingerprint - // (zero-allocation inner loop: F×E multiplication instead of to_extension()) - let bus_id_a = FieldElement::::from(interaction_a.bus_id); - let bus_id_b = FieldElement::::from(interaction_b.bus_id); - let shifts = PackingShifts::::new(); + #[cfg(feature = "parallel")] + result + .par_chunks_mut(LOGUP_CHUNK_SIZE) + .enumerate() + .for_each(|(i, chunk)| process_chunk(i * LOGUP_CHUNK_SIZE, chunk)); - // Concatenate both fingerprint vectors for a single batch inversion - let mut all_fingerprints: Vec> = Vec::with_capacity(2 * trace_len); + #[cfg(not(feature = "parallel"))] + process_chunk(0, &mut result); - for row in 0..trace_len { - let mut lc_a = &bus_id_a * &alpha_powers[0]; - let mut alpha_offset = 1; - for bv in &interaction_a.values { - let consumed = bv.accumulate_fingerprint( - main_segment_cols, - row, - &alpha_powers, - alpha_offset, - &mut lc_a, - &shifts, - ); - alpha_offset += consumed; - } - all_fingerprints.push(z - &lc_a); - } - for row in 0..trace_len { - let mut lc_b = &bus_id_b * &alpha_powers[0]; - let mut alpha_offset = 1; - for bv in &interaction_b.values 
{ - let consumed = bv.accumulate_fingerprint( - main_segment_cols, - row, - &alpha_powers, - alpha_offset, - &mut lc_b, - &shifts, - ); - alpha_offset += consumed; - } - all_fingerprints.push(z - &lc_b); - } - - // Single batch inversion for all 2*N fingerprints - FieldElement::inplace_batch_inverse(&mut all_fingerprints) - .expect("fingerprint is zero - probability of sampling zero is negligible"); - - // Compute batched terms: term[i] = m_a[i] / fp_a[i] ± m_b[i] / fp_b[i] - // Use conditional negation instead of E×E sign multiplication - (0..trace_len) - .map(|row| { - let fp_a_inv = &all_fingerprints[row]; - let fp_b_inv = &all_fingerprints[trace_len + row]; - let term_a = &multiplicities_a[row] * fp_a_inv; - let term_b = &multiplicities_b[row] * fp_b_inv; - let term_a = if negate_a { -term_a } else { term_a }; - let term_b = if negate_b { -term_b } else { term_b }; - term_a + term_b - }) - .collect() + result } -/// Computes the multiplicity for a single row of an interaction. +/// Computes a batched term column for two interactions sharing one aux column. /// -/// This avoids materializing a full Vec> of length trace_len -/// when processing rows in chunks. 
-#[cfg(feature = "parallel")] -#[inline] -fn compute_multiplicity_for_row( - multiplicity: &Multiplicity, - main_segment_cols: &[Vec>], - row: usize, -) -> FieldElement { - match multiplicity { - Multiplicity::One => FieldElement::one(), - Multiplicity::Column(col) => main_segment_cols[*col][row].clone(), - Multiplicity::Sum(col_a, col_b) => { - &main_segment_cols[*col_a][row] + &main_segment_cols[*col_b][row] - } - Multiplicity::Negated(col) => FieldElement::::one() - &main_segment_cols[*col][row], - Multiplicity::Diff(col_a, col_b) => { - &main_segment_cols[*col_a][row] - &main_segment_cols[*col_b][row] - } - Multiplicity::Sum3(col_a, col_b, col_c) => { - &main_segment_cols[*col_a][row] - + &main_segment_cols[*col_b][row] - + &main_segment_cols[*col_c][row] - } - Multiplicity::Linear(terms) => { - let mut result = FieldElement::::zero(); - for term in terms { - match *term { - LinearTerm::Column { - coefficient, - column, - } => { - let coeff = FieldElement::::from(coefficient); - result += &main_segment_cols[column][row] * coeff; - } - LinearTerm::ColumnUnsigned { - coefficient, - column, - } => { - let coeff = FieldElement::::from(coefficient); - result += &main_segment_cols[column][row] * coeff; - } - LinearTerm::Constant(value) => { - result += FieldElement::::from(value); - } - } - } - result - } - } -} - -/// Chunk-local batched term column computation for two interactions. +/// Each row contains: `term[i] = sign_a * m_a[i] / fp_a[i] + sign_b * m_b[i] / fp_b[i]` /// -/// Processes rows in chunks of `LOGUP_CHUNK_SIZE`. Per chunk: -/// 1. Compute 2*CHUNK fingerprints (interaction_a and interaction_b) -/// 2. Batch-invert locally (one Montgomery inverse per chunk) -/// 3. Compute terms: m_a/fp_a +/- m_b/fp_b +/// Uses chunk-local batch inversion for good cache locality: each chunk processes +/// both interactions for CHUNK_SIZE rows before moving on. 
/// -/// Parallelism is across row-chunks (not across interaction pairs), giving -/// much better cache locality: each thread touches only CHUNK_SIZE rows of -/// main trace data before moving to the next phase. -#[cfg(feature = "parallel")] -fn compute_logup_batched_term_column_chunked( +/// With `parallel`: processes rows in chunks of `LOGUP_CHUNK_SIZE` via `par_chunks_mut`. +/// Without `parallel`: processes all rows as a single chunk (equivalent to the old sequential path). +fn compute_logup_batched_term_column( interaction_a: &BusInteraction, interaction_b: &BusInteraction, main_segment_cols: &[Vec>], @@ -1806,131 +1568,27 @@ where { let z = &challenges[0]; let alpha = &challenges[LOGUP_CHALLENGE_ALPHA]; - let max_bus_elements = interaction_a .num_bus_elements() .max(interaction_b.num_bus_elements()); let alpha_powers = compute_alpha_powers(alpha, max_bus_elements); - let negate_a = !interaction_a.is_sender; let negate_b = !interaction_b.is_sender; - let bus_id_a = FieldElement::::from(interaction_a.bus_id); let bus_id_b = FieldElement::::from(interaction_b.bus_id); let shifts = PackingShifts::::new(); - // Output: one FieldElement per row - let mut result = vec![FieldElement::::zero(); trace_len]; - - result - .par_chunks_mut(LOGUP_CHUNK_SIZE) - .enumerate() - .for_each(|(chunk_idx, result_chunk)| { - let chunk_start = chunk_idx * LOGUP_CHUNK_SIZE; - let chunk_len = result_chunk.len(); - - // Phase 1: Compute fingerprints for both interactions in this chunk. 
- // Layout: [fp_a[0..chunk_len], fp_b[0..chunk_len]] - let compute_chunk_fingerprints = - |interaction: &BusInteraction, - bus_id_f: &FieldElement, - fps: &mut Vec>| { - for row in chunk_start..chunk_start + chunk_len { - let mut lc = bus_id_f * &alpha_powers[0]; - let mut alpha_offset = 1; - for bv in &interaction.values { - let consumed = bv.accumulate_fingerprint( - main_segment_cols, - row, - &alpha_powers, - alpha_offset, - &mut lc, - &shifts, - ); - alpha_offset += consumed; - } - fps.push(z - &lc); - } - }; - - let mut fingerprints: Vec> = Vec::with_capacity(2 * chunk_len); - compute_chunk_fingerprints(interaction_a, &bus_id_a, &mut fingerprints); - compute_chunk_fingerprints(interaction_b, &bus_id_b, &mut fingerprints); - - // Phase 2: Batch-invert all fingerprints in this chunk - FieldElement::inplace_batch_inverse(&mut fingerprints) - .expect("fingerprint is zero - probability of sampling zero is negligible"); - - // Phase 3: Compute terms: m_a/fp_a +/- m_b/fp_b - for (i, result_elem) in result_chunk.iter_mut().enumerate() { - let row = chunk_start + i; - let fp_a_inv = &fingerprints[i]; - let fp_b_inv = &fingerprints[chunk_len + i]; - - let m_a = compute_multiplicity_for_row( - &interaction_a.multiplicity, - main_segment_cols, - row, - ); - let m_b = compute_multiplicity_for_row( - &interaction_b.multiplicity, - main_segment_cols, - row, - ); - - let term_a = &m_a * fp_a_inv; - let term_b = &m_b * fp_b_inv; - let term_a = if negate_a { -term_a } else { term_a }; - let term_b = if negate_b { -term_b } else { term_b }; - *result_elem = term_a + term_b; - } - }); - - result -} - -/// Chunk-local single-interaction term column computation. -/// -/// Same cache-locality benefits as `compute_logup_batched_term_column_chunked` -/// but for a single interaction (used for the virtual absorbed column when -/// `absorbed_count == 1`). 
-#[cfg(feature = "parallel")] -fn compute_logup_term_column_chunked( - interaction: &BusInteraction, - main_segment_cols: &[Vec>], - trace_len: usize, - challenges: &[FieldElement], - #[cfg_attr(not(feature = "debug-checks"), allow(unused))] _table_name: &str, -) -> Vec> -where - F: IsFFTField + IsSubFieldOf + IsPrimeField + Send + Sync, - E: IsField + Send + Sync, -{ - let z = &challenges[0]; - let alpha = &challenges[LOGUP_CHALLENGE_ALPHA]; - - let num_bus_elements = interaction.num_bus_elements(); - let alpha_powers = compute_alpha_powers(alpha, num_bus_elements); - - let negate = !interaction.is_sender; - - let bus_id_f = FieldElement::::from(interaction.bus_id); - let shifts = PackingShifts::::new(); - let mut result = vec![FieldElement::::zero(); trace_len]; - result - .par_chunks_mut(LOGUP_CHUNK_SIZE) - .enumerate() - .for_each(|(chunk_idx, result_chunk)| { - let chunk_start = chunk_idx * LOGUP_CHUNK_SIZE; - let chunk_len = result_chunk.len(); - - // Phase 1: Compute fingerprints for this chunk - let mut fingerprints: Vec> = Vec::with_capacity(chunk_len); + let process_chunk = |chunk_start: usize, result_chunk: &mut [FieldElement]| { + let chunk_len = result_chunk.len(); + // Phase 1: Compute fingerprints for both interactions + let compute_fps = |interaction: &BusInteraction, + bus_id_f: &FieldElement, + fps: &mut Vec>| { for row in chunk_start..chunk_start + chunk_len { - let mut lc = &bus_id_f * &alpha_powers[0]; + let mut lc = bus_id_f * &alpha_powers[0]; let mut alpha_offset = 1; for bv in &interaction.values { let consumed = bv.accumulate_fingerprint( @@ -1943,49 +1601,45 @@ where ); alpha_offset += consumed; } - fingerprints.push(z - &lc); - - #[cfg(feature = "debug-checks")] - { - let mut base_elements: Vec> = vec![bus_id_f.clone()]; - base_elements.extend( - interaction.values.iter().flat_map(|bv| { - bv.combine_from(|col| main_segment_cols[col][row].clone()) - }), - ); - let multiplicity = compute_multiplicity_for_row( - &interaction.multiplicity, 
- main_segment_cols, - row, - ); - crate::bus_debug::log_interaction( - _table_name, - row, - interaction.bus_id, - interaction.is_sender, - &multiplicity.canonical(), - &base_elements, - fingerprints.last().unwrap(), - ); - } + fps.push(z - &lc); } + }; - // Phase 2: Batch-invert fingerprints - FieldElement::inplace_batch_inverse(&mut fingerprints) - .expect("fingerprint is zero - probability of sampling zero is negligible"); - - // Phase 3: Compute terms: +/- m / fp - for (i, result_elem) in result_chunk.iter_mut().enumerate() { - let row = chunk_start + i; - let fp_inv = &fingerprints[i]; + let mut fingerprints: Vec> = Vec::with_capacity(2 * chunk_len); + compute_fps(interaction_a, &bus_id_a, &mut fingerprints); + compute_fps(interaction_b, &bus_id_b, &mut fingerprints); + + // Phase 2: Batch-invert + FieldElement::inplace_batch_inverse(&mut fingerprints) + .expect("fingerprint is zero - probability of sampling zero is negligible"); + + // Phase 3: Compute terms + for (i, result_elem) in result_chunk.iter_mut().enumerate() { + let row = chunk_start + i; + let fp_a_inv = &fingerprints[i]; + let fp_b_inv = &fingerprints[chunk_len + i]; + let m_a = interaction_a + .multiplicity + .evaluate_at_row(main_segment_cols, row); + let m_b = interaction_b + .multiplicity + .evaluate_at_row(main_segment_cols, row); + let term_a = &m_a * fp_a_inv; + let term_b = &m_b * fp_b_inv; + let term_a = if negate_a { -term_a } else { term_a }; + let term_b = if negate_b { -term_b } else { term_b }; + *result_elem = term_a + term_b; + } + }; - let m = - compute_multiplicity_for_row(&interaction.multiplicity, main_segment_cols, row); + #[cfg(feature = "parallel")] + result + .par_chunks_mut(LOGUP_CHUNK_SIZE) + .enumerate() + .for_each(|(i, chunk)| process_chunk(i * LOGUP_CHUNK_SIZE, chunk)); - let term = &m * fp_inv; - *result_elem = if negate { -term } else { term }; - } - }); + #[cfg(not(feature = "parallel"))] + process_chunk(0, &mut result); result } @@ -2467,142 +2121,3 @@ where } } 
} - -#[cfg(test)] -mod tests { - use math::field::{ - element::FieldElement, extensions_goldilocks::Degree3GoldilocksExtensionField, - goldilocks::GoldilocksField, - }; - - use super::*; - - /// Builds a synthetic main-segment trace with `num_cols` columns and `trace_len` rows. - /// Column `i` is filled with `(row * num_cols + i + 1)` as a simple pattern. - fn make_trace_cols( - num_cols: usize, - trace_len: usize, - ) -> Vec>> { - (0..num_cols) - .map(|col| { - (0..trace_len) - .map(|row| { - FieldElement::::from((row * num_cols + col + 1) as u64) - }) - .collect() - }) - .collect() - } - - #[cfg(feature = "parallel")] - #[test] - fn batched_term_column_chunked_matches_non_chunked() { - type F = GoldilocksField; - type E = Degree3GoldilocksExtensionField; - - let trace_len: usize = 2048; - let num_cols: usize = 4; - - // Build synthetic main trace columns - let main_cols = make_trace_cols(num_cols, trace_len); - - // interaction_a: sender on bus 1, multiplicity from column 0, two BusValue columns (1, 2) - let interaction_a = BusInteraction::sender( - 1u64, - Multiplicity::Column(0), - vec![BusValue::column(1), BusValue::column(2)], - ); - - // interaction_b: receiver on bus 1, multiplicity One, one BusValue column (3) - let interaction_b = - BusInteraction::receiver(1u64, Multiplicity::One, vec![BusValue::column(3)]); - - // Construct challenges: [z, alpha] — two extension-field elements - let challenges: Vec> = vec![ - FieldElement::::from(7u64), - FieldElement::::from(13u64), - ]; - - let result_standard = compute_logup_batched_term_column::( - &interaction_a, - &interaction_b, - &main_cols, - trace_len, - &challenges, - "test_table", - ); - - let result_chunked = compute_logup_batched_term_column_chunked::( - &interaction_a, - &interaction_b, - &main_cols, - trace_len, - &challenges, - ); - - assert_eq!( - result_standard.len(), - result_chunked.len(), - "output lengths differ" - ); - for (row, (a, b)) in result_standard - .iter() - .zip(result_chunked.iter()) 
- .enumerate() - { - assert_eq!(a, b, "mismatch at row {row}: standard={a:?}, chunked={b:?}"); - } - } - - #[cfg(feature = "parallel")] - #[test] - fn term_column_chunked_matches_non_chunked() { - type F = GoldilocksField; - type E = Degree3GoldilocksExtensionField; - - let trace_len: usize = 2048; - let num_cols: usize = 4; - - let main_cols = make_trace_cols(num_cols, trace_len); - - let interaction = BusInteraction::sender( - 1u64, - Multiplicity::Column(0), - vec![BusValue::column(1), BusValue::column(2)], - ); - - let challenges: Vec> = vec![ - FieldElement::::from(7u64), - FieldElement::::from(13u64), - ]; - - let result_standard = compute_logup_term_column::( - &interaction, - &main_cols, - trace_len, - &challenges, - "test_table", - ); - - let result_chunked = compute_logup_term_column_chunked::( - &interaction, - &main_cols, - trace_len, - &challenges, - "test_table", - ); - - assert_eq!( - result_standard.len(), - result_chunked.len(), - "output lengths differ" - ); - for (row, (a, b)) in result_standard - .iter() - .zip(result_chunked.iter()) - .enumerate() - { - assert_eq!(a, b, "mismatch at row {row}: standard={a:?}, chunked={b:?}"); - } - } -}