diff --git a/problemreductions-cli/src/test_support.rs b/problemreductions-cli/src/test_support.rs index 81d3d33c8..3b4e55d8f 100644 --- a/problemreductions-cli/src/test_support.rs +++ b/problemreductions-cli/src/test_support.rs @@ -180,6 +180,7 @@ problemreductions::inventory::submit! { }), capabilities: EdgeCapabilities::aggregate_only(), overhead_eval_fn: |_| ProblemSize::new(vec![]), + source_size_fn: |_| ProblemSize::new(vec![]), } } @@ -202,6 +203,7 @@ problemreductions::inventory::submit! { }), capabilities: EdgeCapabilities::aggregate_only(), overhead_eval_fn: |_| ProblemSize::new(vec![]), + source_size_fn: |_| ProblemSize::new(vec![]), } } diff --git a/problemreductions-macros/src/lib.rs b/problemreductions-macros/src/lib.rs index cd9d66b8f..e82983283 100644 --- a/problemreductions-macros/src/lib.rs +++ b/problemreductions-macros/src/lib.rs @@ -252,6 +252,47 @@ fn generate_overhead_eval_fn( }) } +/// Generate a function that extracts the source problem's size fields from `&dyn Any`. +/// +/// Collects all variable names referenced in the overhead expressions, generates +/// getter calls for each, and returns a `ProblemSize`. +fn generate_source_size_fn( + fields: &[(String, String)], + source_type: &Type, +) -> syn::Result { + let src_ident = syn::Ident::new("__src", proc_macro2::Span::call_site()); + + // Collect all unique variable names from overhead expressions + let mut var_names = std::collections::BTreeSet::new(); + for (_, expr_str) in fields { + let parsed = parser::parse_expr(expr_str).map_err(|e| { + syn::Error::new( + proc_macro2::Span::call_site(), + format!("error parsing overhead expression \"{expr_str}\": {e}"), + ) + })?; + for v in parsed.variables() { + var_names.insert(v.to_string()); + } + } + + let getter_tokens: Vec<_> = var_names + .iter() + .map(|var| { + let getter = syn::Ident::new(var, proc_macro2::Span::call_site()); + let name_lit = var.as_str(); + quote! 
{ (#name_lit, #src_ident.#getter() as usize) } + }) + .collect(); + + Ok(quote! { + |__any_src: &dyn std::any::Any| -> crate::types::ProblemSize { + let #src_ident = __any_src.downcast_ref::<#source_type>().unwrap(); + crate::types::ProblemSize::new(vec![#(#getter_tokens),*]) + } + }) +} + /// Generate the reduction entry code fn generate_reduction_entry( attrs: &ReductionAttrs, @@ -288,8 +329,8 @@ fn generate_reduction_entry( let source_variant_body = make_variant_fn_body(source_type, &type_generics)?; let target_variant_body = make_variant_fn_body(&target_type, &type_generics)?; - // Generate overhead and eval fn - let (overhead, overhead_eval_fn) = match &attrs.overhead { + // Generate overhead, eval fn, and source size fn + let (overhead, overhead_eval_fn, source_size_fn) = match &attrs.overhead { Some(OverheadSpec::Legacy(tokens)) => { let eval_fn = quote! { |_: &dyn std::any::Any| -> crate::types::ProblemSize { @@ -297,12 +338,18 @@ fn generate_reduction_entry( migrate to parsed syntax: field = \"expression\"") } }; - (tokens.clone(), eval_fn) + let size_fn = quote! 
{ + |_: &dyn std::any::Any| -> crate::types::ProblemSize { + crate::types::ProblemSize::new(vec![]) + } + }; + (tokens.clone(), eval_fn, size_fn) } Some(OverheadSpec::Parsed(fields)) => { let overhead_tokens = generate_parsed_overhead(fields)?; let eval_fn = generate_overhead_eval_fn(fields, source_type)?; - (overhead_tokens, eval_fn) + let size_fn = generate_source_size_fn(fields, source_type)?; + (overhead_tokens, eval_fn, size_fn) } None => { return Err(syn::Error::new( @@ -337,6 +384,7 @@ fn generate_reduction_entry( reduce_aggregate_fn: None, capabilities: #capabilities, overhead_eval_fn: #overhead_eval_fn, + source_size_fn: #source_size_fn, } } diff --git a/src/rules/cost.rs b/src/rules/cost.rs index 0cbf1bf14..7678d4d87 100644 --- a/src/rules/cost.rs +++ b/src/rules/cost.rs @@ -27,6 +27,39 @@ impl PathCostFn for MinimizeSteps { } } +/// Minimize total output size (sum of all output field values). +/// +/// Prefers reduction paths that produce smaller intermediate and final problems. +/// Breaks ties that `MinimizeSteps` cannot resolve (e.g., two 2-step paths +/// where one produces 144 ILP variables and the other 1,332). +pub struct MinimizeOutputSize; + +impl PathCostFn for MinimizeOutputSize { + fn edge_cost(&self, overhead: &ReductionOverhead, size: &ProblemSize) -> f64 { + let output = overhead.evaluate_output_size(size); + output.total() as f64 + } +} + +/// Minimize steps first, then use output size as tiebreaker. +/// +/// Each edge has a primary cost of `STEP_WEIGHT` (ensuring fewer-step paths +/// always win) plus a small overhead-based cost that breaks ties between +/// equal-step paths. +pub struct MinimizeStepsThenOverhead; + +impl PathCostFn for MinimizeStepsThenOverhead { + fn edge_cost(&self, overhead: &ReductionOverhead, size: &ProblemSize) -> f64 { + // Use a large step weight to ensure step count dominates. 
+ // The overhead tiebreaker uses log1p to compress the range, + // keeping it far smaller than STEP_WEIGHT for any realistic problem size. + const STEP_WEIGHT: f64 = 1e9; + let output = overhead.evaluate_output_size(size); + let overhead_tiebreaker = (1.0 + output.total() as f64).ln(); + STEP_WEIGHT + overhead_tiebreaker + } +} + /// Custom cost function from closure. pub struct CustomCost<F>(pub F); diff --git a/src/rules/graph.rs b/src/rules/graph.rs index 303cbb698..3143924d5 100644 --- a/src/rules/graph.rs +++ b/src/rules/graph.rs @@ -904,6 +904,54 @@ impl ReductionGraph { result } + /// Evaluate the cumulative output size along a reduction path. + /// + /// Walks the path from start to end, applying each edge's overhead + /// expressions to transform the problem size at each step. + /// Returns `None` if any edge in the path cannot be found. + pub fn evaluate_path_overhead( + &self, + path: &ReductionPath, + input_size: &ProblemSize, + ) -> Option<ProblemSize> { + let mut current_size = input_size.clone(); + for pair in path.steps.windows(2) { + let src = self.lookup_node(&pair[0].name, &pair[0].variant)?; + let dst = self.lookup_node(&pair[1].name, &pair[1].variant)?; + let edge_idx = self.graph.find_edge(src, dst)?; + let edge = &self.graph[edge_idx]; + current_size = edge.overhead.evaluate_output_size(&current_size); + } + Some(current_size) + } + + /// Compute the source problem's size from a type-erased instance. + /// + /// Iterates over all registered reduction entries with a matching source name + /// and merges their `source_size_fn` results to capture all size fields. + /// Different entries may reference different getter methods (e.g., one uses + /// `num_vertices` while another also uses `num_edges`). 
+ pub fn compute_source_size(name: &str, instance: &dyn Any) -> ProblemSize { + let mut merged: Vec<(String, usize)> = Vec::new(); + let mut seen: HashSet<String> = HashSet::new(); + + for entry in inventory::iter::<ReductionEntry> { + if entry.source_name == name { + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + (entry.source_size_fn)(instance) + })); + if let Ok(size) = result { + for (k, v) in size.components { + if seen.insert(k.clone()) { + merged.push((k, v)); + } + } + } + } + } + ProblemSize { components: merged } + } + /// Get all incoming reductions to a problem (across all its variants). pub fn incoming_reductions(&self, name: &str) -> Vec { let Some(indices) = self.name_to_nodes.get(name) else { diff --git a/src/rules/mod.rs b/src/rules/mod.rs index 002775c1f..8ade1884b 100644 --- a/src/rules/mod.rs +++ b/src/rules/mod.rs @@ -3,7 +3,9 @@ pub mod analysis; pub mod cost; pub mod registry; -pub use cost::{CustomCost, Minimize, MinimizeSteps, PathCostFn}; +pub use cost::{ + CustomCost, Minimize, MinimizeOutputSize, MinimizeSteps, MinimizeStepsThenOverhead, PathCostFn, +}; pub use registry::{EdgeCapabilities, ReductionEntry, ReductionOverhead}; pub(crate) mod circuit_spinglass; diff --git a/src/rules/registry.rs b/src/rules/registry.rs index 00bd892a7..d8dc4bb43 100644 --- a/src/rules/registry.rs +++ b/src/rules/registry.rs @@ -157,6 +157,10 @@ pub struct ReductionEntry { /// Takes a `&dyn Any` (must be `&SourceType`), calls getter methods directly, /// and returns the computed target problem size. pub overhead_eval_fn: fn(&dyn Any) -> ProblemSize, + /// Extract source problem size from a type-erased instance. + /// Takes a `&dyn Any` (must be `&SourceType`), calls getter methods, + /// and returns the source problem's size fields as a `ProblemSize`. 
+ pub source_size_fn: fn(&dyn Any) -> ProblemSize, } impl ReductionEntry { diff --git a/src/solvers/ilp/solver.rs b/src/solvers/ilp/solver.rs index 40812d7ce..1ffd494b2 100644 --- a/src/solvers/ilp/solver.rs +++ b/src/solvers/ilp/solver.rs @@ -239,18 +239,23 @@ impl ILPSolver { any.is::>() || any.is::>() || any.is::() } + /// Two-level path selection: + /// 1. Dijkstra finds the cheapest path to each ILP variant using + /// `MinimizeStepsThenOverhead` (additive edge costs: step count + log overhead). + /// 2. Across ILP variants, we pick the path whose composed final output size + /// is smallest — this is the actual ILP problem size the solver will face. fn best_path_to_ilp( &self, graph: &crate::rules::ReductionGraph, name: &str, variant: &std::collections::BTreeMap, mode: ReductionMode, + instance: &dyn std::any::Any, ) -> Option { - use crate::types::ProblemSize; - let ilp_variants = graph.variants_for("ILP"); - let input_size = ProblemSize::new(vec![]); - let mut best_path = None; + let input_size = crate::rules::ReductionGraph::compute_source_size(name, instance); + let mut best_path: Option = None; + let mut best_cost = f64::INFINITY; for dv in &ilp_variants { if let Some(path) = graph.find_cheapest_path_mode( @@ -260,12 +265,16 @@ impl ILPSolver { dv, mode, &input_size, - &crate::rules::MinimizeSteps, + &crate::rules::MinimizeStepsThenOverhead, ) { - let is_better = best_path - .as_ref() - .is_none_or(|current: &crate::rules::ReductionPath| path.len() < current.len()); - if is_better { + // Use composed final output size for cross-variant comparison, + // since this determines the actual ILP problem size. 
+ let final_size = graph + .evaluate_path_overhead(&path, &input_size) + .unwrap_or_default(); + let cost = final_size.total() as f64; + if cost < best_cost { + best_cost = cost; best_path = Some(path); } } @@ -290,10 +299,11 @@ impl ILPSolver { let graph = crate::rules::ReductionGraph::new(); - let Some(path) = self.best_path_to_ilp(&graph, name, variant, ReductionMode::Witness) + let Some(path) = + self.best_path_to_ilp(&graph, name, variant, ReductionMode::Witness, instance) else { if self - .best_path_to_ilp(&graph, name, variant, ReductionMode::Aggregate) + .best_path_to_ilp(&graph, name, variant, ReductionMode::Aggregate, instance) .is_some() { return Err(SolveViaReductionError::WitnessPathRequired { diff --git a/src/types.rs b/src/types.rs index b9236aec7..4d14f6ca2 100644 --- a/src/types.rs +++ b/src/types.rs @@ -504,7 +504,7 @@ impl fmt::Display for Extremum { } /// Problem size metadata (varies by problem type). -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] pub struct ProblemSize { /// Named size components. pub components: Vec<(String, usize)>, @@ -528,6 +528,11 @@ impl ProblemSize { .find(|(k, _)| k == name) .map(|(_, v)| *v) } + + /// Sum of all component values. 
+ pub fn total(&self) -> usize { + self.components.iter().map(|(_, v)| *v).sum() + } } impl fmt::Display for ProblemSize { diff --git a/src/unit_tests/rules/cost.rs b/src/unit_tests/rules/cost.rs index bcf2f0151..c489b7fe3 100644 --- a/src/unit_tests/rules/cost.rs +++ b/src/unit_tests/rules/cost.rs @@ -48,3 +48,39 @@ fn test_minimize_missing_field() { assert_eq!(cost_fn.edge_cost(&overhead, &size), 0.0); } + +#[test] +fn test_minimize_output_size() { + let cost_fn = MinimizeOutputSize; + let size = ProblemSize::new(vec![("n", 10), ("m", 5)]); + let overhead = test_overhead(); + + // output n = 20, output m = 5 → total = 25 + assert_eq!(cost_fn.edge_cost(&overhead, &size), 25.0); +} + +#[test] +fn test_minimize_steps_then_overhead() { + let cost_fn = MinimizeStepsThenOverhead; + let size = ProblemSize::new(vec![("n", 10), ("m", 5)]); + let overhead = test_overhead(); + + let cost = cost_fn.edge_cost(&overhead, &size); + // Should be dominated by the step weight (1e9) with small overhead tiebreaker + assert!(cost > 1e8, "step weight should dominate"); + assert!(cost < 2e9, "should be roughly 1e9 + small tiebreaker"); + + // Two edges with different overhead should have different costs + let small_overhead = + ReductionOverhead::new(vec![("n", Expr::Const(1.0)), ("m", Expr::Const(1.0))]); + let cost_small = cost_fn.edge_cost(&small_overhead, &size); + // Both have the same step weight but different tiebreakers + assert!(cost > cost_small, "larger overhead should cost more"); +} + +#[test] +fn test_problem_size_total() { + let size = ProblemSize::new(vec![("a", 3), ("b", 7), ("c", 10)]); + assert_eq!(size.total(), 20); + assert_eq!(ProblemSize::new(vec![]).total(), 0); +} diff --git a/src/unit_tests/rules/graph.rs b/src/unit_tests/rules/graph.rs index ee1fb0933..a011c1a6b 100644 --- a/src/unit_tests/rules/graph.rs +++ b/src/unit_tests/rules/graph.rs @@ -1588,3 +1588,95 @@ fn test_variant_complexity() { None ); } + +#[test] +fn test_compute_source_size() { + let problem 
= MaximumIndependentSet::::new( + SimpleGraph::new(4, vec![(0, 1), (1, 2), (2, 3)]), + vec![1, 1, 1, 1], + ); + let size = ReductionGraph::compute_source_size("MaximumIndependentSet", &problem); + assert_eq!(size.get("num_vertices"), Some(4)); + assert_eq!(size.get("num_edges"), Some(3)); +} + +#[test] +fn test_compute_source_size_unknown_problem() { + let problem = 42u32; + let size = ReductionGraph::compute_source_size("NonExistentProblem", &problem); + assert!(size.components.is_empty()); +} + +#[test] +fn test_evaluate_path_overhead() { + use crate::rules::cost::MinimizeStepsThenOverhead; + + let graph = ReductionGraph::new(); + let src = ReductionGraph::variant_to_map(&MaximumIndependentSet::::variant()); + let dst = ReductionGraph::variant_to_map(&MinimumVertexCover::::variant()); + let input_size = ProblemSize::new(vec![("num_vertices", 10), ("num_edges", 20)]); + + let path = graph + .find_cheapest_path( + "MaximumIndependentSet", + &src, + "MinimumVertexCover", + &dst, + &input_size, + &MinimizeStepsThenOverhead, + ) + .expect("should find path"); + + let final_size = graph + .evaluate_path_overhead(&path, &input_size) + .expect("should evaluate overhead"); + + // MIS → MVC preserves num_vertices and num_edges + assert_eq!(final_size.get("num_vertices"), Some(10)); + assert_eq!(final_size.get("num_edges"), Some(20)); +} + +#[test] +fn test_evaluate_path_overhead_multistep() { + use crate::rules::cost::MinimizeStepsThenOverhead; + + // MIS → SetPacking → SetPacking → ILP (3 steps with size transformations) + let graph = ReductionGraph::new(); + let src = ReductionGraph::variant_to_map(&MaximumIndependentSet::::variant()); + let dst_variants = graph.variants_for("ILP"); + let dst = dst_variants + .iter() + .find(|v| v.get("variable") == Some(&"bool".to_string())) + .expect("ILP variant should exist"); + let input_size = ProblemSize::new(vec![("num_vertices", 10), ("num_edges", 20)]); + + let path = graph + .find_cheapest_path_mode( + "MaximumIndependentSet", 
+ &src, + "ILP", + dst, + ReductionMode::Witness, + &input_size, + &MinimizeStepsThenOverhead, + ) + .expect("should find path"); + + assert!( + path.len() >= 2, + "path should have at least 2 steps, got {}", + path.len() + ); + + let final_size = graph + .evaluate_path_overhead(&path, &input_size) + .expect("should evaluate overhead"); + + // MIS(V=10,E=20) → SetPacking(sets=V=10, universe=E=20) → ... → ILP(vars=10, constraints=20) + // The final ILP dimensions should reflect the composed overhead, not the input. + assert_eq!(final_size.get("num_vars"), Some(10)); + assert_eq!(final_size.get("num_constraints"), Some(20)); + // Original MIS fields should NOT appear in the final output + assert_eq!(final_size.get("num_vertices"), None); + assert_eq!(final_size.get("num_edges"), None); +} diff --git a/src/unit_tests/rules/registry.rs b/src/unit_tests/rules/registry.rs index 21ba8dde5..3fdef0c9d 100644 --- a/src/unit_tests/rules/registry.rs +++ b/src/unit_tests/rules/registry.rs @@ -18,6 +18,10 @@ fn dummy_overhead_eval_fn(_: &dyn std::any::Any) -> ProblemSize { ProblemSize::new(vec![]) } +fn dummy_source_size_fn(_: &dyn std::any::Any) -> ProblemSize { + ProblemSize::new(vec![]) +} + #[test] fn test_reduction_overhead_evaluate() { let overhead = ReductionOverhead::new(vec![ @@ -51,6 +55,7 @@ fn test_reduction_entry_overhead() { reduce_aggregate_fn: None, capabilities: EdgeCapabilities::witness_only(), overhead_eval_fn: dummy_overhead_eval_fn, + source_size_fn: dummy_source_size_fn, }; let overhead = entry.overhead(); @@ -72,6 +77,7 @@ fn test_reduction_entry_debug() { reduce_aggregate_fn: None, capabilities: EdgeCapabilities::witness_only(), overhead_eval_fn: dummy_overhead_eval_fn, + source_size_fn: dummy_source_size_fn, }; let debug_str = format!("{:?}", entry); @@ -92,6 +98,7 @@ fn test_is_base_reduction_unweighted() { reduce_aggregate_fn: None, capabilities: EdgeCapabilities::witness_only(), overhead_eval_fn: dummy_overhead_eval_fn, + source_size_fn: 
dummy_source_size_fn, }; assert!(entry.is_base_reduction()); } @@ -109,6 +116,7 @@ fn test_is_base_reduction_source_weighted() { reduce_aggregate_fn: None, capabilities: EdgeCapabilities::witness_only(), overhead_eval_fn: dummy_overhead_eval_fn, + source_size_fn: dummy_source_size_fn, }; assert!(!entry.is_base_reduction()); } @@ -126,6 +134,7 @@ fn test_is_base_reduction_target_weighted() { reduce_aggregate_fn: None, capabilities: EdgeCapabilities::witness_only(), overhead_eval_fn: dummy_overhead_eval_fn, + source_size_fn: dummy_source_size_fn, }; assert!(!entry.is_base_reduction()); } @@ -143,6 +152,7 @@ fn test_is_base_reduction_both_weighted() { reduce_aggregate_fn: None, capabilities: EdgeCapabilities::witness_only(), overhead_eval_fn: dummy_overhead_eval_fn, + source_size_fn: dummy_source_size_fn, }; assert!(!entry.is_base_reduction()); } @@ -161,6 +171,7 @@ fn test_is_base_reduction_no_weight_key() { reduce_aggregate_fn: None, capabilities: EdgeCapabilities::witness_only(), overhead_eval_fn: dummy_overhead_eval_fn, + source_size_fn: dummy_source_size_fn, }; assert!(entry.is_base_reduction()); } @@ -178,6 +189,7 @@ fn test_reduction_entry_can_store_aggregate_executor() { reduce_aggregate_fn: Some(dummy_reduce_aggregate_fn), capabilities: EdgeCapabilities::aggregate_only(), overhead_eval_fn: dummy_overhead_eval_fn, + source_size_fn: dummy_source_size_fn, }; assert!(entry.reduce_fn.is_none());