diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index 565abdb14..16e2297b1 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -53,6 +53,7 @@ "BicliqueCover": [Biclique Cover], "BinPacking": [Bin Packing], "ClosestVectorProblem": [Closest Vector Problem], + "LongestCommonSubsequence": [Longest Common Subsequence], "SubsetSum": [Subset Sum], "MinimumFeedbackVertexSet": [Minimum Feedback Vertex Set], ) @@ -980,6 +981,14 @@ Biclique Cover is equivalent to factoring the biadjacency matrix $M$ of the bipa *Example.* Let $n = 4$ items with weights $(2, 3, 4, 5)$, values $(3, 4, 5, 7)$, and capacity $C = 7$. Selecting $S = {1, 2}$ (items with weights 3 and 4) gives total weight $3 + 4 = 7 lt.eq C$ and total value $4 + 5 = 9$. Selecting $S = {0, 3}$ (weights 2 and 5) gives weight $2 + 5 = 7 lt.eq C$ and value $3 + 7 = 10$, which is optimal. ] +#problem-def("LongestCommonSubsequence")[ + Given $k$ strings $s_1, dots, s_k$ over a finite alphabet $Sigma$, find a longest string $w$ that is a subsequence of every $s_i$. A string $w$ is a _subsequence_ of $s$ if $w$ can be obtained by deleting zero or more characters from $s$ without changing the order of the remaining characters. +][ + The LCS problem is polynomial-time solvable for $k = 2$ strings via dynamic programming in $O(n_1 n_2)$ time (Wagner & Fischer, 1974), but NP-hard for $k gt.eq 3$ strings @maier1978. It is a foundational problem in bioinformatics (sequence alignment), version control (diff algorithms), and data compression. The problem is listed as SR10 in Garey & Johnson @garey1979. + + *Example.* Let $s_1 = $ `ABAC` and $s_2 = $ `BACA` over $Sigma = {A, B, C}$. The longest common subsequence has length 3, e.g., `BAC`: positions 1, 2, 3 of $s_1$ match positions 0, 1, 2 of $s_2$. +] + #problem-def("SubsetSum")[ Given a finite set $A = {a_0, dots, a_(n-1)}$ with sizes $s(a_i) in ZZ^+$ and a target $B in ZZ^+$, determine whether there exists a subset $A' subset.eq A$ such that $sum_(a in A') s(a) = B$. ][ @@ -1702,6 +1711,22 @@ The following reductions to Integer Linear Programming are straightforward formu _Solution extraction._ For each position $k$, find vertex $v$ with $x_(v,k) = 1$ to recover the tour permutation; then select edges between consecutive positions. ] +#reduction-rule("LongestCommonSubsequence", "ILP")[ + The match-pair ILP formulation @blum2021 encodes subsequence alignment as a binary optimization. For two strings $s_1$ (length $n_1$) and $s_2$ (length $n_2$), each position pair $(j_1, j_2)$ where $s_1[j_1] = s_2[j_2]$ yields a binary variable. Constraints enforce one-to-one matching and order preservation (no crossings). The objective maximizes the number of matched pairs. +][ + _Construction._ Given strings $s_1$ and $s_2$: + + _Variables:_ Binary $m_(j_1, j_2) in {0, 1}$ for each $(j_1, j_2)$ with $s_1[j_1] = s_2[j_2]$. Interpretation: $m_(j_1, j_2) = 1$ iff position $j_1$ of $s_1$ is matched to position $j_2$ of $s_2$. + + _Constraints:_ (1) Each position in $s_1$ matched at most once: $sum_(j_2 : (j_1, j_2) in M) m_(j_1, j_2) lt.eq 1$ for all $j_1$. (2) Each position in $s_2$ matched at most once: $sum_(j_1 : (j_1, j_2) in M) m_(j_1, j_2) lt.eq 1$ for all $j_2$. (3) No crossings: for $(j_1, j_2), (j'_1, j'_2) in M$ with $j_1 < j'_1$ and $j_2 > j'_2$: $m_(j_1, j_2) + m_(j'_1, j'_2) lt.eq 1$. + + _Objective:_ Maximize $sum_((j_1, j_2) in M) m_(j_1, j_2)$. + + _Correctness._ ($arrow.r.double$) A common subsequence of length $ell$ defines $ell$ matched pairs that are order-preserving (no crossings) and one-to-one, yielding a feasible ILP solution with objective $ell$. ($arrow.l.double$) An ILP solution with objective $ell$ defines $ell$ matched pairs; constraints (1)--(2) ensure one-to-one matching, and constraint (3) ensures order preservation, so the matched characters form a common subsequence of length $ell$. + + _Solution extraction._ Collect pairs $(j_1, j_2)$ with $m_(j_1, j_2) = 1$, sort by $j_1$, and read the characters. +] + == Unit Disk Mapping #reduction-rule("MaximumIndependentSet", "KingsSubgraph")[ diff --git a/docs/paper/references.bib b/docs/paper/references.bib index 0ec874f61..ba7e02393 100644 --- a/docs/paper/references.bib +++ b/docs/paper/references.bib @@ -407,6 +407,27 @@ @article{ibarra1975 doi = {10.1145/321906.321909} } +@article{maier1978, + author = {David Maier}, + title = {The Complexity of Some Problems on Subsequences and Supersequences}, + journal = {Journal of the ACM}, + volume = {25}, + number = {2}, + pages = {322--336}, + year = {1978}, + doi = {10.1145/322063.322075} +} + +@article{blum2021, + author = {Christian Blum and Maria J. Blesa and Borja Calvo}, + title = {{ILP}-based reduced variable neighborhood search for the longest common subsequence problem}, + journal = {Computers \& Operations Research}, + volume = {125}, + pages = {105089}, + year = {2021}, + doi = {10.1016/j.cor.2020.105089} +} + @book{sipser2012, author = {Michael Sipser}, title = {Introduction to the Theory of Computation}, diff --git a/examples/reduction_longestcommonsubsequence_to_ilp.rs b/examples/reduction_longestcommonsubsequence_to_ilp.rs new file mode 100644 index 000000000..5ff1529ac --- /dev/null +++ b/examples/reduction_longestcommonsubsequence_to_ilp.rs @@ -0,0 +1,109 @@ +// # LongestCommonSubsequence to ILP Reduction +// +// ## Mathematical Formulation +// Uses the match-pair formulation (Blum et al., 2021). +// For each position pair (j1, j2) where s1[j1] == s2[j2], a binary variable m_{j1,j2}. +// Constraints: +// (1) Each s1 position matched at most once +// (2) Each s2 position matched at most once +// (3) Order preservation: no crossings among matched pairs +// Objective: maximize total matched pairs. +// +// ## This Example +// - Instance: s1 = "ABAC", s2 = "BACA" +// - 6 match pairs, LCS = "BAC" (length 3) +// +// ## Output +// Exports `docs/paper/examples/longestcommonsubsequence_to_ilp.json`. + +use problemreductions::export::*; +use problemreductions::models::algebraic::ILP; +use problemreductions::prelude::*; +use problemreductions::solvers::ILPSolver; + +pub fn run() { + // 1. Create LCS instance: s1 = "ABAC", s2 = "BACA" + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'A', b'C'], + vec![b'B', b'A', b'C', b'A'], + ]); + + // 2. Reduce to ILP + let reduction = ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + + // 3. Print transformation + println!("\n=== Problem Transformation ==="); + println!( + "Source: LCS with {} strings, total length {}", + problem.num_strings(), + problem.total_length() + ); + println!( + "Target: ILP with {} variables, {} constraints", + ilp.num_vars, + ilp.constraints.len() + ); + + // 4. Solve ILP + let solver = ILPSolver::new(); + let ilp_solution = solver + .solve(ilp) + .expect("ILP should be feasible for ABAC/BACA"); + println!("\n=== Solution ==="); + println!("ILP solution: {:?}", &ilp_solution); + + // 5. Extract LCS solution + let extracted = reduction.extract_solution(&ilp_solution); + println!("Source LCS config: {:?}", extracted); + + // 6. Verify + let metric = problem.evaluate(&extracted); + assert!(metric.is_valid()); + let lcs_length = metric.unwrap(); + println!("LCS length: {}", lcs_length); + assert_eq!(lcs_length, 3); + println!("\nReduction verified successfully"); + + // 7. Collect solutions and export JSON + let solutions = vec![SolutionPair { + source_config: extracted, + target_config: ilp_solution, + }]; + + let source_variant = variant_to_map(LongestCommonSubsequence::variant()); + let target_variant = variant_to_map(ILP::::variant()); + let overhead = + lookup_overhead("LongestCommonSubsequence", &source_variant, "ILP", &target_variant) + .expect("LCS -> ILP overhead not found"); + + let data = ReductionData { + source: ProblemSide { + problem: LongestCommonSubsequence::NAME.to_string(), + variant: source_variant, + instance: serde_json::json!({ + "strings": [ + [65, 66, 65, 67], + [66, 65, 67, 65], + ], + }), + }, + target: ProblemSide { + problem: ILP::::NAME.to_string(), + variant: target_variant, + instance: serde_json::json!({ + "num_vars": ilp.num_vars, + "num_constraints": ilp.constraints.len(), + }), + }, + overhead: overhead_to_json(&overhead), + }; + + let results = ResultData { solutions }; + let name = "longestcommonsubsequence_to_ilp"; + write_example(name, &data, &results); +} + +fn main() { + run() +} diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs index 91792e20b..2eee89aca 100644 --- a/problemreductions-cli/src/cli.rs +++ b/problemreductions-cli/src/cli.rs @@ -217,6 +217,7 @@ Flags by problem type: BicliqueCover --left, --right, --biedges, --k BMF --matrix (0/1), --rank CVP --basis, --target-vec [--bounds] + LCS --strings FVS --arcs [--weights] [--num-vertices] ILP, CircuitSAT (via reduction only) @@ -329,6 +330,9 @@ pub struct CreateArgs { /// Variable bounds for CVP as "lower,upper" (e.g., "-10,10") [default: -10,10] #[arg(long, allow_hyphen_values = true)] pub bounds: Option, + /// Input strings for LCS (semicolon-separated, e.g., "ABAC;BACA") + #[arg(long)] + pub strings: Option, /// Directed arcs for directed graph problems (e.g., 0>1,1>2,2>0) #[arg(long)] pub arcs: Option, diff --git a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs index 2df4f0994..fae9adcd1 100644 --- a/problemreductions-cli/src/commands/create.rs +++ b/problemreductions-cli/src/commands/create.rs @@ -6,7 +6,7 @@ use crate::util; use anyhow::{bail, Context, Result}; use problemreductions::models::algebraic::{ClosestVectorProblem, BMF}; use problemreductions::models::graph::GraphPartitioning; -use problemreductions::models::misc::{BinPacking, PaintShop}; +use problemreductions::models::misc::{BinPacking, LongestCommonSubsequence, PaintShop}; use problemreductions::prelude::*; use problemreductions::registry::collect_schemas; use problemreductions::topology::{ @@ -47,6 +47,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool { && args.basis.is_none() && args.target_vec.is_none() && args.bounds.is_none() + && args.strings.is_none() && args.arcs.is_none() } @@ -424,6 +425,24 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { (ser(BMF::new(matrix, rank))?, resolved_variant.clone()) } + // LongestCommonSubsequence + "LongestCommonSubsequence" => { + let strings_str = args.strings.as_deref().ok_or_else(|| { + anyhow::anyhow!( + "LCS requires --strings\n\n\ + Usage: pred create LCS --strings \"ABAC;BACA\"" + ) + })?; + let strings: Vec> = strings_str + .split(';') + .map(|s| s.trim().as_bytes().to_vec()) + .collect(); + ( + ser(LongestCommonSubsequence::new(strings))?, + resolved_variant.clone(), + ) + } + // ClosestVectorProblem "ClosestVectorProblem" => { let basis_str = args.basis.as_deref().ok_or_else(|| { diff --git a/problemreductions-cli/src/dispatch.rs b/problemreductions-cli/src/dispatch.rs index 49dd523cb..e162efc24 100644 --- a/problemreductions-cli/src/dispatch.rs +++ b/problemreductions-cli/src/dispatch.rs @@ -1,6 +1,6 @@ use anyhow::{bail, Context, Result}; use problemreductions::models::algebraic::{ClosestVectorProblem, ILP}; -use problemreductions::models::misc::{BinPacking, Knapsack, SubsetSum}; +use problemreductions::models::misc::{BinPacking, Knapsack, LongestCommonSubsequence, SubsetSum}; use problemreductions::prelude::*; use problemreductions::rules::{MinimizeSteps, ReductionGraph}; use problemreductions::solvers::{BruteForce, ILPSolver, Solver}; @@ -246,6 +246,7 @@ pub fn load_problem( _ => deser_opt::>(data), }, "Knapsack" => deser_opt::(data), + "LongestCommonSubsequence" => deser_opt::(data), "MinimumFeedbackVertexSet" => deser_opt::>(data), "SubsetSum" => deser_sat::(data), _ => bail!("{}", crate::problem_name::unknown_problem_error(&canonical)), @@ -309,6 +310,7 @@ pub fn serialize_any_problem( _ => try_ser::>(any), }, "Knapsack" => try_ser::(any), + "LongestCommonSubsequence" => try_ser::(any), "MinimumFeedbackVertexSet" => try_ser::>(any), "SubsetSum" => try_ser::(any), _ => bail!("{}", crate::problem_name::unknown_problem_error(&canonical)), diff --git a/problemreductions-cli/src/problem_name.rs b/problemreductions-cli/src/problem_name.rs index 2b6c8c737..a595f61b6 100644 --- a/problemreductions-cli/src/problem_name.rs +++ b/problemreductions-cli/src/problem_name.rs @@ -20,6 +20,7 @@ pub const ALIASES: &[(&str, &str)] = &[ ("KSAT", "KSatisfiability"), ("TSP", "TravelingSalesman"), ("CVP", "ClosestVectorProblem"), + ("LCS", "LongestCommonSubsequence"), ("MaxMatching", "MaximumMatching"), ("FVS", "MinimumFeedbackVertexSet"), ]; @@ -54,6 +55,7 @@ pub fn resolve_alias(input: &str) -> String { "binpacking" => "BinPacking".to_string(), "cvp" | "closestvectorproblem" => "ClosestVectorProblem".to_string(), "knapsack" => "Knapsack".to_string(), + "lcs" | "longestcommonsubsequence" => "LongestCommonSubsequence".to_string(), "fvs" | "minimumfeedbackvertexset" => "MinimumFeedbackVertexSet".to_string(), "subsetsum" => "SubsetSum".to_string(), _ => input.to_string(), // pass-through for exact names diff --git a/src/lib.rs b/src/lib.rs index a74c906f3..ef6754138 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,7 +45,7 @@ pub mod prelude { KColoring, MaxCut, MaximalIS, MaximumClique, MaximumIndependentSet, MaximumMatching, MinimumDominatingSet, MinimumFeedbackVertexSet, MinimumVertexCover, TravelingSalesman, }; - pub use crate::models::misc::{BinPacking, Factoring, Knapsack, PaintShop, SubsetSum}; + pub use crate::models::misc::{BinPacking, Factoring, Knapsack, LongestCommonSubsequence, PaintShop, SubsetSum}; pub use crate::models::set::{MaximumSetPacking, MinimumSetCovering}; // Core traits diff --git a/src/models/misc/longest_common_subsequence.rs b/src/models/misc/longest_common_subsequence.rs new file mode 100644 index 000000000..8d4fd6e0c --- /dev/null +++ b/src/models/misc/longest_common_subsequence.rs @@ -0,0 +1,188 @@ +//! Longest Common Subsequence (LCS) problem implementation. +//! +//! Given a set of strings over a finite alphabet, find the longest string +//! that is a subsequence of every input string. Polynomial-time for 2 strings +//! via dynamic programming, but NP-hard for k >= 3 strings (Maier, 1978). + +use crate::registry::{FieldInfo, ProblemSchemaEntry}; +use crate::traits::{OptimizationProblem, Problem}; +use crate::types::{Direction, SolutionSize}; +use serde::{Deserialize, Serialize}; + +inventory::submit! { + ProblemSchemaEntry { + name: "LongestCommonSubsequence", + module_path: module_path!(), + description: "Find the longest string that is a subsequence of every input string", + fields: &[ + FieldInfo { name: "strings", type_name: "Vec>", description: "The input strings" }, + ], + } +} + +/// The Longest Common Subsequence problem. +/// +/// Given `k` strings `s_1, ..., s_k` over a finite alphabet, find a longest +/// string `w` that is a subsequence of every `s_i`. +/// +/// A string `w` is a **subsequence** of `s` if `w` can be obtained by deleting +/// zero or more characters from `s` without changing the order of the remaining +/// characters. +/// +/// # Representation +/// +/// The configuration is a binary vector of length equal to the shortest string. +/// Each entry indicates whether the corresponding character of the shortest +/// string is included in the candidate subsequence. +/// +/// # Example +/// +/// ``` +/// use problemreductions::models::misc::LongestCommonSubsequence; +/// use problemreductions::{Problem, Solver, BruteForce}; +/// +/// let problem = LongestCommonSubsequence::new(vec![ +/// vec![b'A', b'B', b'C'], +/// vec![b'A', b'C', b'B'], +/// ]); +/// let solver = BruteForce::new(); +/// let solution = solver.find_best(&problem); +/// assert!(solution.is_some()); +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LongestCommonSubsequence { + /// The input strings. + strings: Vec>, +} + +impl LongestCommonSubsequence { + /// Create a new LCS instance from a list of strings. + /// + /// # Panics + /// Panics if fewer than 2 strings are provided. + pub fn new(strings: Vec>) -> Self { + assert!( + strings.len() >= 2, + "LCS requires at least 2 strings, got {}", + strings.len() + ); + Self { strings } + } + + /// Get the input strings. + pub fn strings(&self) -> &[Vec] { + &self.strings + } + + /// Get the number of strings. + pub fn num_strings(&self) -> usize { + self.strings.len() + } + + /// Get the total length of all strings. + pub fn total_length(&self) -> usize { + self.strings.iter().map(|s| s.len()).sum() + } + + /// Get the length of the first string. + pub fn num_chars_first(&self) -> usize { + self.strings[0].len() + } + + /// Get the length of the second string. + pub fn num_chars_second(&self) -> usize { + self.strings[1].len() + } + + /// Index of the shortest string. + fn shortest_index(&self) -> usize { + self.strings + .iter() + .enumerate() + .min_by_key(|(_, s)| s.len()) + .map(|(i, _)| i) + .unwrap() + } + + /// Length of the shortest string. + pub fn min_string_length(&self) -> usize { + self.strings[self.shortest_index()].len() + } +} + +/// Check if `candidate` is a subsequence of `target`. +fn is_subsequence(candidate: &[u8], target: &[u8]) -> bool { + let mut ti = 0; + for &ch in candidate { + while ti < target.len() && target[ti] != ch { + ti += 1; + } + if ti >= target.len() { + return false; + } + ti += 1; + } + true +} + +impl Problem for LongestCommonSubsequence { + const NAME: &'static str = "LongestCommonSubsequence"; + type Metric = SolutionSize; + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } + + fn dims(&self) -> Vec { + vec![2; self.min_string_length()] + } + + fn evaluate(&self, config: &[usize]) -> SolutionSize { + let shortest_idx = self.shortest_index(); + let shortest = &self.strings[shortest_idx]; + + if config.len() != shortest.len() { + return SolutionSize::Invalid; + } + if config.iter().any(|&v| v >= 2) { + return SolutionSize::Invalid; + } + + // Build candidate subsequence from selected positions of shortest string + let candidate: Vec = config + .iter() + .enumerate() + .filter(|(_, &x)| x == 1) + .map(|(i, _)| shortest[i]) + .collect(); + + // Check that candidate is a subsequence of ALL strings + for (i, s) in self.strings.iter().enumerate() { + if i == shortest_idx { + // The candidate is always a subsequence of the string it was built from + continue; + } + if !is_subsequence(&candidate, s) { + return SolutionSize::Invalid; + } + } + + SolutionSize::Valid(candidate.len() as i32) + } +} + +impl OptimizationProblem for LongestCommonSubsequence { + type Value = i32; + + fn direction(&self) -> Direction { + Direction::Maximize + } +} + +crate::declare_variants! { + LongestCommonSubsequence => "2^min_string_length", +} + +#[cfg(test)] +#[path = "../../unit_tests/models/misc/longest_common_subsequence.rs"] +mod tests; diff --git a/src/models/misc/mod.rs b/src/models/misc/mod.rs index 36ebe905b..943b758a2 100644 --- a/src/models/misc/mod.rs +++ b/src/models/misc/mod.rs @@ -4,17 +4,20 @@ //! - [`BinPacking`]: Bin Packing (minimize bins) //! - [`Factoring`]: Integer factorization //! - [`Knapsack`]: 0-1 Knapsack (maximize value subject to weight capacity) +//! - [`LongestCommonSubsequence`]: Longest Common Subsequence //! - [`PaintShop`]: Minimize color switches in paint shop scheduling //! - [`SubsetSum`]: Find a subset summing to exactly a target value mod bin_packing; pub(crate) mod factoring; mod knapsack; +mod longest_common_subsequence; pub(crate) mod paintshop; mod subset_sum; pub use bin_packing::BinPacking; pub use factoring::Factoring; pub use knapsack::Knapsack; +pub use longest_common_subsequence::LongestCommonSubsequence; pub use paintshop::PaintShop; pub use subset_sum::SubsetSum; diff --git a/src/models/mod.rs b/src/models/mod.rs index ceb584ce2..6c8ac38a7 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -16,5 +16,5 @@ pub use graph::{ MaximumIndependentSet, MaximumMatching, MinimumDominatingSet, MinimumFeedbackVertexSet, MinimumVertexCover, SpinGlass, TravelingSalesman, }; -pub use misc::{BinPacking, Factoring, Knapsack, PaintShop, SubsetSum}; +pub use misc::{BinPacking, Factoring, Knapsack, LongestCommonSubsequence, PaintShop, SubsetSum}; pub use set::{MaximumSetPacking, MinimumSetCovering}; diff --git a/src/rules/longestcommonsubsequence_ilp.rs b/src/rules/longestcommonsubsequence_ilp.rs new file mode 100644 index 000000000..a69610a35 --- /dev/null +++ b/src/rules/longestcommonsubsequence_ilp.rs @@ -0,0 +1,175 @@ +//! Reduction from LongestCommonSubsequence to ILP (Integer Linear Programming). +//! +//! Uses the match-pair formulation (Blum et al., 2021; Althaus et al., 2006). +//! For 2 strings s1 (length n1) and s2 (length n2): +//! +//! ## Variables +//! For each pair (j1, j2) where s1[j1] == s2[j2], a binary variable m_{j1,j2}. +//! +//! ## Constraints +//! 1. Each position in s1 matched at most once +//! 2. Each position in s2 matched at most once +//! 3. Order preservation (no crossings): for (j1,j2),(j1',j2') with j1 < j1' and j2 > j2': +//! m_{j1,j2} + m_{j1',j2'} <= 1 +//! +//! ## Objective +//! Maximize sum of all match variables. + +use crate::models::algebraic::{LinearConstraint, ObjectiveSense, ILP}; +use crate::models::misc::LongestCommonSubsequence; +use crate::reduction; +use crate::rules::traits::{ReduceTo, ReductionResult}; + +/// Result of reducing LongestCommonSubsequence to ILP. +#[derive(Debug, Clone)] +pub struct ReductionLCSToILP { + target: ILP, + /// The match pairs: (j1, j2) for each variable index. + match_pairs: Vec<(usize, usize)>, + /// Number of characters in the first string. + n1: usize, + /// Number of characters in the second string. + n2: usize, +} + +impl ReductionResult for ReductionLCSToILP { + type Source = LongestCommonSubsequence; + type Target = ILP; + + fn target_problem(&self) -> &ILP { + &self.target + } + + /// Extract solution from ILP back to LCS. + /// + /// The ILP solution has binary variables for match pairs. We extract the + /// matched positions, build the LCS, and map back to a binary selection + /// on the shortest source string. + fn extract_solution(&self, target_solution: &[usize]) -> Vec { + // The source problem's dims() is based on the shortest string. + // We build match pairs on s1=strings[0], s2=strings[1]. + // If shortest is s1, we use j1 positions; if s2, we use j2 positions. + let shortest_len = std::cmp::min(self.n1, self.n2); + let shortest_is_first = self.n1 <= self.n2; + + let matched_positions: Vec = self + .match_pairs + .iter() + .enumerate() + .filter(|(i, _)| target_solution.get(*i).copied().unwrap_or(0) == 1) + .map(|(_, &(j1, j2))| { + if shortest_is_first { + j1 + } else { + j2 + } + }) + .collect(); + + let mut config = vec![0usize; shortest_len]; + for pos in matched_positions { + if pos < config.len() { + config[pos] = 1; + } + } + config + } +} + +#[reduction(overhead = { + num_vars = "num_chars_first * num_chars_second", + num_constraints = "num_chars_first + num_chars_second + (num_chars_first * num_chars_second) ^ 2", +})] +impl ReduceTo> for LongestCommonSubsequence { + type Result = ReductionLCSToILP; + + fn reduce_to(&self) -> Self::Result { + let strings = self.strings(); + assert!( + strings.len() == 2, + "LCS to ILP reduction is defined for exactly 2 strings, got {}", + strings.len() + ); + + let s1 = &strings[0]; + let s2 = &strings[1]; + let n1 = s1.len(); + let n2 = s2.len(); + + // Build match pairs: (j1, j2) where s1[j1] == s2[j2] + let mut match_pairs: Vec<(usize, usize)> = Vec::new(); + for (j1, &c1) in s1.iter().enumerate() { + for (j2, &c2) in s2.iter().enumerate() { + if c1 == c2 { + match_pairs.push((j1, j2)); + } + } + } + + let num_vars = match_pairs.len(); + let mut constraints = Vec::new(); + + // Constraint 1: Each position in s1 matched at most once + for j1 in 0..n1 { + let terms: Vec<(usize, f64)> = match_pairs + .iter() + .enumerate() + .filter(|(_, &(a, _))| a == j1) + .map(|(idx, _)| (idx, 1.0)) + .collect(); + if !terms.is_empty() { + constraints.push(LinearConstraint::le(terms, 1.0)); + } + } + + // Constraint 2: Each position in s2 matched at most once + for j2 in 0..n2 { + let terms: Vec<(usize, f64)> = match_pairs + .iter() + .enumerate() + .filter(|(_, &(_, b))| b == j2) + .map(|(idx, _)| (idx, 1.0)) + .collect(); + if !terms.is_empty() { + constraints.push(LinearConstraint::le(terms, 1.0)); + } + } + + // Constraint 3: Order preservation (no crossings) + // For all pairs (j1, j2) and (j1', j2') with j1 < j1' and j2 > j2': + // m_{j1,j2} + m_{j1',j2'} <= 1 + for (i, &(j1, j2)) in match_pairs.iter().enumerate() { + for (k, &(j1p, j2p)) in match_pairs.iter().enumerate() { + if i < k && j1 < j1p && j2 > j2p { + constraints.push(LinearConstraint::le( + vec![(i, 1.0), (k, 1.0)], + 1.0, + )); + } + // Also check the reverse: j1 > j1p and j2 < j2p + if i < k && j1 > j1p && j2 < j2p { + constraints.push(LinearConstraint::le( + vec![(i, 1.0), (k, 1.0)], + 1.0, + )); + } + } + } + + // Objective: maximize sum of all match variables + let objective: Vec<(usize, f64)> = (0..num_vars).map(|i| (i, 1.0)).collect(); + + let ilp = ILP::::new(num_vars, constraints, objective, ObjectiveSense::Maximize); + + ReductionLCSToILP { + target: ilp, + match_pairs, + n1, + n2, + } + } +} + +#[cfg(test)] +#[path = "../unit_tests/rules/longestcommonsubsequence_ilp.rs"] +mod tests; diff --git a/src/rules/mod.rs b/src/rules/mod.rs index fa77a18dc..5414031ec 100644 --- a/src/rules/mod.rs +++ b/src/rules/mod.rs @@ -49,6 +49,8 @@ mod ilp_bool_ilp_i32; #[cfg(feature = "ilp-solver")] mod ilp_qubo; #[cfg(feature = "ilp-solver")] +mod longestcommonsubsequence_ilp; +#[cfg(feature = "ilp-solver")] mod maximumclique_ilp; #[cfg(feature = "ilp-solver")] mod maximummatching_ilp; diff --git a/src/unit_tests/models/misc/longest_common_subsequence.rs b/src/unit_tests/models/misc/longest_common_subsequence.rs new file mode 100644 index 000000000..e9059ddaf --- /dev/null +++ b/src/unit_tests/models/misc/longest_common_subsequence.rs @@ -0,0 +1,179 @@ +use super::*; +use crate::solvers::{BruteForce, Solver}; +use crate::traits::{OptimizationProblem, Problem}; +use crate::types::Direction; + +#[test] +fn test_lcs_basic() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C', b'D', b'A', b'B'], + vec![b'B', b'D', b'C', b'A', b'B', b'A'], + ]); + assert_eq!(problem.num_strings(), 2); + assert_eq!(problem.total_length(), 12); + assert_eq!(problem.num_chars_first(), 6); + assert_eq!(problem.num_chars_second(), 6); + assert_eq!(problem.direction(), Direction::Maximize); + assert_eq!(::NAME, "LongestCommonSubsequence"); + assert_eq!(::variant(), vec![]); +} + +#[test] +fn test_lcs_dims() { + // Shortest string has 4 chars + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'A', b'C'], + vec![b'B', b'A', b'C', b'A', b'B', b'C'], + ]); + assert_eq!(problem.dims(), vec![2; 4]); +} + +#[test] +fn test_lcs_evaluate_valid() { + // s1 = "ABC", s2 = "ACB" + // Selecting positions 0,2 of s1 (shorter) gives "AC" which is subseq of "ACB" + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'A', b'C', b'B'], + ]); + let result = problem.evaluate(&[1, 0, 1]); // "AC" + assert!(result.is_valid()); + assert_eq!(result.unwrap(), 2); +} + +#[test] +fn test_lcs_evaluate_invalid_subsequence() { + // s1 = "ABC", s2 = "CAB" + // Selecting positions 1,2 of s1 gives "BC" - is "BC" a subseq of "CAB"? No + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'C', b'A', b'B'], + ]); + let result = problem.evaluate(&[0, 1, 1]); // "BC" + assert!(!result.is_valid()); +} + +#[test] +fn test_lcs_evaluate_empty_selection() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'X', b'Y', b'Z'], + ]); + let result = problem.evaluate(&[0, 0, 0]); // empty + assert!(result.is_valid()); + assert_eq!(result.unwrap(), 0); +} + +#[test] +fn test_lcs_evaluate_wrong_config_length() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'A', b'B', b'C'], + ]); + assert!(!problem.evaluate(&[1]).is_valid()); + assert!(!problem.evaluate(&[1, 0, 0]).is_valid()); +} + +#[test] +fn test_lcs_evaluate_invalid_variable_value() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'A', b'B'], + ]); + assert!(!problem.evaluate(&[2, 0]).is_valid()); +} + +#[test] +fn test_lcs_brute_force_two_strings() { + // s1 = "ABAC", s2 = "BACA" + // LCS = "BAC" or "AAC" or "ABA", length 3 + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'A', b'C'], + vec![b'B', b'A', b'C', b'A'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 3); +} + +#[test] +fn test_lcs_identical_strings() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'A', b'B', b'C'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert_eq!(metric.unwrap(), 3); +} + +#[test] +fn test_lcs_no_common_chars() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'C', b'D'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert_eq!(metric.unwrap(), 0); +} + +#[test] +fn test_lcs_single_char_alphabet() { + // All same character - LCS is length of shortest string + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'A', b'A'], + vec![b'A', b'A'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert_eq!(metric.unwrap(), 2); +} + +#[test] +fn test_lcs_three_strings() { + // Example from issue #108 + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C', b'D', b'A', b'B'], + vec![b'B', b'D', b'C', b'A', b'B', b'A'], + vec![b'B', b'C', b'A', b'D', b'B', b'A'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 4); // "BCAB" or equivalent +} + +#[test] +fn test_lcs_serialization() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'A', b'C', b'B'], + ]); + let json = serde_json::to_value(&problem).unwrap(); + let restored: LongestCommonSubsequence = serde_json::from_value(json).unwrap(); + assert_eq!(restored.strings(), problem.strings()); +} + +#[test] +fn test_lcs_empty_string_in_input() { + let problem = LongestCommonSubsequence::new(vec![ + vec![], + vec![b'A', b'B', b'C'], + ]); + assert_eq!(problem.dims(), Vec::::new()); + assert!(problem.evaluate(&[]).is_valid()); + assert_eq!(problem.evaluate(&[]).unwrap(), 0); +} + +#[test] +#[should_panic(expected = "LCS requires at least 2 strings")] +fn test_lcs_single_string_panics() { + LongestCommonSubsequence::new(vec![vec![b'A', b'B']]); +} diff --git a/src/unit_tests/rules/longestcommonsubsequence_ilp.rs b/src/unit_tests/rules/longestcommonsubsequence_ilp.rs new file mode 100644 index 000000000..101da6b99 --- /dev/null +++ b/src/unit_tests/rules/longestcommonsubsequence_ilp.rs @@ -0,0 +1,183 @@ +use super::*; +use crate::solvers::{BruteForce, ILPSolver, Solver}; +use crate::traits::Problem; + +#[test] +fn test_lcs_to_ilp_issue_example() { + // From issue #110: s1 = "ABAC", s2 = "BACA" + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'A', b'C'], + vec![b'B', b'A', b'C', b'A'], + ]); + let reduction: ReductionLCSToILP = + ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + + // 6 match pairs as described in the issue + assert_eq!(ilp.num_vars, 6); + + // Solve ILP + let ilp_solver = ILPSolver::new(); + let ilp_solution = ilp_solver.solve(ilp).expect("ILP should be solvable"); + + // Extract solution back to LCS config + let extracted = reduction.extract_solution(&ilp_solution); + + // Verify the solution is valid and optimal (length 3) + let metric = problem.evaluate(&extracted); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 3); +} + +#[test] +fn test_lcs_to_ilp_closed_loop() { + // Compare brute force LCS with ILP-based solution + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'A', b'C'], + vec![b'B', b'A', b'C', b'A'], + ]); + + // Brute force optimal + let bf = BruteForce::new(); + let bf_solution = bf.find_best(&problem).expect("should find a solution"); + let bf_metric = problem.evaluate(&bf_solution); + assert!(bf_metric.is_valid()); + let bf_value = bf_metric.unwrap(); + + // ILP optimal + let reduction: ReductionLCSToILP = + ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + let ilp_solver = ILPSolver::new(); + let ilp_solution = ilp_solver.solve(ilp).expect("ILP should be solvable"); + let extracted = reduction.extract_solution(&ilp_solution); + let ilp_metric = problem.evaluate(&extracted); + assert!(ilp_metric.is_valid()); + let ilp_value = ilp_metric.unwrap(); + + // Both should give the same optimal value + assert_eq!( + bf_value, ilp_value, + "BF optimal {} != ILP optimal {}", + bf_value, ilp_value + ); +} + +#[test] +fn test_lcs_to_ilp_identical_strings() { + // LCS of identical strings = the string itself + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'A', b'B', b'C'], + ]); + let reduction: ReductionLCSToILP = + ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + + let ilp_solver = ILPSolver::new(); + let ilp_solution = ilp_solver.solve(ilp).expect("ILP should be solvable"); + let extracted = reduction.extract_solution(&ilp_solution); + + let metric = problem.evaluate(&extracted); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 3); +} + +#[test] +fn test_lcs_to_ilp_no_common_chars() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'C', b'D'], + ]); + let reduction: ReductionLCSToILP = + ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + + // No match pairs → 0 variables + assert_eq!(ilp.num_vars, 0); + + let ilp_solver = ILPSolver::new(); + let ilp_solution = ilp_solver.solve(ilp).expect("ILP should be solvable"); + let extracted = reduction.extract_solution(&ilp_solution); + + let metric = problem.evaluate(&extracted); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 0); +} + +#[test] +fn test_lcs_to_ilp_single_char_alphabet() { + // All same chars → LCS = min length + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'A', b'A'], + vec![b'A', b'A'], + ]); + let reduction: ReductionLCSToILP = + ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + + let ilp_solver = ILPSolver::new(); + let ilp_solution = ilp_solver.solve(ilp).expect("ILP should be solvable"); + let extracted = reduction.extract_solution(&ilp_solution); + + let metric = problem.evaluate(&extracted); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 2); +} + +#[test] +fn test_lcs_to_ilp_asymmetric_lengths() { + // s1 = "AB", s2 = "AABB" + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'A', b'A', b'B', b'B'], + ]); + + // BF optimal + let bf = BruteForce::new(); + let bf_solution = bf.find_best(&problem).unwrap(); + let bf_value = problem.evaluate(&bf_solution).unwrap(); + + // ILP optimal + let reduction: ReductionLCSToILP = + ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + let ilp_solver = ILPSolver::new(); + let ilp_solution = ilp_solver.solve(ilp).unwrap(); + let extracted = reduction.extract_solution(&ilp_solution); + let ilp_value = problem.evaluate(&extracted).unwrap(); + + assert_eq!(bf_value, ilp_value); + assert_eq!(ilp_value, 2); // "AB" is subseq of both +} + +#[test] +fn test_lcs_to_ilp_constraint_structure() { + // Verify basic ILP structure for a small example + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'B', b'A'], + ]); + let reduction: ReductionLCSToILP = + ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + + // Match pairs: (0,1)=A, (1,0)=B → 2 variables + assert_eq!(ilp.num_vars, 2); + + // Constraints: + // - s1 pos 0: m_{0,1} <= 1 (1 constraint) + // - s1 pos 1: m_{1,0} <= 1 (1 constraint) + // - s2 pos 0: m_{1,0} <= 1 (1 constraint) + // - s2 pos 1: m_{0,1} <= 1 (1 constraint) + // - Crossing: j1=0 < j1'=1 and j2=1 > j2'=0 → m_{0,1} + m_{1,0} <= 1 (1 constraint) + // Total: 5 + assert_eq!(ilp.constraints.len(), 5); + + // Optimal: can only pick one of the two → LCS length 1 + let ilp_solver = ILPSolver::new(); + let ilp_solution = ilp_solver.solve(ilp).unwrap(); + let extracted = reduction.extract_solution(&ilp_solution); + let metric = problem.evaluate(&extracted); + assert_eq!(metric.unwrap(), 1); +} diff --git a/tests/suites/examples.rs b/tests/suites/examples.rs index 3002a6838..1926fe0c1 100644 --- a/tests/suites/examples.rs +++ b/tests/suites/examples.rs @@ -48,6 +48,7 @@ example_test!(reduction_satisfiability_to_circuitsat); example_test!(reduction_satisfiability_to_ksatisfiability); example_test!(reduction_satisfiability_to_maximumindependentset); example_test!(reduction_satisfiability_to_minimumdominatingset); +example_test!(reduction_longestcommonsubsequence_to_ilp); example_test!(reduction_spinglass_to_maxcut); example_test!(reduction_spinglass_to_qubo); example_test!(reduction_travelingsalesman_to_ilp); @@ -183,6 +184,10 @@ example_fn!( test_satisfiability_to_minimumdominatingset, reduction_satisfiability_to_minimumdominatingset ); +example_fn!( + test_longestcommonsubsequence_to_ilp, + reduction_longestcommonsubsequence_to_ilp +); example_fn!(test_spinglass_to_maxcut, reduction_spinglass_to_maxcut); example_fn!(test_spinglass_to_qubo, reduction_spinglass_to_qubo); example_fn!(