From 8f9fb061f6901c41a01b3559eeb5c451813b96fc Mon Sep 17 00:00:00 2001 From: zazabap Date: Thu, 12 Mar 2026 12:08:40 +0000 Subject: [PATCH 1/6] Add plan for #110: LongestCommonSubsequence to ILP --- docs/plans/2026-03-12-lcs-to-ilp.md | 105 ++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 docs/plans/2026-03-12-lcs-to-ilp.md diff --git a/docs/plans/2026-03-12-lcs-to-ilp.md b/docs/plans/2026-03-12-lcs-to-ilp.md new file mode 100644 index 000000000..ffcb86403 --- /dev/null +++ b/docs/plans/2026-03-12-lcs-to-ilp.md @@ -0,0 +1,105 @@ +# Plan: LongestCommonSubsequence model + LCS to ILP reduction + +Fixes #110 (and partially addresses #108 since the LCS model is needed). + +## Overview + +Since the LongestCommonSubsequence model does not exist in main (PR #170 branch was deleted), this PR implements both: +1. The LCS model (from issue #108) +2. The LCS-to-ILP reduction rule (from issue #110) + +## Step 1: Implement the LCS model + +### 1a. Create `src/models/misc/longest_common_subsequence.rs` + +- Struct: `LongestCommonSubsequence` with field `strings: Vec>` +- Getters: `strings()`, `num_strings()`, `total_length()`, `num_chars_first()`, `num_chars_second()` +- `Problem` impl: + - `NAME = "LongestCommonSubsequence"` + - `Metric = SolutionSize` (length of common subsequence) + - `dims()`: For k strings, enumerate all possible subsequences of the shortest string. + Use binary selection: each position of the shortest string is included (1) or not (0) → `vec![2; shortest_len]` + - `evaluate()`: Check if the selected subsequence of the shortest string is a subsequence of all other strings. Return length if valid, Invalid otherwise. + - `variant()`: `variant_params![]` (no type parameters) +- `OptimizationProblem` impl: `Direction::Maximize`, `Value = i32` +- `declare_variants!`: `LongestCommonSubsequence => "2^total_length"` (NP-hard for k strings) +- Serde derive for JSON serialization + +### 1b. Register in `src/models/misc/mod.rs` + +Add `mod longest_common_subsequence;` and `pub use`. + +### 1c. Register in `src/models/mod.rs` + +Add to re-exports. + +### 1d. Register in `src/lib.rs` prelude + +Add `LongestCommonSubsequence` to prelude. + +### 1e. Register in CLI + +- `problemreductions-cli/src/dispatch.rs`: Add load/serialize entries +- `problemreductions-cli/src/problem_name.rs`: Add alias mapping +- `problemreductions-cli/src/commands/create.rs`: Add creation with `--strings` flag + +### 1f. Unit tests + +Create `src/unit_tests/models/misc/longest_common_subsequence.rs`: +- `test_lcs_basic`: creation, evaluate valid/invalid +- `test_lcs_serialization`: serde roundtrip +- Brute force solver test + +### 1g. Schema registration + +Add `inventory::submit!` for `ProblemSchemaEntry`. + +## Step 2: Implement LCS to ILP reduction + +### 2a. Create `src/rules/longestcommonsubsequence_ilp.rs` + +Following the match-pair formulation from Blum et al. 2021: + +**Variable mapping (2-string case):** +For each pair (j1, j2) where s1[j1] == s2[j2], create binary variable m_{j1,j2}. + +**Constraints:** +1. Each position in s1 matched at most once +2. Each position in s2 matched at most once +3. Order preservation (no crossings): for (j1,j2),(j1',j2') with j1j2': m_{j1,j2} + m_{j1',j2'} <= 1 + +**Objective:** maximize sum of all match variables + +**Solution extraction:** matched positions with m=1, sorted by j1, give LCS characters. + +### 2b. Register in `src/rules/mod.rs` + +Add `#[cfg(feature = "ilp-solver")] mod longestcommonsubsequence_ilp;` + +### 2c. Overhead + +```rust +#[reduction(overhead = { + num_vars = "num_chars_first * num_chars_second", + num_constraints = "num_chars_first + num_chars_second + (num_chars_first * num_chars_second) ^ 2", +})] +``` + +### 2d. Unit tests + +Create `src/unit_tests/rules/longestcommonsubsequence_ilp.rs`: +- `test_longestcommonsubsequence_to_ilp_closed_loop`: brute force vs ILP comparison +- Test with issue example: ABAC, BACA → BAC (length 3) +- Edge cases: identical strings, no common chars, single char + +### 2e. Example + +Create `examples/reduction_longestcommonsubsequence_to_ilp.rs` with the ABAC/BACA example. +Register in `tests/suites/examples.rs`. + +## Step 3: Paper updates + +Add to `docs/paper/reductions.typ`: +- `problem-def("LongestCommonSubsequence")` entry +- `display-name` entry +- `reduction-rule("LongestCommonSubsequence", "ILP")` entry From 9233b4d7041937d7e7dfc314f01e0f39223f1b2c Mon Sep 17 00:00:00 2001 From: zazabap Date: Thu, 12 Mar 2026 12:17:25 +0000 Subject: [PATCH 2/6] Implement #110: LongestCommonSubsequence model and LCS to ILP reduction Add the LongestCommonSubsequence problem model (also addresses #108) and implement the match-pair ILP formulation from Blum et al. (2021) for reducing LCS to Integer Linear Programming. Model: binary selection over shortest string positions, maximizing common subsequence length across all input strings. Reduction: for 2-string case, creates binary variables for character match pairs with assignment and no-crossing constraints. Objective maximizes matched pairs. Includes CLI registration (create, dispatch, alias), unit tests with closed-loop verification, example program, and paper entries. Co-Authored-By: Claude Opus 4.6 --- docs/paper/reductions.typ | 25 +++ docs/paper/references.bib | 21 ++ ...duction_longestcommonsubsequence_to_ilp.rs | 109 ++++++++++ problemreductions-cli/src/cli.rs | 4 + problemreductions-cli/src/commands/create.rs | 21 +- problemreductions-cli/src/dispatch.rs | 4 +- problemreductions-cli/src/problem_name.rs | 2 + src/lib.rs | 2 +- src/models/misc/longest_common_subsequence.rs | 188 ++++++++++++++++++ src/models/misc/mod.rs | 3 + src/models/mod.rs | 2 +- src/rules/longestcommonsubsequence_ilp.rs | 174 ++++++++++++++++ src/rules/mod.rs | 2 + .../models/misc/longest_common_subsequence.rs | 168 ++++++++++++++++ .../rules/longestcommonsubsequence_ilp.rs | 182 +++++++++++++++++ tests/suites/examples.rs | 5 + 16 files changed, 908 insertions(+), 4 deletions(-) create mode 100644 examples/reduction_longestcommonsubsequence_to_ilp.rs create mode 100644 src/models/misc/longest_common_subsequence.rs create mode 100644 src/rules/longestcommonsubsequence_ilp.rs create mode 100644 src/unit_tests/models/misc/longest_common_subsequence.rs create mode 100644 src/unit_tests/rules/longestcommonsubsequence_ilp.rs diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index 915878827..841829cc9 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -52,6 +52,7 @@ "BicliqueCover": [Biclique Cover], "BinPacking": [Bin Packing], "ClosestVectorProblem": [Closest Vector Problem], + "LongestCommonSubsequence": [Longest Common Subsequence], ) // Definition label: "def:" — each definition block must have a matching label @@ -895,6 +896,14 @@ Biclique Cover is equivalent to factoring the biadjacency matrix $M$ of the bipa *Example.* Let $n = 4$ items with weights $(2, 3, 4, 5)$, values $(3, 4, 5, 7)$, and capacity $C = 7$. Selecting $S = {1, 2}$ (items with weights 3 and 4) gives total weight $3 + 4 = 7 lt.eq C$ and total value $4 + 5 = 9$. Selecting $S = {0, 3}$ (weights 2 and 5) gives weight $2 + 5 = 7 lt.eq C$ and value $3 + 7 = 10$, which is optimal. ] +#problem-def("LongestCommonSubsequence")[ + Given $k$ strings $s_1, dots, s_k$ over a finite alphabet $Sigma$, find a longest string $w$ that is a subsequence of every $s_i$. A string $w$ is a _subsequence_ of $s$ if $w$ can be obtained by deleting zero or more characters from $s$ without changing the order of the remaining characters. +][ + The LCS problem is polynomial-time solvable for $k = 2$ strings via dynamic programming in $O(n_1 n_2)$ time (Wagner & Fischer, 1974), but NP-hard for $k gt.eq 3$ strings @maier1978. It is a foundational problem in bioinformatics (sequence alignment), version control (diff algorithms), and data compression. The problem is listed as SR10 in Garey & Johnson @garey1979. + + *Example.* Let $s_1 = $ `ABAC` and $s_2 = $ `BACA` over $Sigma = {A, B, C}$. The longest common subsequence has length 3, e.g., `BAC`: positions 1, 2, 3 of $s_1$ match positions 0, 1, 2 of $s_2$. +] + // Completeness check: warn about problem types in JSON but missing from paper #{ let json-models = { @@ -1545,6 +1554,22 @@ The following reductions to Integer Linear Programming are straightforward formu _Solution extraction._ For each position $k$, find vertex $v$ with $x_(v,k) = 1$ to recover the tour permutation; then select edges between consecutive positions. ] +#reduction-rule("LongestCommonSubsequence", "ILP")[ + The match-pair ILP formulation @blum2021 encodes subsequence alignment as a binary optimization. For two strings $s_1$ (length $n_1$) and $s_2$ (length $n_2$), each position pair $(j_1, j_2)$ where $s_1[j_1] = s_2[j_2]$ yields a binary variable. Constraints enforce one-to-one matching and order preservation (no crossings). The objective maximizes the number of matched pairs. +][ + _Construction._ Given strings $s_1$ and $s_2$: + + _Variables:_ Binary $m_(j_1, j_2) in {0, 1}$ for each $(j_1, j_2)$ with $s_1[j_1] = s_2[j_2]$. Interpretation: $m_(j_1, j_2) = 1$ iff position $j_1$ of $s_1$ is matched to position $j_2$ of $s_2$. + + _Constraints:_ (1) Each position in $s_1$ matched at most once: $sum_(j_2 : (j_1, j_2) in M) m_(j_1, j_2) lt.eq 1$ for all $j_1$. (2) Each position in $s_2$ matched at most once: $sum_(j_1 : (j_1, j_2) in M) m_(j_1, j_2) lt.eq 1$ for all $j_2$. (3) No crossings: for $(j_1, j_2), (j'_1, j'_2) in M$ with $j_1 < j'_1$ and $j_2 > j'_2$: $m_(j_1, j_2) + m_(j'_1, j'_2) lt.eq 1$. + + _Objective:_ Maximize $sum_((j_1, j_2) in M) m_(j_1, j_2)$. + + _Correctness._ ($arrow.r.double$) A common subsequence of length $ell$ defines $ell$ matched pairs that are order-preserving (no crossings) and one-to-one, yielding a feasible ILP solution with objective $ell$. ($arrow.l.double$) An ILP solution with objective $ell$ defines $ell$ matched pairs; constraints (1)--(2) ensure one-to-one matching, and constraint (3) ensures order preservation, so the matched characters form a common subsequence of length $ell$. + + _Solution extraction._ Collect pairs $(j_1, j_2)$ with $m_(j_1, j_2) = 1$, sort by $j_1$, and read the characters. +] + == Unit Disk Mapping #reduction-rule("MaximumIndependentSet", "KingsSubgraph")[ diff --git a/docs/paper/references.bib b/docs/paper/references.bib index c3129a691..fd70679b4 100644 --- a/docs/paper/references.bib +++ b/docs/paper/references.bib @@ -387,3 +387,24 @@ @article{ibarra1975 year = {1975}, doi = {10.1145/321906.321909} } + +@article{maier1978, + author = {David Maier}, + title = {The Complexity of Some Problems on Subsequences and Supersequences}, + journal = {Journal of the ACM}, + volume = {25}, + number = {2}, + pages = {322--336}, + year = {1978}, + doi = {10.1145/322063.322075} +} + +@article{blum2021, + author = {Christian Blum and Maria J. Blesa and Borja Calvo}, + title = {{ILP}-based reduced variable neighborhood search for the longest common subsequence problem}, + journal = {Computers \& Operations Research}, + volume = {125}, + pages = {105089}, + year = {2021}, + doi = {10.1016/j.cor.2020.105089} +} diff --git a/examples/reduction_longestcommonsubsequence_to_ilp.rs b/examples/reduction_longestcommonsubsequence_to_ilp.rs new file mode 100644 index 000000000..5ff1529ac --- /dev/null +++ b/examples/reduction_longestcommonsubsequence_to_ilp.rs @@ -0,0 +1,109 @@ +// # LongestCommonSubsequence to ILP Reduction +// +// ## Mathematical Formulation +// Uses the match-pair formulation (Blum et al., 2021). +// For each position pair (j1, j2) where s1[j1] == s2[j2], a binary variable m_{j1,j2}. +// Constraints: +// (1) Each s1 position matched at most once +// (2) Each s2 position matched at most once +// (3) Order preservation: no crossings among matched pairs +// Objective: maximize total matched pairs. +// +// ## This Example +// - Instance: s1 = "ABAC", s2 = "BACA" +// - 6 match pairs, LCS = "BAC" (length 3) +// +// ## Output +// Exports `docs/paper/examples/longestcommonsubsequence_to_ilp.json`. + +use problemreductions::export::*; +use problemreductions::models::algebraic::ILP; +use problemreductions::prelude::*; +use problemreductions::solvers::ILPSolver; + +pub fn run() { + // 1. Create LCS instance: s1 = "ABAC", s2 = "BACA" + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'A', b'C'], + vec![b'B', b'A', b'C', b'A'], + ]); + + // 2. Reduce to ILP + let reduction = ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + + // 3. Print transformation + println!("\n=== Problem Transformation ==="); + println!( + "Source: LCS with {} strings, total length {}", + problem.num_strings(), + problem.total_length() + ); + println!( + "Target: ILP with {} variables, {} constraints", + ilp.num_vars, + ilp.constraints.len() + ); + + // 4. Solve ILP + let solver = ILPSolver::new(); + let ilp_solution = solver + .solve(ilp) + .expect("ILP should be feasible for ABAC/BACA"); + println!("\n=== Solution ==="); + println!("ILP solution: {:?}", &ilp_solution); + + // 5. Extract LCS solution + let extracted = reduction.extract_solution(&ilp_solution); + println!("Source LCS config: {:?}", extracted); + + // 6. Verify + let metric = problem.evaluate(&extracted); + assert!(metric.is_valid()); + let lcs_length = metric.unwrap(); + println!("LCS length: {}", lcs_length); + assert_eq!(lcs_length, 3); + println!("\nReduction verified successfully"); + + // 7. Collect solutions and export JSON + let solutions = vec![SolutionPair { + source_config: extracted, + target_config: ilp_solution, + }]; + + let source_variant = variant_to_map(LongestCommonSubsequence::variant()); + let target_variant = variant_to_map(ILP::::variant()); + let overhead = + lookup_overhead("LongestCommonSubsequence", &source_variant, "ILP", &target_variant) + .expect("LCS -> ILP overhead not found"); + + let data = ReductionData { + source: ProblemSide { + problem: LongestCommonSubsequence::NAME.to_string(), + variant: source_variant, + instance: serde_json::json!({ + "strings": [ + [65, 66, 65, 67], + [66, 65, 67, 65], + ], + }), + }, + target: ProblemSide { + problem: ILP::::NAME.to_string(), + variant: target_variant, + instance: serde_json::json!({ + "num_vars": ilp.num_vars, + "num_constraints": ilp.constraints.len(), + }), + }, + overhead: overhead_to_json(&overhead), + }; + + let results = ResultData { solutions }; + let name = "longestcommonsubsequence_to_ilp"; + write_example(name, &data, &results); +} + +fn main() { + run() +} diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs index 91e9bd252..d83ab9c05 100644 --- a/problemreductions-cli/src/cli.rs +++ b/problemreductions-cli/src/cli.rs @@ -216,6 +216,7 @@ Flags by problem type: BicliqueCover --left, --right, --biedges, --k BMF --matrix (0/1), --rank CVP --basis, --target-vec [--bounds] + LCS --strings ILP, CircuitSAT (via reduction only) Geometry graph variants (use slash notation, e.g., MIS/KingsSubgraph): @@ -326,6 +327,9 @@ pub struct CreateArgs { /// Variable bounds for CVP as "lower,upper" (e.g., "-10,10") [default: -10,10] #[arg(long, allow_hyphen_values = true)] pub bounds: Option, + /// Input strings for LCS (semicolon-separated, e.g., "ABAC;BACA") + #[arg(long)] + pub strings: Option, } #[derive(clap::Args)] diff --git a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs index 3594a24a0..a30ee917c 100644 --- a/problemreductions-cli/src/commands/create.rs +++ b/problemreductions-cli/src/commands/create.rs @@ -5,7 +5,7 @@ use crate::problem_name::{parse_problem_spec, resolve_variant}; use crate::util; use anyhow::{bail, Context, Result}; use problemreductions::models::algebraic::{ClosestVectorProblem, BMF}; -use problemreductions::models::misc::{BinPacking, PaintShop}; +use problemreductions::models::misc::{BinPacking, LongestCommonSubsequence, PaintShop}; use problemreductions::prelude::*; use problemreductions::registry::collect_schemas; use problemreductions::topology::{ @@ -45,6 +45,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool { && args.basis.is_none() && args.target_vec.is_none() && args.bounds.is_none() + && args.strings.is_none() } fn type_format_hint(type_name: &str, graph_type: Option<&str>) -> &'static str { @@ -407,6 +408,24 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { (ser(BMF::new(matrix, rank))?, resolved_variant.clone()) } + // LongestCommonSubsequence + "LongestCommonSubsequence" => { + let strings_str = args.strings.as_deref().ok_or_else(|| { + anyhow::anyhow!( + "LCS requires --strings\n\n\ + Usage: pred create LCS --strings \"ABAC;BACA\"" + ) + })?; + let strings: Vec> = strings_str + .split(';') + .map(|s| s.trim().as_bytes().to_vec()) + .collect(); + ( + ser(LongestCommonSubsequence::new(strings))?, + resolved_variant.clone(), + ) + } + // ClosestVectorProblem "ClosestVectorProblem" => { let basis_str = args.basis.as_deref().ok_or_else(|| { diff --git a/problemreductions-cli/src/dispatch.rs b/problemreductions-cli/src/dispatch.rs index 7a8498421..f2e43c68b 100644 --- a/problemreductions-cli/src/dispatch.rs +++ b/problemreductions-cli/src/dispatch.rs @@ -1,6 +1,6 @@ use anyhow::{bail, Context, Result}; use problemreductions::models::algebraic::{ClosestVectorProblem, ILP}; -use problemreductions::models::misc::{BinPacking, Knapsack}; +use problemreductions::models::misc::{BinPacking, Knapsack, LongestCommonSubsequence}; use problemreductions::prelude::*; use problemreductions::rules::{MinimizeSteps, ReductionGraph}; use problemreductions::solvers::{BruteForce, ILPSolver, Solver}; @@ -245,6 +245,7 @@ pub fn load_problem( _ => deser_opt::>(data), }, "Knapsack" => deser_opt::(data), + "LongestCommonSubsequence" => deser_opt::(data), _ => bail!("{}", crate::problem_name::unknown_problem_error(&canonical)), } } @@ -305,6 +306,7 @@ pub fn serialize_any_problem( _ => try_ser::>(any), }, "Knapsack" => try_ser::(any), + "LongestCommonSubsequence" => try_ser::(any), _ => bail!("{}", crate::problem_name::unknown_problem_error(&canonical)), } } diff --git a/problemreductions-cli/src/problem_name.rs b/problemreductions-cli/src/problem_name.rs index acd9b4b59..71eea3b07 100644 --- a/problemreductions-cli/src/problem_name.rs +++ b/problemreductions-cli/src/problem_name.rs @@ -20,6 +20,7 @@ pub const ALIASES: &[(&str, &str)] = &[ ("KSAT", "KSatisfiability"), ("TSP", "TravelingSalesman"), ("CVP", "ClosestVectorProblem"), + ("LCS", "LongestCommonSubsequence"), ("MaxMatching", "MaximumMatching"), ]; @@ -52,6 +53,7 @@ pub fn resolve_alias(input: &str) -> String { "binpacking" => "BinPacking".to_string(), "cvp" | "closestvectorproblem" => "ClosestVectorProblem".to_string(), "knapsack" => "Knapsack".to_string(), + "lcs" | "longestcommonsubsequence" => "LongestCommonSubsequence".to_string(), _ => input.to_string(), // pass-through for exact names } } diff --git a/src/lib.rs b/src/lib.rs index b0d99699a..85629c58f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,7 +45,7 @@ pub mod prelude { KColoring, MaxCut, MaximalIS, MaximumClique, MaximumIndependentSet, MaximumMatching, MinimumDominatingSet, MinimumVertexCover, TravelingSalesman, }; - pub use crate::models::misc::{BinPacking, Factoring, Knapsack, PaintShop}; + pub use crate::models::misc::{BinPacking, Factoring, Knapsack, LongestCommonSubsequence, PaintShop}; pub use crate::models::set::{MaximumSetPacking, MinimumSetCovering}; // Core traits diff --git a/src/models/misc/longest_common_subsequence.rs b/src/models/misc/longest_common_subsequence.rs new file mode 100644 index 000000000..4cdf09bff --- /dev/null +++ b/src/models/misc/longest_common_subsequence.rs @@ -0,0 +1,188 @@ +//! Longest Common Subsequence (LCS) problem implementation. +//! +//! Given a set of strings over a finite alphabet, find the longest string +//! that is a subsequence of every input string. Polynomial-time for 2 strings +//! via dynamic programming, but NP-hard for k >= 3 strings (Maier, 1978). + +use crate::registry::{FieldInfo, ProblemSchemaEntry}; +use crate::traits::{OptimizationProblem, Problem}; +use crate::types::{Direction, SolutionSize}; +use serde::{Deserialize, Serialize}; + +inventory::submit! { + ProblemSchemaEntry { + name: "LongestCommonSubsequence", + module_path: module_path!(), + description: "Find the longest string that is a subsequence of every input string", + fields: &[ + FieldInfo { name: "strings", type_name: "Vec>", description: "The input strings" }, + ], + } +} + +/// The Longest Common Subsequence problem. +/// +/// Given `k` strings `s_1, ..., s_k` over a finite alphabet, find a longest +/// string `w` that is a subsequence of every `s_i`. +/// +/// A string `w` is a **subsequence** of `s` if `w` can be obtained by deleting +/// zero or more characters from `s` without changing the order of the remaining +/// characters. +/// +/// # Representation +/// +/// The configuration is a binary vector of length equal to the shortest string. +/// Each entry indicates whether the corresponding character of the shortest +/// string is included in the candidate subsequence. +/// +/// # Example +/// +/// ``` +/// use problemreductions::models::misc::LongestCommonSubsequence; +/// use problemreductions::{Problem, Solver, BruteForce}; +/// +/// let problem = LongestCommonSubsequence::new(vec![ +/// vec![b'A', b'B', b'C'], +/// vec![b'A', b'C', b'B'], +/// ]); +/// let solver = BruteForce::new(); +/// let solution = solver.find_best(&problem); +/// assert!(solution.is_some()); +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LongestCommonSubsequence { + /// The input strings. + strings: Vec>, +} + +impl LongestCommonSubsequence { + /// Create a new LCS instance from a list of strings. + /// + /// # Panics + /// Panics if fewer than 2 strings are provided. + pub fn new(strings: Vec>) -> Self { + assert!( + strings.len() >= 2, + "LCS requires at least 2 strings, got {}", + strings.len() + ); + Self { strings } + } + + /// Get the input strings. + pub fn strings(&self) -> &[Vec] { + &self.strings + } + + /// Get the number of strings. + pub fn num_strings(&self) -> usize { + self.strings.len() + } + + /// Get the total length of all strings. + pub fn total_length(&self) -> usize { + self.strings.iter().map(|s| s.len()).sum() + } + + /// Get the length of the first string. + pub fn num_chars_first(&self) -> usize { + self.strings[0].len() + } + + /// Get the length of the second string. + pub fn num_chars_second(&self) -> usize { + self.strings[1].len() + } + + /// Index of the shortest string. + fn shortest_index(&self) -> usize { + self.strings + .iter() + .enumerate() + .min_by_key(|(_, s)| s.len()) + .map(|(i, _)| i) + .unwrap() + } + + /// Length of the shortest string. + fn shortest_len(&self) -> usize { + self.strings[self.shortest_index()].len() + } +} + +/// Check if `candidate` is a subsequence of `target`. +fn is_subsequence(candidate: &[u8], target: &[u8]) -> bool { + let mut ti = 0; + for &ch in candidate { + while ti < target.len() && target[ti] != ch { + ti += 1; + } + if ti >= target.len() { + return false; + } + ti += 1; + } + true +} + +impl Problem for LongestCommonSubsequence { + const NAME: &'static str = "LongestCommonSubsequence"; + type Metric = SolutionSize; + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } + + fn dims(&self) -> Vec { + vec![2; self.shortest_len()] + } + + fn evaluate(&self, config: &[usize]) -> SolutionSize { + let shortest_idx = self.shortest_index(); + let shortest = &self.strings[shortest_idx]; + + if config.len() != shortest.len() { + return SolutionSize::Invalid; + } + if config.iter().any(|&v| v >= 2) { + return SolutionSize::Invalid; + } + + // Build candidate subsequence from selected positions of shortest string + let candidate: Vec = config + .iter() + .enumerate() + .filter(|(_, &x)| x == 1) + .map(|(i, _)| shortest[i]) + .collect(); + + // Check that candidate is a subsequence of ALL strings + for (i, s) in self.strings.iter().enumerate() { + if i == shortest_idx { + // The candidate is always a subsequence of the string it was built from + continue; + } + if !is_subsequence(&candidate, s) { + return SolutionSize::Invalid; + } + } + + SolutionSize::Valid(candidate.len() as i32) + } +} + +impl OptimizationProblem for LongestCommonSubsequence { + type Value = i32; + + fn direction(&self) -> Direction { + Direction::Maximize + } +} + +crate::declare_variants! { + LongestCommonSubsequence => "2^total_length", +} + +#[cfg(test)] +#[path = "../../unit_tests/models/misc/longest_common_subsequence.rs"] +mod tests; diff --git a/src/models/misc/mod.rs b/src/models/misc/mod.rs index cdb66e969..4f2b590f7 100644 --- a/src/models/misc/mod.rs +++ b/src/models/misc/mod.rs @@ -4,14 +4,17 @@ //! - [`BinPacking`]: Bin Packing (minimize bins) //! - [`Factoring`]: Integer factorization //! - [`Knapsack`]: 0-1 Knapsack (maximize value subject to weight capacity) +//! - [`LongestCommonSubsequence`]: Longest Common Subsequence //! - [`PaintShop`]: Minimize color switches in paint shop scheduling mod bin_packing; pub(crate) mod factoring; mod knapsack; +mod longest_common_subsequence; pub(crate) mod paintshop; pub use bin_packing::BinPacking; pub use factoring::Factoring; pub use knapsack::Knapsack; +pub use longest_common_subsequence::LongestCommonSubsequence; pub use paintshop::PaintShop; diff --git a/src/models/mod.rs b/src/models/mod.rs index 96b4b79d1..524bf9ba6 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -15,5 +15,5 @@ pub use graph::{ BicliqueCover, KColoring, MaxCut, MaximalIS, MaximumClique, MaximumIndependentSet, MaximumMatching, MinimumDominatingSet, MinimumVertexCover, SpinGlass, TravelingSalesman, }; -pub use misc::{BinPacking, Factoring, Knapsack, PaintShop}; +pub use misc::{BinPacking, Factoring, Knapsack, LongestCommonSubsequence, PaintShop}; pub use set::{MaximumSetPacking, MinimumSetCovering}; diff --git a/src/rules/longestcommonsubsequence_ilp.rs b/src/rules/longestcommonsubsequence_ilp.rs new file mode 100644 index 000000000..77d79ae3c --- /dev/null +++ b/src/rules/longestcommonsubsequence_ilp.rs @@ -0,0 +1,174 @@ +//! Reduction from LongestCommonSubsequence to ILP (Integer Linear Programming). +//! +//! Uses the match-pair formulation (Blum et al., 2021; Althaus et al., 2006). +//! For 2 strings s1 (length n1) and s2 (length n2): +//! +//! ## Variables +//! For each pair (j1, j2) where s1[j1] == s2[j2], a binary variable m_{j1,j2}. +//! +//! ## Constraints +//! 1. Each position in s1 matched at most once +//! 2. Each position in s2 matched at most once +//! 3. Order preservation (no crossings): for (j1,j2),(j1',j2') with j1 < j1' and j2 > j2': +//! m_{j1,j2} + m_{j1',j2'} <= 1 +//! +//! ## Objective +//! Maximize sum of all match variables. + +use crate::models::algebraic::{LinearConstraint, ObjectiveSense, ILP}; +use crate::models::misc::LongestCommonSubsequence; +use crate::reduction; +use crate::rules::traits::{ReduceTo, ReductionResult}; + +/// Result of reducing LongestCommonSubsequence to ILP. +#[derive(Debug, Clone)] +pub struct ReductionLCSToILP { + target: ILP, + /// The match pairs: (j1, j2) for each variable index. + match_pairs: Vec<(usize, usize)>, + /// Number of characters in the first string. + n1: usize, + /// Number of characters in the second string. + n2: usize, +} + +impl ReductionResult for ReductionLCSToILP { + type Source = LongestCommonSubsequence; + type Target = ILP; + + fn target_problem(&self) -> &ILP { + &self.target + } + + /// Extract solution from ILP back to LCS. + /// + /// The ILP solution has binary variables for match pairs. We extract the + /// matched positions, build the LCS, and map back to a binary selection + /// on the shortest source string. + fn extract_solution(&self, target_solution: &[usize]) -> Vec { + // The source problem's dims() is based on the shortest string. + // We build match pairs on s1=strings[0], s2=strings[1]. + // If shortest is s1, we use j1 positions; if s2, we use j2 positions. + let shortest_len = std::cmp::min(self.n1, self.n2); + let shortest_is_first = self.n1 <= self.n2; + + let matched_positions: Vec = self + .match_pairs + .iter() + .enumerate() + .filter(|(i, _)| target_solution.get(*i).copied().unwrap_or(0) == 1) + .map(|(_, &(j1, j2))| { + if shortest_is_first { + j1 + } else { + j2 + } + }) + .collect(); + + let mut config = vec![0usize; shortest_len]; + for pos in matched_positions { + if pos < config.len() { + config[pos] = 1; + } + } + config + } +} + +#[reduction(overhead = { + num_vars = "num_chars_first * num_chars_second", + num_constraints = "num_chars_first + num_chars_second + (num_chars_first * num_chars_second) ^ 2", +})] +impl ReduceTo> for LongestCommonSubsequence { + type Result = ReductionLCSToILP; + + fn reduce_to(&self) -> Self::Result { + let strings = self.strings(); + assert!( + strings.len() >= 2, + "LCS to ILP reduction requires at least 2 strings" + ); + + let s1 = &strings[0]; + let s2 = &strings[1]; + let n1 = s1.len(); + let n2 = s2.len(); + + // Build match pairs: (j1, j2) where s1[j1] == s2[j2] + let mut match_pairs: Vec<(usize, usize)> = Vec::new(); + for (j1, &c1) in s1.iter().enumerate() { + for (j2, &c2) in s2.iter().enumerate() { + if c1 == c2 { + match_pairs.push((j1, j2)); + } + } + } + + let num_vars = match_pairs.len(); + let mut constraints = Vec::new(); + + // Constraint 1: Each position in s1 matched at most once + for j1 in 0..n1 { + let terms: Vec<(usize, f64)> = match_pairs + .iter() + .enumerate() + .filter(|(_, &(a, _))| a == j1) + .map(|(idx, _)| (idx, 1.0)) + .collect(); + if !terms.is_empty() { + constraints.push(LinearConstraint::le(terms, 1.0)); + } + } + + // Constraint 2: Each position in s2 matched at most once + for j2 in 0..n2 { + let terms: Vec<(usize, f64)> = match_pairs + .iter() + .enumerate() + .filter(|(_, &(_, b))| b == j2) + .map(|(idx, _)| (idx, 1.0)) + .collect(); + if !terms.is_empty() { + constraints.push(LinearConstraint::le(terms, 1.0)); + } + } + + // Constraint 3: Order preservation (no crossings) + // For all pairs (j1, j2) and (j1', j2') with j1 < j1' and j2 > j2': + // m_{j1,j2} + m_{j1',j2'} <= 1 + for (i, &(j1, j2)) in match_pairs.iter().enumerate() { + for (k, &(j1p, j2p)) in match_pairs.iter().enumerate() { + if i < k && j1 < j1p && j2 > j2p { + constraints.push(LinearConstraint::le( + vec![(i, 1.0), (k, 1.0)], + 1.0, + )); + } + // Also check the reverse: j1 > j1p and j2 < j2p + if i < k && j1 > j1p && j2 < j2p { + constraints.push(LinearConstraint::le( + vec![(i, 1.0), (k, 1.0)], + 1.0, + )); + } + } + } + + // Objective: maximize sum of all match variables + let objective: Vec<(usize, f64)> = (0..num_vars).map(|i| (i, 1.0)).collect(); + + let ilp = ILP::::new(num_vars, constraints, objective, ObjectiveSense::Maximize); + + ReductionLCSToILP { + target: ilp, + match_pairs, + n1, + n2, + } + } +} + +#[cfg(test)] +#[path = "../unit_tests/rules/longestcommonsubsequence_ilp.rs"] +mod tests; diff --git a/src/rules/mod.rs b/src/rules/mod.rs index 80bdb735b..04b224d94 100644 --- a/src/rules/mod.rs +++ b/src/rules/mod.rs @@ -43,6 +43,8 @@ mod factoring_ilp; #[cfg(feature = "ilp-solver")] mod ilp_bool_ilp_i32; #[cfg(feature = "ilp-solver")] +mod longestcommonsubsequence_ilp; +#[cfg(feature = "ilp-solver")] mod ilp_qubo; #[cfg(feature = "ilp-solver")] mod maximumclique_ilp; diff --git a/src/unit_tests/models/misc/longest_common_subsequence.rs b/src/unit_tests/models/misc/longest_common_subsequence.rs new file mode 100644 index 000000000..6cfef1067 --- /dev/null +++ b/src/unit_tests/models/misc/longest_common_subsequence.rs @@ -0,0 +1,168 @@ +use super::*; +use crate::solvers::{BruteForce, Solver}; +use crate::traits::{OptimizationProblem, Problem}; +use crate::types::Direction; + +#[test] +fn test_lcs_basic() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C', b'D', b'A', b'B'], + vec![b'B', b'D', b'C', b'A', b'B', b'A'], + ]); + assert_eq!(problem.num_strings(), 2); + assert_eq!(problem.total_length(), 12); + assert_eq!(problem.num_chars_first(), 6); + assert_eq!(problem.num_chars_second(), 6); + assert_eq!(problem.direction(), Direction::Maximize); + assert_eq!(::NAME, "LongestCommonSubsequence"); + assert_eq!(::variant(), vec![]); +} + +#[test] +fn test_lcs_dims() { + // Shortest string has 4 chars + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'A', b'C'], + vec![b'B', b'A', b'C', b'A', b'B', b'C'], + ]); + assert_eq!(problem.dims(), vec![2; 4]); +} + +#[test] +fn test_lcs_evaluate_valid() { + // s1 = "ABC", s2 = "ACB" + // Selecting positions 0,2 of s1 (shorter) gives "AC" which is subseq of "ACB" + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'A', b'C', b'B'], + ]); + let result = problem.evaluate(&[1, 0, 1]); // "AC" + assert!(result.is_valid()); + assert_eq!(result.unwrap(), 2); +} + +#[test] +fn test_lcs_evaluate_invalid_subsequence() { + // s1 = "ABC", s2 = "CAB" + // Selecting positions 1,2 of s1 gives "BC" - is "BC" a subseq of "CAB"? No + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'C', b'A', b'B'], + ]); + let result = problem.evaluate(&[0, 1, 1]); // "BC" + assert!(!result.is_valid()); +} + +#[test] +fn test_lcs_evaluate_empty_selection() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'X', b'Y', b'Z'], + ]); + let result = problem.evaluate(&[0, 0, 0]); // empty + assert!(result.is_valid()); + assert_eq!(result.unwrap(), 0); +} + +#[test] +fn test_lcs_evaluate_wrong_config_length() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'A', b'B', b'C'], + ]); + assert!(!problem.evaluate(&[1]).is_valid()); + assert!(!problem.evaluate(&[1, 0, 0]).is_valid()); +} + +#[test] +fn test_lcs_evaluate_invalid_variable_value() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'A', b'B'], + ]); + assert!(!problem.evaluate(&[2, 0]).is_valid()); +} + +#[test] +fn test_lcs_brute_force_two_strings() { + // s1 = "ABAC", s2 = "BACA" + // LCS = "BAC" or "AAC" or "ABA", length 3 + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'A', b'C'], + vec![b'B', b'A', b'C', b'A'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 3); +} + +#[test] +fn test_lcs_identical_strings() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'A', b'B', b'C'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert_eq!(metric.unwrap(), 3); +} + +#[test] +fn test_lcs_no_common_chars() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'C', b'D'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert_eq!(metric.unwrap(), 0); +} + +#[test] +fn test_lcs_single_char_alphabet() { + // All same character - LCS is length of shortest string + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'A', b'A'], + vec![b'A', b'A'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert_eq!(metric.unwrap(), 2); +} + +#[test] +fn test_lcs_three_strings() { + // Example from issue #108 + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C', b'D', b'A', b'B'], + vec![b'B', b'D', b'C', b'A', b'B', b'A'], + vec![b'B', b'C', b'A', b'D', b'B', b'A'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 4); // "BCAB" or equivalent +} + +#[test] +fn test_lcs_serialization() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'A', b'C', b'B'], + ]); + let json = serde_json::to_value(&problem).unwrap(); + let restored: LongestCommonSubsequence = serde_json::from_value(json).unwrap(); + assert_eq!(restored.strings(), problem.strings()); +} + +#[test] +#[should_panic(expected = "LCS requires at least 2 strings")] +fn test_lcs_single_string_panics() { + LongestCommonSubsequence::new(vec![vec![b'A', b'B']]); +} diff --git a/src/unit_tests/rules/longestcommonsubsequence_ilp.rs b/src/unit_tests/rules/longestcommonsubsequence_ilp.rs new file mode 100644 index 000000000..2deb5c8d6 --- /dev/null +++ b/src/unit_tests/rules/longestcommonsubsequence_ilp.rs @@ -0,0 +1,182 @@ +use super::*; +use crate::solvers::{BruteForce, ILPSolver}; + +#[test] +fn test_lcs_to_ilp_issue_example() { + // From issue #110: s1 = "ABAC", s2 = "BACA" + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'A', b'C'], + vec![b'B', b'A', b'C', b'A'], + ]); + let reduction: ReductionLCSToILP = + ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + + // 6 match pairs as described in the issue + assert_eq!(ilp.num_vars, 6); + + // Solve ILP + let ilp_solver = ILPSolver::new(); + let ilp_solution = ilp_solver.solve(ilp).expect("ILP should be solvable"); + + // Extract solution back to LCS config + let extracted = reduction.extract_solution(&ilp_solution); + + // Verify the solution is valid and optimal (length 3) + let metric = problem.evaluate(&extracted); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 3); +} + +#[test] +fn test_lcs_to_ilp_closed_loop() { + // Compare brute force LCS with ILP-based solution + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'A', b'C'], + vec![b'B', b'A', b'C', b'A'], + ]); + + // Brute force optimal + let bf = BruteForce::new(); + let bf_solution = bf.find_best(&problem).expect("should find a solution"); + let bf_metric = problem.evaluate(&bf_solution); + assert!(bf_metric.is_valid()); + let bf_value = bf_metric.unwrap(); + + // ILP optimal + let reduction: ReductionLCSToILP = + ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + let ilp_solver = ILPSolver::new(); + let ilp_solution = ilp_solver.solve(ilp).expect("ILP should be solvable"); + let extracted = reduction.extract_solution(&ilp_solution); + let ilp_metric = problem.evaluate(&extracted); + assert!(ilp_metric.is_valid()); + let ilp_value = ilp_metric.unwrap(); + + // Both should give the same optimal value + assert_eq!( + bf_value, ilp_value, + "BF optimal {} != ILP optimal {}", + bf_value, ilp_value + ); +} + +#[test] +fn test_lcs_to_ilp_identical_strings() { + // LCS of identical strings = the string itself + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'A', b'B', b'C'], + ]); + let reduction: ReductionLCSToILP = + ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + + let ilp_solver = ILPSolver::new(); + let ilp_solution = ilp_solver.solve(ilp).expect("ILP should be solvable"); + let extracted = reduction.extract_solution(&ilp_solution); + + let metric = problem.evaluate(&extracted); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 3); +} + +#[test] +fn test_lcs_to_ilp_no_common_chars() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'C', b'D'], + ]); + let reduction: ReductionLCSToILP = + ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + + // No match pairs → 0 variables + assert_eq!(ilp.num_vars, 0); + + let ilp_solver = ILPSolver::new(); + let ilp_solution = ilp_solver.solve(ilp).expect("ILP should be solvable"); + let extracted = reduction.extract_solution(&ilp_solution); + + let metric = problem.evaluate(&extracted); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 0); +} + +#[test] +fn test_lcs_to_ilp_single_char_alphabet() { + // All same chars → LCS = min length + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'A', b'A'], + vec![b'A', b'A'], + ]); + let reduction: ReductionLCSToILP = + ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + + let ilp_solver = ILPSolver::new(); + let ilp_solution = ilp_solver.solve(ilp).expect("ILP should be solvable"); + let extracted = reduction.extract_solution(&ilp_solution); + + let metric = problem.evaluate(&extracted); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 2); +} + +#[test] +fn test_lcs_to_ilp_asymmetric_lengths() { + // s1 = "AB", s2 = "AABB" + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'A', b'A', b'B', b'B'], + ]); + + // BF optimal + let bf = BruteForce::new(); + let bf_solution = bf.find_best(&problem).unwrap(); + let bf_value = problem.evaluate(&bf_solution).unwrap(); + + // ILP optimal + let reduction: ReductionLCSToILP = + ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + let ilp_solver = ILPSolver::new(); + let ilp_solution = ilp_solver.solve(ilp).unwrap(); + let extracted = reduction.extract_solution(&ilp_solution); + let ilp_value = problem.evaluate(&extracted).unwrap(); + + assert_eq!(bf_value, ilp_value); + assert_eq!(ilp_value, 2); // "AB" is subseq of both +} + +#[test] +fn test_lcs_to_ilp_constraint_structure() { + // Verify basic ILP structure for a small example + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'B', b'A'], + ]); + let reduction: ReductionLCSToILP = + ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + + // Match pairs: (0,1)=A, (1,0)=B → 2 variables + assert_eq!(ilp.num_vars, 2); + + // Constraints: + // - s1 pos 0: m_{0,1} <= 1 (1 constraint) + // - s1 pos 1: m_{1,0} <= 1 (1 constraint) + // - s2 pos 0: m_{1,0} <= 1 (1 constraint) + // - s2 pos 1: m_{0,1} <= 1 (1 constraint) + // - Crossing: j1=0 < j1'=1 and j2=1 > j2'=0 → m_{0,1} + m_{1,0} <= 1 (1 constraint) + // Total: 5 + assert_eq!(ilp.constraints.len(), 5); + + // Optimal: can only pick one of the two → LCS length 1 + let ilp_solver = ILPSolver::new(); + let ilp_solution = ilp_solver.solve(ilp).unwrap(); + let extracted = reduction.extract_solution(&ilp_solution); + let metric = problem.evaluate(&extracted); + assert_eq!(metric.unwrap(), 1); +} diff --git a/tests/suites/examples.rs b/tests/suites/examples.rs index 3c9ad8033..2400a983c 100644 --- a/tests/suites/examples.rs +++ b/tests/suites/examples.rs @@ -45,6 +45,7 @@ example_test!(reduction_satisfiability_to_circuitsat); example_test!(reduction_satisfiability_to_ksatisfiability); example_test!(reduction_satisfiability_to_maximumindependentset); example_test!(reduction_satisfiability_to_minimumdominatingset); +example_test!(reduction_longestcommonsubsequence_to_ilp); example_test!(reduction_spinglass_to_maxcut); example_test!(reduction_spinglass_to_qubo); example_test!(reduction_travelingsalesman_to_ilp); @@ -171,6 +172,10 @@ example_fn!( test_satisfiability_to_minimumdominatingset, reduction_satisfiability_to_minimumdominatingset ); +example_fn!( + test_longestcommonsubsequence_to_ilp, + reduction_longestcommonsubsequence_to_ilp +); example_fn!(test_spinglass_to_maxcut, reduction_spinglass_to_maxcut); example_fn!(test_spinglass_to_qubo, reduction_spinglass_to_qubo); example_fn!( From 5612df50d5fc44f09ccf02eeff131269539a79a0 Mon Sep 17 00:00:00 2001 From: zazabap Date: Thu, 12 Mar 2026 12:17:29 +0000 Subject: [PATCH 3/6] chore: remove plan file after implementation --- docs/plans/2026-03-12-lcs-to-ilp.md | 105 ---------------------------- 1 file changed, 105 deletions(-) delete mode 100644 docs/plans/2026-03-12-lcs-to-ilp.md diff --git a/docs/plans/2026-03-12-lcs-to-ilp.md b/docs/plans/2026-03-12-lcs-to-ilp.md deleted file mode 100644 index ffcb86403..000000000 --- a/docs/plans/2026-03-12-lcs-to-ilp.md +++ /dev/null @@ -1,105 +0,0 @@ -# Plan: LongestCommonSubsequence model + LCS to ILP reduction - -Fixes #110 (and partially addresses #108 since the LCS model is needed). - -## Overview - -Since the LongestCommonSubsequence model does not exist in main (PR #170 branch was deleted), this PR implements both: -1. The LCS model (from issue #108) -2. The LCS-to-ILP reduction rule (from issue #110) - -## Step 1: Implement the LCS model - -### 1a. Create `src/models/misc/longest_common_subsequence.rs` - -- Struct: `LongestCommonSubsequence` with field `strings: Vec>` -- Getters: `strings()`, `num_strings()`, `total_length()`, `num_chars_first()`, `num_chars_second()` -- `Problem` impl: - - `NAME = "LongestCommonSubsequence"` - - `Metric = SolutionSize` (length of common subsequence) - - `dims()`: For k strings, enumerate all possible subsequences of the shortest string. - Use binary selection: each position of the shortest string is included (1) or not (0) → `vec![2; shortest_len]` - - `evaluate()`: Check if the selected subsequence of the shortest string is a subsequence of all other strings. Return length if valid, Invalid otherwise. - - `variant()`: `variant_params![]` (no type parameters) -- `OptimizationProblem` impl: `Direction::Maximize`, `Value = i32` -- `declare_variants!`: `LongestCommonSubsequence => "2^total_length"` (NP-hard for k strings) -- Serde derive for JSON serialization - -### 1b. Register in `src/models/misc/mod.rs` - -Add `mod longest_common_subsequence;` and `pub use`. - -### 1c. Register in `src/models/mod.rs` - -Add to re-exports. - -### 1d. Register in `src/lib.rs` prelude - -Add `LongestCommonSubsequence` to prelude. - -### 1e. Register in CLI - -- `problemreductions-cli/src/dispatch.rs`: Add load/serialize entries -- `problemreductions-cli/src/problem_name.rs`: Add alias mapping -- `problemreductions-cli/src/commands/create.rs`: Add creation with `--strings` flag - -### 1f. Unit tests - -Create `src/unit_tests/models/misc/longest_common_subsequence.rs`: -- `test_lcs_basic`: creation, evaluate valid/invalid -- `test_lcs_serialization`: serde roundtrip -- Brute force solver test - -### 1g. Schema registration - -Add `inventory::submit!` for `ProblemSchemaEntry`. - -## Step 2: Implement LCS to ILP reduction - -### 2a. Create `src/rules/longestcommonsubsequence_ilp.rs` - -Following the match-pair formulation from Blum et al. 2021: - -**Variable mapping (2-string case):** -For each pair (j1, j2) where s1[j1] == s2[j2], create binary variable m_{j1,j2}. - -**Constraints:** -1. Each position in s1 matched at most once -2. Each position in s2 matched at most once -3. Order preservation (no crossings): for (j1,j2),(j1',j2') with j1j2': m_{j1,j2} + m_{j1',j2'} <= 1 - -**Objective:** maximize sum of all match variables - -**Solution extraction:** matched positions with m=1, sorted by j1, give LCS characters. - -### 2b. Register in `src/rules/mod.rs` - -Add `#[cfg(feature = "ilp-solver")] mod longestcommonsubsequence_ilp;` - -### 2c. Overhead - -```rust -#[reduction(overhead = { - num_vars = "num_chars_first * num_chars_second", - num_constraints = "num_chars_first + num_chars_second + (num_chars_first * num_chars_second) ^ 2", -})] -``` - -### 2d. Unit tests - -Create `src/unit_tests/rules/longestcommonsubsequence_ilp.rs`: -- `test_longestcommonsubsequence_to_ilp_closed_loop`: brute force vs ILP comparison -- Test with issue example: ABAC, BACA → BAC (length 3) -- Edge cases: identical strings, no common chars, single char - -### 2e. Example - -Create `examples/reduction_longestcommonsubsequence_to_ilp.rs` with the ABAC/BACA example. -Register in `tests/suites/examples.rs`. - -## Step 3: Paper updates - -Add to `docs/paper/reductions.typ`: -- `problem-def("LongestCommonSubsequence")` entry -- `display-name` entry -- `reduction-rule("LongestCommonSubsequence", "ILP")` entry From a657f699badab8e37aeb7bc4af00847dbd75261f Mon Sep 17 00:00:00 2001 From: zazabap Date: Thu, 12 Mar 2026 12:20:13 +0000 Subject: [PATCH 4/6] fix: add missing Problem and Solver trait imports in reduction tests The reduction test file needs Problem trait for evaluate() calls and Solver trait for BruteForce::find_best() calls. Co-Authored-By: Claude Opus 4.6 --- src/unit_tests/rules/longestcommonsubsequence_ilp.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/unit_tests/rules/longestcommonsubsequence_ilp.rs b/src/unit_tests/rules/longestcommonsubsequence_ilp.rs index 2deb5c8d6..101da6b99 100644 --- a/src/unit_tests/rules/longestcommonsubsequence_ilp.rs +++ b/src/unit_tests/rules/longestcommonsubsequence_ilp.rs @@ -1,5 +1,6 @@ use super::*; -use crate::solvers::{BruteForce, ILPSolver}; +use crate::solvers::{BruteForce, ILPSolver, Solver}; +use crate::traits::Problem; #[test] fn test_lcs_to_ilp_issue_example() { From 27d61e7f5a2c1d7545a2dd4eb03b25ab734b1e35 Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Fri, 13 Mar 2026 08:54:02 +0800 Subject: [PATCH 5/6] Fix Copilot review comments: assert exactly 2 strings, fix complexity, fix module order - Assert strings.len() == 2 (not >= 2) in LCS-to-ILP reduction to prevent silently ignoring extra strings - Fix complexity from 2^total_length to 2^min_string_length to match actual brute-force search space (dims() uses shortest string) - Fix alphabetical ordering of longestcommonsubsequence_ilp in rules/mod.rs Co-Authored-By: Claude Opus 4.6 --- src/models/misc/longest_common_subsequence.rs | 6 +++--- src/rules/longestcommonsubsequence_ilp.rs | 5 +++-- src/rules/mod.rs | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/models/misc/longest_common_subsequence.rs b/src/models/misc/longest_common_subsequence.rs index 4cdf09bff..8d4fd6e0c 100644 --- a/src/models/misc/longest_common_subsequence.rs +++ b/src/models/misc/longest_common_subsequence.rs @@ -105,7 +105,7 @@ impl LongestCommonSubsequence { } /// Length of the shortest string. - fn shortest_len(&self) -> usize { + pub fn min_string_length(&self) -> usize { self.strings[self.shortest_index()].len() } } @@ -134,7 +134,7 @@ impl Problem for LongestCommonSubsequence { } fn dims(&self) -> Vec { - vec![2; self.shortest_len()] + vec![2; self.min_string_length()] } fn evaluate(&self, config: &[usize]) -> SolutionSize { @@ -180,7 +180,7 @@ impl OptimizationProblem for LongestCommonSubsequence { } crate::declare_variants! { - LongestCommonSubsequence => "2^total_length", + LongestCommonSubsequence => "2^min_string_length", } #[cfg(test)] diff --git a/src/rules/longestcommonsubsequence_ilp.rs b/src/rules/longestcommonsubsequence_ilp.rs index 77d79ae3c..a69610a35 100644 --- a/src/rules/longestcommonsubsequence_ilp.rs +++ b/src/rules/longestcommonsubsequence_ilp.rs @@ -86,8 +86,9 @@ impl ReduceTo> for LongestCommonSubsequence { fn reduce_to(&self) -> Self::Result { let strings = self.strings(); assert!( - strings.len() >= 2, - "LCS to ILP reduction requires at least 2 strings" + strings.len() == 2, + "LCS to ILP reduction is defined for exactly 2 strings, got {}", + strings.len() ); let s1 = &strings[0]; diff --git a/src/rules/mod.rs b/src/rules/mod.rs index c8ec4ea10..5414031ec 100644 --- a/src/rules/mod.rs +++ b/src/rules/mod.rs @@ -47,10 +47,10 @@ mod factoring_ilp; #[cfg(feature = "ilp-solver")] mod ilp_bool_ilp_i32; #[cfg(feature = "ilp-solver")] -mod longestcommonsubsequence_ilp; -#[cfg(feature = "ilp-solver")] mod ilp_qubo; #[cfg(feature = "ilp-solver")] +mod longestcommonsubsequence_ilp; +#[cfg(feature = "ilp-solver")] mod maximumclique_ilp; #[cfg(feature = "ilp-solver")] mod maximummatching_ilp; From 1ee99a4bfec14f11490082a6db03dd2c4951148e Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Fri, 13 Mar 2026 15:14:56 +0800 Subject: [PATCH 6/6] Add empty string edge case test for LCS model Inspired by PR #170 which tested this edge case. Co-Authored-By: Claude Opus 4.6 --- .../models/misc/longest_common_subsequence.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/unit_tests/models/misc/longest_common_subsequence.rs b/src/unit_tests/models/misc/longest_common_subsequence.rs index 6cfef1067..e9059ddaf 100644 --- a/src/unit_tests/models/misc/longest_common_subsequence.rs +++ b/src/unit_tests/models/misc/longest_common_subsequence.rs @@ -161,6 +161,17 @@ fn test_lcs_serialization() { assert_eq!(restored.strings(), problem.strings()); } +#[test] +fn test_lcs_empty_string_in_input() { + let problem = LongestCommonSubsequence::new(vec![ + vec![], + vec![b'A', b'B', b'C'], + ]); + assert_eq!(problem.dims(), Vec::::new()); + assert!(problem.evaluate(&[]).is_valid()); + assert_eq!(problem.evaluate(&[]).unwrap(), 0); +} + #[test] #[should_panic(expected = "LCS requires at least 2 strings")] fn test_lcs_single_string_panics() {