From 63259abae37e78b8797b7af5e3d0acd22aba805d Mon Sep 17 00:00:00 2001 From: zazabap Date: Wed, 4 Mar 2026 18:33:57 +0000 Subject: [PATCH 1/4] Add plan for #108: LongestCommonSubsequence Co-Authored-By: Claude Opus 4.6 --- .../2026-03-04-longest-common-subsequence.md | 528 ++++++++++++++++++ 1 file changed, 528 insertions(+) create mode 100644 docs/plans/2026-03-04-longest-common-subsequence.md diff --git a/docs/plans/2026-03-04-longest-common-subsequence.md b/docs/plans/2026-03-04-longest-common-subsequence.md new file mode 100644 index 000000000..099039cdb --- /dev/null +++ b/docs/plans/2026-03-04-longest-common-subsequence.md @@ -0,0 +1,528 @@ +# LongestCommonSubsequence Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Add the `LongestCommonSubsequence` problem model — given k strings over an alphabet, find the longest string that is a subsequence of every input string. + +**Architecture:** New model in `src/models/misc/` with no type parameters. Configuration space is binary selection over characters of the shortest string (dims = `vec![2; m]` where m = shortest string length). Feasibility checks that the selected subsequence of the shortest string is also a subsequence of all other strings. + +**Tech Stack:** Rust, serde, inventory (schema registration) + +--- + +### Task 1: Create the model file with struct and schema registration + +**Files:** +- Create: `src/models/misc/longest_common_subsequence.rs` + +**Step 1: Write the full model file** + +```rust +//! Longest Common Subsequence problem implementation. +//! +//! Given k strings over an alphabet, find the longest string that is a +//! subsequence of every input string. NP-hard for variable k (Maier, 1978). 
+ +use crate::registry::{FieldInfo, ProblemSchemaEntry}; +use crate::traits::{OptimizationProblem, Problem}; +use crate::types::{Direction, SolutionSize}; +use serde::{Deserialize, Serialize}; + +inventory::submit! { + ProblemSchemaEntry { + name: "LongestCommonSubsequence", + module_path: module_path!(), + description: "Find the longest string that is a subsequence of every input string", + fields: &[ + FieldInfo { name: "strings", type_name: "Vec<Vec<u8>>", description: "The input strings" }, + ], + } +} + +/// The Longest Common Subsequence problem. +/// +/// Given `k` strings `s_1, ..., s_k` over an alphabet, find a longest +/// string `w` that is a subsequence of every `s_i`. +/// +/// A string `w` is a **subsequence** of `s` if `w` can be obtained by +/// deleting zero or more characters from `s` without changing the order +/// of the remaining characters. +/// +/// # Representation +/// +/// Configuration is binary selection over the characters of the shortest +/// string. Each variable in `{0, 1}` indicates whether the corresponding +/// character of the shortest string is included in the candidate subsequence. +/// The candidate is valid if the resulting subsequence is also a subsequence +/// of every other input string. +/// +/// # Example +/// +/// ``` +/// use problemreductions::models::misc::LongestCommonSubsequence; +/// use problemreductions::{Problem, Solver, BruteForce}; +/// +/// let problem = LongestCommonSubsequence::new(vec![ +/// vec![b'A', b'B', b'C', b'D', b'A', b'B'], +/// vec![b'B', b'D', b'C', b'A', b'B', b'A'], +/// vec![b'B', b'C', b'A', b'D', b'B', b'A'], +/// ]); +/// let solver = BruteForce::new(); +/// let solution = solver.find_best(&problem); +/// assert!(solution.is_some()); +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LongestCommonSubsequence { + /// The input strings. + strings: Vec<Vec<u8>>, +} + +impl LongestCommonSubsequence { + /// Create a new LCS problem from a list of strings. 
+ /// + /// # Panics + /// + /// Panics if `strings` is empty. + pub fn new(strings: Vec<Vec<u8>>) -> Self { + assert!(!strings.is_empty(), "must have at least one string"); + Self { strings } + } + + /// Get the input strings. + pub fn strings(&self) -> &[Vec<u8>] { + &self.strings + } + + /// Get the number of input strings. + pub fn num_strings(&self) -> usize { + self.strings.len() + } + + /// Get the total length of all input strings. + pub fn total_length(&self) -> usize { + self.strings.iter().map(|s| s.len()).sum() + } + + /// Index of the shortest string. + fn shortest_index(&self) -> usize { + self.strings + .iter() + .enumerate() + .min_by_key(|(_, s)| s.len()) + .map(|(i, _)| i) + .unwrap_or(0) + } + + /// Length of the shortest string. + fn shortest_len(&self) -> usize { + self.strings.iter().map(|s| s.len()).min().unwrap_or(0) + } +} + +impl Problem for LongestCommonSubsequence { + const NAME: &'static str = "LongestCommonSubsequence"; + type Metric = SolutionSize<i32>; + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } + + fn dims(&self) -> Vec<usize> { + vec![2; self.shortest_len()] + } + + fn evaluate(&self, config: &[usize]) -> SolutionSize<i32> { + let si = self.shortest_index(); + let shortest = &self.strings[si]; + if config.len() != shortest.len() { + return SolutionSize::Invalid; + } + if config.iter().any(|&v| v > 1) { + return SolutionSize::Invalid; + } + // Build the candidate subsequence from selected characters + let candidate: Vec<u8> = config + .iter() + .enumerate() + .filter(|(_, &v)| v == 1) + .map(|(i, _)| shortest[i]) + .collect(); + // Check that candidate is a subsequence of every other string + for (j, s) in self.strings.iter().enumerate() { + if j == si { + continue; + } + if !is_subsequence(&candidate, s) { + return SolutionSize::Invalid; + } + } + SolutionSize::Valid(candidate.len() as i32) + } +} + +impl OptimizationProblem for LongestCommonSubsequence { + type Value = i32; + + fn direction(&self) -> Direction { + 
Direction::Maximize + } +} + +/// Check if `sub` is a subsequence of `full`. +fn is_subsequence(sub: &[u8], full: &[u8]) -> bool { + let mut it = full.iter(); + for &c in sub { + if it.find(|&&x| x == c).is_none() { + return false; + } + } + true +} + +crate::declare_variants! { + LongestCommonSubsequence => "2^total_length", +} + +#[cfg(test)] +#[path = "../../unit_tests/models/misc/longest_common_subsequence.rs"] +mod tests; +``` + +**Step 2: Commit** + +```bash +git add src/models/misc/longest_common_subsequence.rs +git commit -m "feat: add LongestCommonSubsequence model (struct + traits)" +``` + +--- + +### Task 2: Register the model in module tree and prelude + +**Files:** +- Modify: `src/models/misc/mod.rs` +- Modify: `src/models/mod.rs` +- Modify: `src/lib.rs` (prelude) + +**Step 1: Update `src/models/misc/mod.rs`** + +Add to the doc comment: +```rust +//! - [`LongestCommonSubsequence`]: Longest Common Subsequence (maximize common subsequence length) +``` + +Add module declaration and re-export: +```rust +pub(crate) mod longest_common_subsequence; +pub use longest_common_subsequence::LongestCommonSubsequence; +``` + +**Step 2: Update `src/models/mod.rs` line 18** + +Change: +```rust +pub use misc::{BinPacking, Factoring, PaintShop}; +``` +To: +```rust +pub use misc::{BinPacking, Factoring, LongestCommonSubsequence, PaintShop}; +``` + +**Step 3: Update `src/lib.rs` line 46 prelude** + +Change: +```rust +pub use crate::models::misc::{BinPacking, Factoring, PaintShop}; +``` +To: +```rust +pub use crate::models::misc::{BinPacking, Factoring, LongestCommonSubsequence, PaintShop}; +``` + +**Step 4: Verify compilation** + +Run: `cargo build 2>&1 | tail -5` + +**Step 5: Commit** + +```bash +git add src/models/misc/mod.rs src/models/mod.rs src/lib.rs +git commit -m "feat: register LongestCommonSubsequence in module tree" +``` + +--- + +### Task 3: Write unit tests + +**Files:** +- Create: `src/unit_tests/models/misc/longest_common_subsequence.rs` + +**Step 1: Write 
the test file** + +```rust +use super::*; +use crate::solvers::{BruteForce, Solver}; +use crate::traits::{OptimizationProblem, Problem}; +use crate::types::Direction; + +#[test] +fn test_lcs_creation() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'A', b'C'], + vec![b'B', b'A', b'C', b'D'], + ]); + assert_eq!(problem.num_strings(), 3); + assert_eq!(problem.total_length(), 9); + // Shortest string is "AC" with length 2 + assert_eq!(problem.dims(), vec![2; 2]); +} + +#[test] +fn test_lcs_direction() { + let problem = LongestCommonSubsequence::new(vec![vec![b'A'], vec![b'A']]); + assert_eq!(problem.direction(), Direction::Maximize); +} + +#[test] +fn test_lcs_evaluate_all_selected() { + // s1 = "AC", s2 = "ABC" → selecting both chars of s1 gives "AC" + // "AC" is subsequence of "ABC"? A..C — yes + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'C'], + vec![b'A', b'B', b'C'], + ]); + let result = problem.evaluate(&[1, 1]); + assert!(result.is_valid()); + assert_eq!(result.unwrap(), 2); +} + +#[test] +fn test_lcs_evaluate_partial_selection() { + // s1 = "ABC", s2 = "AXC" → select A and C (indices 0, 2) + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'A', b'X', b'C'], + ]); + // Config [1, 0, 1] selects "AC" — subsequence of both + let result = problem.evaluate(&[1, 0, 1]); + assert!(result.is_valid()); + assert_eq!(result.unwrap(), 2); +} + +#[test] +fn test_lcs_evaluate_invalid_not_subsequence() { + // s1 = "BA", s2 = "AB" → select both chars of s1 gives "BA" + // "BA" is NOT a subsequence of "AB" (B comes after A in "AB") + let problem = LongestCommonSubsequence::new(vec![ + vec![b'B', b'A'], + vec![b'A', b'B'], + ]); + let result = problem.evaluate(&[1, 1]); + assert!(!result.is_valid()); +} + +#[test] +fn test_lcs_evaluate_empty_selection() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'C', b'D'], + ]); + // Select nothing — empty 
string is always a valid subsequence + let result = problem.evaluate(&[0, 0]); + assert!(result.is_valid()); + assert_eq!(result.unwrap(), 0); +} + +#[test] +fn test_lcs_evaluate_wrong_config_length() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'A', b'C'], + ]); + assert!(!problem.evaluate(&[1]).is_valid()); + assert!(!problem.evaluate(&[1, 0, 1]).is_valid()); +} + +#[test] +fn test_lcs_problem_name() { + assert_eq!(LongestCommonSubsequence::NAME, "LongestCommonSubsequence"); +} + +#[test] +fn test_lcs_variant() { + let v = <LongestCommonSubsequence as Problem>::variant(); + assert!(v.is_empty()); +} + +#[test] +fn test_lcs_brute_force_issue_example() { + // Example from issue #108: + // s1 = "ABCDAB", s2 = "BDCABA", s3 = "BCADBA" + // Optimal LCS = "BCAB", length 4 + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C', b'D', b'A', b'B'], + vec![b'B', b'D', b'C', b'A', b'B', b'A'], + vec![b'B', b'C', b'A', b'D', b'B', b'A'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 4); +} + +#[test] +fn test_lcs_brute_force_two_strings() { + // s1 = "BDCAB", s2 = "ABCBDAB" → LCS = "BCAB", length 4 + let problem = LongestCommonSubsequence::new(vec![ + vec![b'B', b'D', b'C', b'A', b'B'], + vec![b'A', b'B', b'C', b'B', b'D', b'A', b'B'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 4); +} + +#[test] +fn test_lcs_single_string() { + // Single string — LCS is the string itself + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); 
+ assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 3); +} + +#[test] +fn test_lcs_no_common() { + // s1 = "AB", s2 = "CD" → LCS = "", length 0 + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B'], + vec![b'C', b'D'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 0); +} + +#[test] +fn test_lcs_serialization() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'A', b'C'], + ]); + let json = serde_json::to_value(&problem).unwrap(); + let restored: LongestCommonSubsequence = serde_json::from_value(json).unwrap(); + assert_eq!(restored.strings(), problem.strings()); +} +``` + +**Step 2: Run tests** + +Run: `cargo test --lib longest_common_subsequence -- --nocapture 2>&1 | tail -20` +Expected: All tests pass. + +**Step 3: Commit** + +```bash +git add src/unit_tests/models/misc/longest_common_subsequence.rs +git commit -m "test: add LongestCommonSubsequence unit tests" +``` + +--- + +### Task 4: Register in CLI dispatch + +**Files:** +- Modify: `problemreductions-cli/src/dispatch.rs` +- Modify: `problemreductions-cli/src/problem_name.rs` + +**Step 1: Update `dispatch.rs` — add import** + +Add to imports at top: +```rust +use problemreductions::models::misc::LongestCommonSubsequence; +``` + +**Step 2: Update `dispatch.rs` — `load_problem()` match** + +Add after the `"BinPacking"` arm (around line 242): +```rust +"LongestCommonSubsequence" => deser_opt::<LongestCommonSubsequence>(data), +``` + +**Step 3: Update `dispatch.rs` — `serialize_any_problem()` match** + +Add after the `"BinPacking"` arm (around line 301): +```rust +"LongestCommonSubsequence" => try_ser::<LongestCommonSubsequence>(any), +``` + +**Step 4: Update `problem_name.rs` — ALIASES** + +Add to `ALIASES` const: +```rust +("LCS", "LongestCommonSubsequence"), +``` + +**Step 5: Update `problem_name.rs` — `resolve_alias()`** + 
+Add case: +```rust +"lcs" | "longestcommonsubsequence" => "LongestCommonSubsequence".to_string(), +``` + +**Step 6: Verify build** + +Run: `cargo build --workspace 2>&1 | tail -5` + +**Step 7: Commit** + +```bash +git add problemreductions-cli/src/dispatch.rs problemreductions-cli/src/problem_name.rs +git commit -m "feat: register LongestCommonSubsequence in CLI dispatch" +``` + +--- + +### Task 5: Run full checks + +**Step 1: Run formatting, clippy, and tests** + +Run: `make fmt && make clippy && make test` +Expected: All pass. + +**Step 2: Fix any issues found** + +If clippy or tests fail, fix the issues before proceeding. + +**Step 3: Commit any fixes** + +```bash +git add -u +git commit -m "fix: address clippy/test issues for LongestCommonSubsequence" +``` + +--- + +### Task 6: Document in paper + +Invoke `/write-model-in-paper` to add the problem-def entry for LongestCommonSubsequence in `docs/paper/reductions.typ`. Include: +- Formal definition referencing Maier (1978) and Garey & Johnson SR10 +- Background on 2-string polynomial DP vs k-string NP-hardness +- Example from the issue (3 strings, LCS = "BCAB") +- Algorithm list + +--- + +### Task 7: Final review + +Invoke `/review-implementation` to verify all structural and semantic checks pass. 
From 7f5e79d019319a928da805c054159d3a769fb711 Mon Sep 17 00:00:00 2001 From: zazabap Date: Wed, 4 Mar 2026 18:48:58 +0000 Subject: [PATCH 2/4] feat: add LongestCommonSubsequence model Implement the k-string Longest Common Subsequence problem: - Model in src/models/misc/ with binary config over shortest string - 14 unit tests (creation, evaluation, brute force, serialization) - CLI dispatch and LCS alias - Registered in module tree and prelude Closes CodingThrust/problem-reductions#108 Co-Authored-By: Claude Opus 4.6 --- problemreductions-cli/src/dispatch.rs | 29 +-- problemreductions-cli/src/problem_name.rs | 5 +- src/lib.rs | 44 ++--- src/models/misc/longest_common_subsequence.rs | 170 ++++++++++++++++++ src/models/misc/mod.rs | 3 + src/models/mod.rs | 2 +- .../models/misc/longest_common_subsequence.rs | 143 +++++++++++++++ 7 files changed, 358 insertions(+), 38 deletions(-) create mode 100644 src/models/misc/longest_common_subsequence.rs create mode 100644 src/unit_tests/models/misc/longest_common_subsequence.rs diff --git a/problemreductions-cli/src/dispatch.rs b/problemreductions-cli/src/dispatch.rs index a6b0011ff..fb9825b95 100644 --- a/problemreductions-cli/src/dispatch.rs +++ b/problemreductions-cli/src/dispatch.rs @@ -1,19 +1,20 @@ +use std::{any::Any, collections::BTreeMap, fmt, ops::Deref, path::Path}; + use anyhow::{bail, Context, Result}; -use problemreductions::models::algebraic::{ClosestVectorProblem, ILP}; -use problemreductions::models::misc::BinPacking; -use problemreductions::prelude::*; -use problemreductions::rules::{MinimizeSteps, ReductionGraph}; -use problemreductions::solvers::{BruteForce, ILPSolver, Solver}; -use problemreductions::topology::{KingsSubgraph, SimpleGraph, TriangularSubgraph, UnitDiskGraph}; -use problemreductions::types::ProblemSize; -use problemreductions::variant::{K2, K3, KN}; +use problemreductions::{ + models::{ + algebraic::{ClosestVectorProblem, ILP}, + misc::{BinPacking, LongestCommonSubsequence}, + }, + prelude::*, 
+ rules::{MinimizeSteps, ReductionGraph}, + solvers::{BruteForce, ILPSolver, Solver}, + topology::{KingsSubgraph, SimpleGraph, TriangularSubgraph, UnitDiskGraph}, + types::ProblemSize, + variant::{K2, K3, KN}, +}; use serde::Serialize; use serde_json::Value; -use std::any::Any; -use std::collections::BTreeMap; -use std::fmt; -use std::ops::Deref; -use std::path::Path; use crate::problem_name::resolve_alias; @@ -244,6 +245,7 @@ pub fn load_problem( Some("f64") => deser_opt::>(data), _ => deser_opt::>(data), }, + "LongestCommonSubsequence" => deser_opt::(data), _ => bail!("{}", crate::problem_name::unknown_problem_error(&canonical)), } } @@ -303,6 +305,7 @@ pub fn serialize_any_problem( Some("f64") => try_ser::>(any), _ => try_ser::>(any), }, + "LongestCommonSubsequence" => try_ser::(any), _ => bail!("{}", crate::problem_name::unknown_problem_error(&canonical)), } } diff --git a/problemreductions-cli/src/problem_name.rs b/problemreductions-cli/src/problem_name.rs index 43a5f2c42..249350fd8 100644 --- a/problemreductions-cli/src/problem_name.rs +++ b/problemreductions-cli/src/problem_name.rs @@ -1,5 +1,4 @@ -use std::collections::BTreeMap; -use std::ffi::OsStr; +use std::{collections::BTreeMap, ffi::OsStr}; /// A parsed problem specification: name + optional variant values. #[derive(Debug, Clone)] @@ -21,6 +20,7 @@ pub const ALIASES: &[(&str, &str)] = &[ ("TSP", "TravelingSalesman"), ("BP", "BinPacking"), ("CVP", "ClosestVectorProblem"), + ("LCS", "LongestCommonSubsequence"), ]; /// Resolve a short alias to the canonical problem name. 
@@ -51,6 +51,7 @@ pub fn resolve_alias(input: &str) -> String { "bicliquecover" => "BicliqueCover".to_string(), "bp" | "binpacking" => "BinPacking".to_string(), "cvp" | "closestvectorproblem" => "ClosestVectorProblem".to_string(), + "lcs" | "longestcommonsubsequence" => "LongestCommonSubsequence".to_string(), _ => input.to_string(), // pass-through for exact names } } diff --git a/src/lib.rs b/src/lib.rs index ef67ab53c..4967dcc9b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,28 +36,34 @@ pub mod variant; /// Prelude module for convenient imports. pub mod prelude { // Problem types - pub use crate::models::algebraic::{BMF, QUBO}; - pub use crate::models::formula::{CNFClause, CircuitSAT, KSatisfiability, Satisfiability}; - pub use crate::models::graph::{BicliqueCover, SpinGlass}; - pub use crate::models::graph::{ - KColoring, MaxCut, MaximalIS, MaximumClique, MaximumIndependentSet, MaximumMatching, - MinimumDominatingSet, MinimumVertexCover, TravelingSalesman, - }; - pub use crate::models::misc::{BinPacking, Factoring, PaintShop}; - pub use crate::models::set::{MaximumSetPacking, MinimumSetCovering}; - - // Core traits - pub use crate::rules::{ReduceTo, ReductionResult}; - pub use crate::solvers::{BruteForce, Solver}; - pub use crate::traits::{OptimizationProblem, Problem, SatisfactionProblem}; - // Types pub use crate::error::{ProblemError, Result}; - pub use crate::types::{Direction, One, ProblemSize, SolutionSize, Unweighted}; + // Core traits + pub use crate::rules::{ReduceTo, ReductionResult}; + pub use crate::{ + models::{ + algebraic::{BMF, QUBO}, + formula::{CNFClause, CircuitSAT, KSatisfiability, Satisfiability}, + graph::{ + BicliqueCover, KColoring, MaxCut, MaximalIS, MaximumClique, MaximumIndependentSet, + MaximumMatching, MinimumDominatingSet, MinimumVertexCover, SpinGlass, + TravelingSalesman, + }, + misc::{BinPacking, Factoring, LongestCommonSubsequence, PaintShop}, + set::{MaximumSetPacking, MinimumSetCovering}, + }, + solvers::{BruteForce, 
Solver}, + traits::{OptimizationProblem, Problem, SatisfactionProblem}, + types::{Direction, One, ProblemSize, SolutionSize, Unweighted}, + }; } // Re-export commonly used items at crate root pub use error::{ProblemError, Result}; +// Re-export inventory so `declare_variants!` can use `$crate::inventory::submit!` +pub use inventory; +// Re-export proc macros for reduction registration and variant declaration +pub use problemreductions_macros::{declare_variants, reduction}; pub use registry::{ComplexityClass, ProblemInfo}; pub use solvers::{BruteForce, Solver}; pub use traits::{OptimizationProblem, Problem, SatisfactionProblem}; @@ -65,12 +71,6 @@ pub use types::{ Direction, NumericSize, One, ProblemSize, SolutionSize, Unweighted, WeightElement, }; -// Re-export proc macros for reduction registration and variant declaration -pub use problemreductions_macros::{declare_variants, reduction}; - -// Re-export inventory so `declare_variants!` can use `$crate::inventory::submit!` -pub use inventory; - #[cfg(test)] #[path = "unit_tests/graph_models.rs"] mod test_graph_models; diff --git a/src/models/misc/longest_common_subsequence.rs b/src/models/misc/longest_common_subsequence.rs new file mode 100644 index 000000000..8a430061d --- /dev/null +++ b/src/models/misc/longest_common_subsequence.rs @@ -0,0 +1,170 @@ +//! Longest Common Subsequence problem implementation. +//! +//! Given k strings over an alphabet, find the longest string that is a +//! subsequence of every input string. NP-hard for variable k (Maier, 1978). + +use serde::{Deserialize, Serialize}; + +use crate::{ + registry::{FieldInfo, ProblemSchemaEntry}, + traits::{OptimizationProblem, Problem}, + types::{Direction, SolutionSize}, +}; + +inventory::submit! 
{ + ProblemSchemaEntry { + name: "LongestCommonSubsequence", + module_path: module_path!(), + description: "Find the longest string that is a subsequence of every input string", + fields: &[ + FieldInfo { name: "strings", type_name: "Vec<Vec<u8>>", description: "The input strings" }, + ], + } +} + +/// The Longest Common Subsequence problem. +/// +/// Given `k` strings `s_1, ..., s_k` over an alphabet, find a longest +/// string `w` that is a subsequence of every `s_i`. +/// +/// A string `w` is a **subsequence** of `s` if `w` can be obtained by +/// deleting zero or more characters from `s` without changing the order +/// of the remaining characters. +/// +/// # Representation +/// +/// Configuration is binary selection over the characters of the shortest +/// string. Each variable in `{0, 1}` indicates whether the corresponding +/// character of the shortest string is included in the candidate subsequence. +/// The candidate is valid if the resulting subsequence is also a subsequence +/// of every other input string. +/// +/// # Example +/// +/// ``` +/// use problemreductions::{models::misc::LongestCommonSubsequence, BruteForce, Problem, Solver}; +/// +/// let problem = LongestCommonSubsequence::new(vec![ +/// vec![b'A', b'B', b'C', b'D', b'A', b'B'], +/// vec![b'B', b'D', b'C', b'A', b'B', b'A'], +/// vec![b'B', b'C', b'A', b'D', b'B', b'A'], +/// ]); +/// let solver = BruteForce::new(); +/// let solution = solver.find_best(&problem); +/// assert!(solution.is_some()); +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LongestCommonSubsequence { + /// The input strings. + strings: Vec<Vec<u8>>, +} + +impl LongestCommonSubsequence { + /// Create a new LCS problem from a list of strings. + /// + /// # Panics + /// + /// Panics if `strings` is empty. + pub fn new(strings: Vec<Vec<u8>>) -> Self { + assert!(!strings.is_empty(), "must have at least one string"); + Self { strings } + } + + /// Get the input strings. 
+ pub fn strings(&self) -> &[Vec<u8>] { + &self.strings + } + + /// Get the number of input strings. + pub fn num_strings(&self) -> usize { + self.strings.len() + } + + /// Get the total length of all input strings. + pub fn total_length(&self) -> usize { + self.strings.iter().map(|s| s.len()).sum() + } + + /// Index of the shortest string. + fn shortest_index(&self) -> usize { + self.strings + .iter() + .enumerate() + .min_by_key(|(_, s)| s.len()) + .map(|(i, _)| i) + .unwrap_or(0) + } + + /// Length of the shortest string. + fn shortest_len(&self) -> usize { + self.strings.iter().map(|s| s.len()).min().unwrap_or(0) + } +} + +impl Problem for LongestCommonSubsequence { + const NAME: &'static str = "LongestCommonSubsequence"; + type Metric = SolutionSize<i32>; + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } + + fn dims(&self) -> Vec<usize> { + vec![2; self.shortest_len()] + } + + fn evaluate(&self, config: &[usize]) -> SolutionSize<i32> { + let si = self.shortest_index(); + let shortest = &self.strings[si]; + if config.len() != shortest.len() { + return SolutionSize::Invalid; + } + if config.iter().any(|&v| v > 1) { + return SolutionSize::Invalid; + } + // Build the candidate subsequence from selected characters + let candidate: Vec<u8> = config + .iter() + .enumerate() + .filter(|(_, &v)| v == 1) + .map(|(i, _)| shortest[i]) + .collect(); + // Check that candidate is a subsequence of every other string + for (j, s) in self.strings.iter().enumerate() { + if j == si { + continue; + } + if !is_subsequence(&candidate, s) { + return SolutionSize::Invalid; + } + } + SolutionSize::Valid(candidate.len() as i32) + } +} + +impl OptimizationProblem for LongestCommonSubsequence { + type Value = i32; + + fn direction(&self) -> Direction { + Direction::Maximize + } +} + +/// Check if `sub` is a subsequence of `full`. 
+fn is_subsequence(sub: &[u8], full: &[u8]) -> bool { + let mut it = full.iter(); + for &c in sub { + if !it.any(|&x| x == c) { + return false; + } + } + true +} + +crate::declare_variants! { + LongestCommonSubsequence => "2^total_length", +} + +#[cfg(test)] +#[path = "../../unit_tests/models/misc/longest_common_subsequence.rs"] +mod tests; diff --git a/src/models/misc/mod.rs b/src/models/misc/mod.rs index 6e2fa084a..7eb69e941 100644 --- a/src/models/misc/mod.rs +++ b/src/models/misc/mod.rs @@ -3,12 +3,15 @@ //! Problems with unique input structures that don't fit other categories: //! - [`BinPacking`]: Bin Packing (minimize bins) //! - [`Factoring`]: Integer factorization +//! - [`LongestCommonSubsequence`]: Longest Common Subsequence (maximize common subsequence length) //! - [`PaintShop`]: Minimize color switches in paint shop scheduling mod bin_packing; pub(crate) mod factoring; +pub(crate) mod longest_common_subsequence; pub(crate) mod paintshop; pub use bin_packing::BinPacking; pub use factoring::Factoring; +pub use longest_common_subsequence::LongestCommonSubsequence; pub use paintshop::PaintShop; diff --git a/src/models/mod.rs b/src/models/mod.rs index 15df5cfa3..44cfb6466 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -15,5 +15,5 @@ pub use graph::{ BicliqueCover, KColoring, MaxCut, MaximalIS, MaximumClique, MaximumIndependentSet, MaximumMatching, MinimumDominatingSet, MinimumVertexCover, SpinGlass, TravelingSalesman, }; -pub use misc::{BinPacking, Factoring, PaintShop}; +pub use misc::{BinPacking, Factoring, LongestCommonSubsequence, PaintShop}; pub use set::{MaximumSetPacking, MinimumSetCovering}; diff --git a/src/unit_tests/models/misc/longest_common_subsequence.rs b/src/unit_tests/models/misc/longest_common_subsequence.rs new file mode 100644 index 000000000..61511e0ff --- /dev/null +++ b/src/unit_tests/models/misc/longest_common_subsequence.rs @@ -0,0 +1,143 @@ +use super::*; +use crate::{ + solvers::{BruteForce, Solver}, + 
traits::{OptimizationProblem, Problem}, + types::Direction, +}; + +#[test] +fn test_lcs_creation() { + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C'], + vec![b'A', b'C'], + vec![b'B', b'A', b'C', b'D'], + ]); + assert_eq!(problem.num_strings(), 3); + assert_eq!(problem.total_length(), 9); + // Shortest string is "AC" with length 2 + assert_eq!(problem.dims(), vec![2; 2]); +} + +#[test] +fn test_lcs_direction() { + let problem = LongestCommonSubsequence::new(vec![vec![b'A'], vec![b'A']]); + assert_eq!(problem.direction(), Direction::Maximize); +} + +#[test] +fn test_lcs_evaluate_all_selected() { + // s1 = "AC", s2 = "ABC" → selecting both chars of s1 gives "AC" + // "AC" is subsequence of "ABC"? A..C — yes + let problem = LongestCommonSubsequence::new(vec![vec![b'A', b'C'], vec![b'A', b'B', b'C']]); + let result = problem.evaluate(&[1, 1]); + assert!(result.is_valid()); + assert_eq!(result.unwrap(), 2); +} + +#[test] +fn test_lcs_evaluate_partial_selection() { + // s1 = "ABC", s2 = "AXC" → select A and C (indices 0, 2) + let problem = + LongestCommonSubsequence::new(vec![vec![b'A', b'B', b'C'], vec![b'A', b'X', b'C']]); + // Config [1, 0, 1] selects "AC" — subsequence of both + let result = problem.evaluate(&[1, 0, 1]); + assert!(result.is_valid()); + assert_eq!(result.unwrap(), 2); +} + +#[test] +fn test_lcs_evaluate_invalid_not_subsequence() { + // s1 = "BA", s2 = "AB" → select both chars of s1 gives "BA" + // "BA" is NOT a subsequence of "AB" (B comes after A in "AB") + let problem = LongestCommonSubsequence::new(vec![vec![b'B', b'A'], vec![b'A', b'B']]); + let result = problem.evaluate(&[1, 1]); + assert!(!result.is_valid()); +} + +#[test] +fn test_lcs_evaluate_empty_selection() { + let problem = LongestCommonSubsequence::new(vec![vec![b'A', b'B'], vec![b'C', b'D']]); + // Select nothing — empty string is always a valid subsequence + let result = problem.evaluate(&[0, 0]); + assert!(result.is_valid()); + assert_eq!(result.unwrap(), 
0); +} + +#[test] +fn test_lcs_evaluate_wrong_config_length() { + let problem = LongestCommonSubsequence::new(vec![vec![b'A', b'B'], vec![b'A', b'C']]); + assert!(!problem.evaluate(&[1]).is_valid()); + assert!(!problem.evaluate(&[1, 0, 1]).is_valid()); +} + +#[test] +fn test_lcs_problem_name() { + assert_eq!(LongestCommonSubsequence::NAME, "LongestCommonSubsequence"); +} + +#[test] +fn test_lcs_variant() { + let v = <LongestCommonSubsequence as Problem>::variant(); + assert!(v.is_empty()); +} + +#[test] +fn test_lcs_brute_force_issue_example() { + // Example from issue #108: + // s1 = "ABCDAB", s2 = "BDCABA", s3 = "BCADBA" + // Optimal LCS = "BCAB", length 4 + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C', b'D', b'A', b'B'], + vec![b'B', b'D', b'C', b'A', b'B', b'A'], + vec![b'B', b'C', b'A', b'D', b'B', b'A'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 4); +} + +#[test] +fn test_lcs_brute_force_two_strings() { + // s1 = "BDCAB", s2 = "ABCBDAB" → LCS = "BCAB", length 4 + let problem = LongestCommonSubsequence::new(vec![ + vec![b'B', b'D', b'C', b'A', b'B'], + vec![b'A', b'B', b'C', b'B', b'D', b'A', b'B'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 4); +} + +#[test] +fn test_lcs_single_string() { + // Single string — LCS is the string itself + let problem = LongestCommonSubsequence::new(vec![vec![b'A', b'B', b'C']]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 3); +} + +#[test] +fn test_lcs_no_common() { + // s1 = "AB", s2 = "CD" → LCS = "", length 0 + 
let problem = LongestCommonSubsequence::new(vec![vec![b'A', b'B'], vec![b'C', b'D']]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 0); +} + +#[test] +fn test_lcs_serialization() { + let problem = LongestCommonSubsequence::new(vec![vec![b'A', b'B', b'C'], vec![b'A', b'C']]); + let json = serde_json::to_value(&problem).unwrap(); + let restored: LongestCommonSubsequence = serde_json::from_value(json).unwrap(); + assert_eq!(restored.strings(), problem.strings()); +} From c9cc8f1de97c2457ee9c1162c52d368cb50efe20 Mon Sep 17 00:00:00 2001 From: zazabap Date: Wed, 4 Mar 2026 18:55:08 +0000 Subject: [PATCH 3/4] docs: add LongestCommonSubsequence to paper and update JSON exports - Add problem-def entry with formal definition, background, and example - Add display-name entry - Add bibliography entries for Maier 1978 and Wagner & Fischer 1974 - Update reduction_graph.json and problem_schemas.json Co-Authored-By: Claude Opus 4.6 --- docs/paper/reductions.typ | 41 ++++++ docs/paper/references.bib | 20 +++ docs/src/reductions/problem_schemas.json | 11 ++ docs/src/reductions/reduction_graph.json | 171 ++++++++++++----------- 4 files changed, 161 insertions(+), 82 deletions(-) diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index d119290fd..44c67d1ac 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -51,6 +51,7 @@ "BicliqueCover": [Biclique Cover], "BinPacking": [Bin Packing], "ClosestVectorProblem": [Closest Vector Problem], + "LongestCommonSubsequence": [Longest Common Subsequence], ) // Definition label: "def:" — each definition block must have a matching label @@ -886,6 +887,46 @@ Biclique Cover is equivalent to factoring the biadjacency matrix $M$ of the bipa ) ] +#problem-def("LongestCommonSubsequence")[ + Given a finite alphabet $Sigma$ and $k$ strings $s_1, dots, 
s_k$ over $Sigma$, find a longest string $w$ that is a subsequence of every $s_i$. A string $w$ is a _subsequence_ of $s$ if $w$ can be obtained by deleting zero or more characters from $s$ without changing the order of the remaining characters. +][ + The Longest Common Subsequence (LCS) problem is one of the fundamental string problems in computer science, listed as SR10 in @garey1979. For $k = 2$ strings, it is solvable in $O(m n)$ time via dynamic programming @wagner1974. However, Maier @maier1978 proved that the problem is NP-hard when $k$ is part of the input, even over a binary alphabet. LCS is central to diff and version control (e.g., `git diff`), bioinformatics (DNA/protein alignment), and data compression. The best known exact algorithm for the general $k$-string case runs in $O^*(2^n)$ by brute-force enumeration over subsequences of the shortest string#footnote[No algorithm improving on brute-force enumeration is known for the general $k$-string LCS.], where $n$ is the total length of all strings. + + *Example.* Consider $k = 3$ strings over $Sigma = {A, B, C, D}$: $s_1 = mono("ABCDAB")$, $s_2 = mono("BDCABA")$, $s_3 = mono("BCADBA")$. An optimal common subsequence is $w = mono("BCAB")$ with length 4. 
We verify that $w$ is a subsequence of each string by identifying matching positions: + + #figure({ + canvas(length: 1cm, { + let strings = ( + ("A", "B", "C", "D", "A", "B"), + ("B", "D", "C", "A", "B", "A"), + ("B", "C", "A", "D", "B", "A"), + ) + let labels = ($s_1$, $s_2$, $s_3$) + // Positions matched in each string for BCAB + let matched = ((1, 2, 4, 5), (0, 2, 3, 4), (0, 1, 2, 4)) + let dx = 0.7 + let dy = -1.2 + for si in range(3) { + let y = si * dy + draw.content((-0.6, y), labels.at(si)) + for ci in range(strings.at(si).len()) { + let x = ci * dx + let is-matched = ci in matched.at(si) + let fill-color = if is-matched { graph-colors.at(0) } else { luma(230) } + let text-color = if is-matched { white } else { black } + draw.rect((x - 0.25, y - 0.25), (x + 0.25, y + 0.25), + fill: fill-color, stroke: 0.4pt + luma(120), radius: 2pt) + draw.content((x, y), text(9pt, fill: text-color, font: "DejaVu Sans Mono")[#strings.at(si).at(ci)]) + } + } + draw.content((strings.at(0).len() * dx / 2 - 0.15, 3 * dy + 0.1), + text(8pt)[$w = mono("BCAB"), |w| = 4$]) + }) + }, + caption: [Longest Common Subsequence of three strings. Blue cells mark the positions forming the common subsequence $w = mono("BCAB")$ of length 4.], + ) +] + // Completeness check: warn about problem types in JSON but missing from paper #{ let json-models = { diff --git a/docs/paper/references.bib b/docs/paper/references.bib index e75278765..689dd3b7c 100644 --- a/docs/paper/references.bib +++ b/docs/paper/references.bib @@ -1,3 +1,23 @@ +@article{maier1978, + author = {David Maier}, + title = {The Complexity of Some Problems on Subsequences and Supersequences}, + journal = {Journal of the ACM}, + volume = {25}, + number = {2}, + pages = {322--336}, + year = {1978} +} + +@article{wagner1974, + author = {Robert A. Wagner and Michael J. 
Fischer}, + title = {The String-to-String Correction Problem}, + journal = {Journal of the ACM}, + volume = {21}, + number = {1}, + pages = {168--173}, + year = {1974} +} + @inproceedings{karp1972, author = {Richard M. Karp}, title = {Reducibility among Combinatorial Problems}, diff --git a/docs/src/reductions/problem_schemas.json b/docs/src/reductions/problem_schemas.json index 8cc8d2ae9..50e25596d 100644 --- a/docs/src/reductions/problem_schemas.json +++ b/docs/src/reductions/problem_schemas.json @@ -183,6 +183,17 @@ } ] }, + { + "name": "LongestCommonSubsequence", + "description": "Find the longest string that is a subsequence of every input string", + "fields": [ + { + "name": "strings", + "type_name": "Vec<Vec<u8>>", + "description": "The input strings" + } + ] + }, { "name": "MaxCut", "description": "Find maximum weight cut in a graph", diff --git a/docs/src/reductions/reduction_graph.json b/docs/src/reductions/reduction_graph.json index 0b0c68506..e3076c868 100644 --- a/docs/src/reductions/reduction_graph.json +++ b/docs/src/reductions/reduction_graph.json @@ -148,6 +148,13 @@ "doc_path": "models/formula/struct.KSatisfiability.html", "complexity": "2^num_variables" }, + { + "name": "LongestCommonSubsequence", + "variant": {}, + "category": "misc", + "doc_path": "models/misc/struct.LongestCommonSubsequence.html", + "complexity": "2^total_length" + }, { "name": "MaxCut", "variant": { @@ -386,7 +393,7 @@ }, { "source": 4, - "target": 38, + "target": 39, "overhead": [ { "field": "num_spins", @@ -431,7 +438,7 @@ }, { "source": 8, - "target": 35, + "target": 36, "overhead": [ { "field": "num_vars", @@ -472,7 +479,7 @@ }, { "source": 13, - "target": 35, + "target": 36, "overhead": [ { "field": "num_vars", @@ -498,7 +505,7 @@ }, { "source": 14, - "target": 35, + "target": 36, "overhead": [ { "field": "num_vars", @@ -509,7 +516,7 @@ }, { "source": 14, - "target": 36, + "target": 37, "overhead": [ { "field": "num_clauses", @@ -543,7 +550,7 @@ }, { "source": 15, - "target": 
35, + "target": 36, "overhead": [ { "field": "num_vars", @@ -554,7 +561,7 @@ }, { "source": 15, - "target": 36, + "target": 37, "overhead": [ { "field": "num_clauses", @@ -573,7 +580,7 @@ }, { "source": 16, - "target": 36, + "target": 37, "overhead": [ { "field": "num_clauses", @@ -591,8 +598,8 @@ "doc_path": "rules/sat_ksat/index.html" }, { - "source": 17, - "target": 38, + "source": 18, + "target": 39, "overhead": [ { "field": "num_spins", @@ -606,7 +613,7 @@ "doc_path": "rules/spinglass_maxcut/index.html" }, { - "source": 19, + "source": 20, "target": 8, "overhead": [ { @@ -621,8 +628,8 @@ "doc_path": "rules/maximumclique_ilp/index.html" }, { - "source": 20, - "target": 21, + "source": 21, + "target": 22, "overhead": [ { "field": "num_vertices", @@ -636,8 +643,8 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 20, - "target": 25, + "source": 21, + "target": 26, "overhead": [ { "field": "num_vertices", @@ -651,8 +658,8 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 21, - "target": 26, + "source": 22, + "target": 27, "overhead": [ { "field": "num_vertices", @@ -666,8 +673,8 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 22, - "target": 20, + "source": 23, + "target": 21, "overhead": [ { "field": "num_vertices", @@ -681,8 +688,8 @@ "doc_path": "rules/maximumindependentset_gridgraph/index.html" }, { - "source": 22, - "target": 21, + "source": 23, + "target": 22, "overhead": [ { "field": "num_vertices", @@ -696,8 +703,8 @@ "doc_path": "rules/maximumindependentset_gridgraph/index.html" }, { - "source": 22, - "target": 23, + "source": 23, + "target": 24, "overhead": [ { "field": "num_vertices", @@ -711,8 +718,8 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 22, - "target": 24, + "source": 23, + "target": 25, "overhead": [ { "field": "num_vertices", @@ -726,8 +733,8 @@ "doc_path": "rules/maximumindependentset_triangular/index.html" }, { - 
"source": 22, - "target": 28, + "source": 23, + "target": 29, "overhead": [ { "field": "num_sets", @@ -741,7 +748,7 @@ "doc_path": "rules/maximumindependentset_maximumsetpacking/index.html" }, { - "source": 23, + "source": 24, "target": 8, "overhead": [ { @@ -756,8 +763,8 @@ "doc_path": "rules/maximumindependentset_ilp/index.html" }, { - "source": 23, - "target": 30, + "source": 24, + "target": 31, "overhead": [ { "field": "num_sets", @@ -771,8 +778,8 @@ "doc_path": "rules/maximumindependentset_maximumsetpacking/index.html" }, { - "source": 23, - "target": 33, + "source": 24, + "target": 34, "overhead": [ { "field": "num_vertices", @@ -786,8 +793,8 @@ "doc_path": "rules/minimumvertexcover_maximumindependentset/index.html" }, { - "source": 23, - "target": 35, + "source": 24, + "target": 36, "overhead": [ { "field": "num_vars", @@ -797,8 +804,8 @@ "doc_path": "rules/maximumindependentset_qubo/index.html" }, { - "source": 24, - "target": 26, + "source": 25, + "target": 27, "overhead": [ { "field": "num_vertices", @@ -812,8 +819,8 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 25, - "target": 22, + "source": 26, + "target": 23, "overhead": [ { "field": "num_vertices", @@ -827,8 +834,8 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 25, - "target": 26, + "source": 26, + "target": 27, "overhead": [ { "field": "num_vertices", @@ -842,8 +849,8 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 26, - "target": 23, + "source": 27, + "target": 24, "overhead": [ { "field": "num_vertices", @@ -857,7 +864,7 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 27, + "source": 28, "target": 8, "overhead": [ { @@ -872,8 +879,8 @@ "doc_path": "rules/maximummatching_ilp/index.html" }, { - "source": 27, - "target": 30, + "source": 28, + "target": 31, "overhead": [ { "field": "num_sets", @@ -887,8 +894,8 @@ "doc_path": "rules/maximummatching_maximumsetpacking/index.html" 
}, { - "source": 28, - "target": 22, + "source": 29, + "target": 23, "overhead": [ { "field": "num_vertices", @@ -902,8 +909,8 @@ "doc_path": "rules/maximumindependentset_maximumsetpacking/index.html" }, { - "source": 28, - "target": 30, + "source": 29, + "target": 31, "overhead": [ { "field": "num_sets", @@ -917,8 +924,8 @@ "doc_path": "rules/maximumsetpacking_casts/index.html" }, { - "source": 29, - "target": 35, + "source": 30, + "target": 36, "overhead": [ { "field": "num_vars", @@ -928,7 +935,7 @@ "doc_path": "rules/maximumsetpacking_qubo/index.html" }, { - "source": 30, + "source": 31, "target": 8, "overhead": [ { @@ -943,8 +950,8 @@ "doc_path": "rules/maximumsetpacking_ilp/index.html" }, { - "source": 30, - "target": 23, + "source": 31, + "target": 24, "overhead": [ { "field": "num_vertices", @@ -958,8 +965,8 @@ "doc_path": "rules/maximumindependentset_maximumsetpacking/index.html" }, { - "source": 30, - "target": 29, + "source": 31, + "target": 30, "overhead": [ { "field": "num_sets", @@ -973,7 +980,7 @@ "doc_path": "rules/maximumsetpacking_casts/index.html" }, { - "source": 31, + "source": 32, "target": 8, "overhead": [ { @@ -988,7 +995,7 @@ "doc_path": "rules/minimumdominatingset_ilp/index.html" }, { - "source": 32, + "source": 33, "target": 8, "overhead": [ { @@ -1003,7 +1010,7 @@ "doc_path": "rules/minimumsetcovering_ilp/index.html" }, { - "source": 33, + "source": 34, "target": 8, "overhead": [ { @@ -1018,8 +1025,8 @@ "doc_path": "rules/minimumvertexcover_ilp/index.html" }, { - "source": 33, - "target": 23, + "source": 34, + "target": 24, "overhead": [ { "field": "num_vertices", @@ -1033,8 +1040,8 @@ "doc_path": "rules/minimumvertexcover_maximumindependentset/index.html" }, { - "source": 33, - "target": 32, + "source": 34, + "target": 33, "overhead": [ { "field": "num_sets", @@ -1048,8 +1055,8 @@ "doc_path": "rules/minimumvertexcover_minimumsetcovering/index.html" }, { - "source": 33, - "target": 35, + "source": 34, + "target": 36, "overhead": [ { 
"field": "num_vars", @@ -1059,7 +1066,7 @@ "doc_path": "rules/minimumvertexcover_qubo/index.html" }, { - "source": 35, + "source": 36, "target": 8, "overhead": [ { @@ -1074,8 +1081,8 @@ "doc_path": "rules/qubo_ilp/index.html" }, { - "source": 35, - "target": 37, + "source": 36, + "target": 38, "overhead": [ { "field": "num_spins", @@ -1085,7 +1092,7 @@ "doc_path": "rules/spinglass_qubo/index.html" }, { - "source": 36, + "source": 37, "target": 4, "overhead": [ { @@ -1100,7 +1107,7 @@ "doc_path": "rules/sat_circuitsat/index.html" }, { - "source": 36, + "source": 37, "target": 10, "overhead": [ { @@ -1115,7 +1122,7 @@ "doc_path": "rules/sat_coloring/index.html" }, { - "source": 36, + "source": 37, "target": 15, "overhead": [ { @@ -1130,8 +1137,8 @@ "doc_path": "rules/sat_ksat/index.html" }, { - "source": 36, - "target": 22, + "source": 37, + "target": 23, "overhead": [ { "field": "num_vertices", @@ -1145,8 +1152,8 @@ "doc_path": "rules/sat_maximumindependentset/index.html" }, { - "source": 36, - "target": 31, + "source": 37, + "target": 32, "overhead": [ { "field": "num_vertices", @@ -1160,8 +1167,8 @@ "doc_path": "rules/sat_minimumdominatingset/index.html" }, { - "source": 37, - "target": 35, + "source": 38, + "target": 36, "overhead": [ { "field": "num_vars", @@ -1171,8 +1178,8 @@ "doc_path": "rules/spinglass_qubo/index.html" }, { - "source": 38, - "target": 17, + "source": 39, + "target": 18, "overhead": [ { "field": "num_vertices", @@ -1186,8 +1193,8 @@ "doc_path": "rules/spinglass_maxcut/index.html" }, { - "source": 38, - "target": 37, + "source": 39, + "target": 38, "overhead": [ { "field": "num_spins", @@ -1201,7 +1208,7 @@ "doc_path": "rules/spinglass_casts/index.html" }, { - "source": 39, + "source": 40, "target": 8, "overhead": [ { From ac4791fe2d9759db97511afbb12e2d1bc392c164 Mon Sep 17 00:00:00 2001 From: zazabap Date: Wed, 4 Mar 2026 19:01:07 +0000 Subject: [PATCH 4/4] fix: tighten LCS complexity to 2^min_string_length, add edge-case tests - Change 
declare_variants! from 2^total_length to 2^min_string_length (actual brute-force enumerates subsequences of shortest string) - Make min_string_length() public getter (was private shortest_len) - Add #[should_panic] test for empty input - Add edge-case test for empty string in input - Fix paper text to reference shortest string length, not total Co-Authored-By: Claude Opus 4.6 --- docs/paper/reductions.typ | 2 +- src/models/misc/longest_common_subsequence.rs | 8 ++++---- .../models/misc/longest_common_subsequence.rs | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index 44c67d1ac..e93435576 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -890,7 +890,7 @@ Biclique Cover is equivalent to factoring the biadjacency matrix $M$ of the bipa #problem-def("LongestCommonSubsequence")[ Given a finite alphabet $Sigma$ and $k$ strings $s_1, dots, s_k$ over $Sigma$, find a longest string $w$ that is a subsequence of every $s_i$. A string $w$ is a _subsequence_ of $s$ if $w$ can be obtained by deleting zero or more characters from $s$ without changing the order of the remaining characters. ][ - The Longest Common Subsequence (LCS) problem is one of the fundamental string problems in computer science, listed as SR10 in @garey1979. For $k = 2$ strings, it is solvable in $O(m n)$ time via dynamic programming @wagner1974. However, Maier @maier1978 proved that the problem is NP-hard when $k$ is part of the input, even over a binary alphabet. LCS is central to diff and version control (e.g., `git diff`), bioinformatics (DNA/protein alignment), and data compression. The best known exact algorithm for the general $k$-string case runs in $O^*(2^n)$ by brute-force enumeration over subsequences of the shortest string#footnote[No algorithm improving on brute-force enumeration is known for the general $k$-string LCS.], where $n$ is the total length of all strings. 
+ The Longest Common Subsequence (LCS) problem is one of the fundamental string problems in computer science, listed as SR10 in @garey1979. For $k = 2$ strings, it is solvable in $O(m n)$ time via dynamic programming @wagner1974. However, Maier @maier1978 proved that the problem is NP-hard when $k$ is part of the input, even over a binary alphabet. LCS is central to diff and version control (e.g., `git diff`), bioinformatics (DNA/protein alignment), and data compression. The best known exact algorithm for the general $k$-string case runs in $O^*(2^m)$ by brute-force enumeration over subsequences of the shortest string#footnote[No algorithm improving on brute-force enumeration is known for the general $k$-string LCS.], where $m = min_i |s_i|$ is the length of the shortest string. *Example.* Consider $k = 3$ strings over $Sigma = {A, B, C, D}$: $s_1 = mono("ABCDAB")$, $s_2 = mono("BDCABA")$, $s_3 = mono("BCADBA")$. An optimal common subsequence is $w = mono("BCAB")$ with length 4. We verify that $w$ is a subsequence of each string by identifying matching positions: diff --git a/src/models/misc/longest_common_subsequence.rs b/src/models/misc/longest_common_subsequence.rs index 8a430061d..9176f5860 100644 --- a/src/models/misc/longest_common_subsequence.rs +++ b/src/models/misc/longest_common_subsequence.rs @@ -95,8 +95,8 @@ impl LongestCommonSubsequence { .unwrap_or(0) } - /// Length of the shortest string. - fn shortest_len(&self) -> usize { + /// Length of the shortest string (upper bound on LCS length). + pub fn min_string_length(&self) -> usize { self.strings.iter().map(|s| s.len()).min().unwrap_or(0) } } @@ -110,7 +110,7 @@ impl Problem for LongestCommonSubsequence { } fn dims(&self) -> Vec { - vec![2; self.shortest_len()] + vec![2; self.min_string_length()] } fn evaluate(&self, config: &[usize]) -> SolutionSize { @@ -162,7 +162,7 @@ fn is_subsequence(sub: &[u8], full: &[u8]) -> bool { } crate::declare_variants! 
{ - LongestCommonSubsequence => "2^total_length", + LongestCommonSubsequence => "2^min_string_length", } #[cfg(test)] diff --git a/src/unit_tests/models/misc/longest_common_subsequence.rs b/src/unit_tests/models/misc/longest_common_subsequence.rs index 61511e0ff..e7c199eb8 100644 --- a/src/unit_tests/models/misc/longest_common_subsequence.rs +++ b/src/unit_tests/models/misc/longest_common_subsequence.rs @@ -141,3 +141,19 @@ fn test_lcs_serialization() { let restored: LongestCommonSubsequence = serde_json::from_value(json).unwrap(); assert_eq!(restored.strings(), problem.strings()); } + +#[test] +#[should_panic(expected = "must have at least one string")] +fn test_lcs_empty_strings_panics() { + LongestCommonSubsequence::new(vec![]); +} + +#[test] +fn test_lcs_empty_string_in_input() { + // One empty string means LCS is always empty + let problem = LongestCommonSubsequence::new(vec![vec![], vec![b'A', b'B']]); + assert_eq!(problem.dims(), Vec::<usize>::new()); + let result = problem.evaluate(&[]); + assert!(result.is_valid()); + assert_eq!(result.unwrap(), 0); +}