diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index 8298a429c..57fbf4fd6 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -59,6 +59,7 @@ "SubsetSum": [Subset Sum], "MinimumFeedbackArcSet": [Minimum Feedback Arc Set], "MinimumFeedbackVertexSet": [Minimum Feedback Vertex Set], + "ShortestCommonSupersequence": [Shortest Common Supersequence], "MinimumSumMulticenter": [Minimum Sum Multicenter], "SubgraphIsomorphism": [Subgraph Isomorphism], "SubsetSum": [Subset Sum], @@ -1038,6 +1039,66 @@ Biclique Cover is equivalent to factoring the biadjacency matrix $M$ of the bipa *Example.* Let $A = {3, 7, 1, 8, 2, 4}$ ($n = 6$) and target $B = 11$. Selecting $A' = {3, 8}$ gives sum $3 + 8 = 11 = B$. Another solution: $A' = {7, 4}$ with sum $7 + 4 = 11 = B$. ] +#problem-def("ShortestCommonSupersequence")[ + Given a finite alphabet $Sigma$, a set $R = {r_1, dots, r_m}$ of strings over $Sigma$, and a positive integer $K$, determine whether there exists a string $w in Sigma^*$ with $|w| lt.eq K$ such that every string $r_i in R$ is a _subsequence_ of $w$: there exist indices $1 lt.eq j_1 < j_2 < dots < j_(|r_i|) lt.eq |w|$ with $w[j_k] = r_i [k]$ for all $k$. +][ + A classic NP-complete string problem, listed as problem SR8 in Garey and Johnson @garey1979. #cite(<maier1978>, form: "prose") proved NP-completeness; #cite(<raiha1981>, form: "prose") showed the problem remains NP-complete even over a binary alphabet ($|Sigma| = 2$). Note that _subsequence_ (characters may be non-contiguous) differs from _substring_ (contiguous block): the Shortest Common Supersequence asks that each input string can be embedded into $w$ by selecting characters in order but not necessarily adjacently. 
+ + For $|R| = 2$ strings, the problem is solvable in polynomial time via the duality with the Longest Common Subsequence (LCS): if $"LCS"(r_1, r_2)$ has length $ell$, then the shortest common supersequence has length $|r_1| + |r_2| - ell$, computable in $O(|r_1| dot |r_2|)$ time by dynamic programming. For general $|R| = m$, the brute-force search over all strings of length at most $K$ takes $O(|Sigma|^K)$ time. Applications include bioinformatics (reconstructing ancestral sequences from fragments), data compression (representing multiple strings compactly), and scheduling (merging instruction sequences). + + *Example.* Let $Sigma = {a, b, c}$ and $R = {"abc", "bac"}$. We seek the shortest string $w$ containing both $"abc"$ and $"bac"$ as subsequences. + + #figure({ + let w = ("b", "a", "b", "c") + let r1 = ("a", "b", "c") // "abc" + let r2 = ("b", "a", "c") // "bac" + let embed1 = (1, 2, 3) // positions of a, b, c in w (0-indexed) + let embed2 = (0, 1, 3) // positions of b, a, c in w (0-indexed) + let blue = graph-colors.at(0) + let teal = rgb("#76b7b2") + let red = graph-colors.at(1) + align(center, stack(dir: ttb, spacing: 0.6cm, + // Row 1: the supersequence w + stack(dir: ltr, spacing: 0pt, + box(width: 1.2cm, height: 0.5cm, align(center + horizon, text(8pt)[$w =$])), + ..w.enumerate().map(((i, ch)) => { + let is1 = embed1.contains(i) + let is2 = embed2.contains(i) + let fill = if is1 and is2 { blue.transparentize(60%) } else if is1 { blue.transparentize(80%) } else if is2 { teal.transparentize(80%) } else { white } + box(width: 0.55cm, height: 0.55cm, fill: fill, stroke: 0.5pt + luma(120), + align(center + horizon, text(9pt, weight: "bold", ch))) + }), + ), + // Row 2: embedding of r1 + stack(dir: ltr, spacing: 0pt, + box(width: 1.2cm, height: 0.5cm, align(center + horizon, text(8pt, fill: blue)[$r_1 =$])), + ..range(w.len()).map(i => { + let idx = embed1.position(j => j == i) + let ch = if idx != none { r1.at(idx) } else { sym.dot.c } + let col = if idx != 
none { blue } else { luma(200) } + box(width: 0.55cm, height: 0.55cm, + align(center + horizon, text(9pt, fill: col, weight: if idx != none { "bold" } else { "regular" }, ch))) + }), + ), + // Row 3: embedding of r2 + stack(dir: ltr, spacing: 0pt, + box(width: 1.2cm, height: 0.5cm, align(center + horizon, text(8pt, fill: teal)[$r_2 =$])), + ..range(w.len()).map(i => { + let idx = embed2.position(j => j == i) + let ch = if idx != none { r2.at(idx) } else { sym.dot.c } + let col = if idx != none { teal } else { luma(200) } + box(width: 0.55cm, height: 0.55cm, + align(center + horizon, text(9pt, fill: col, weight: if idx != none { "bold" } else { "regular" }, ch))) + }), + ), + )) + }, + caption: [Shortest Common Supersequence: $w = "babc"$ (length 4) contains $r_1 = "abc"$ (blue, 0-indexed positions 1,2,3) and $r_2 = "bac"$ (teal, 0-indexed positions 0,1,3) as subsequences. Dots mark unused positions in each embedding.], + ) + + The supersequence $w = "babc"$ has length 4 and contains both input strings as subsequences. This is optimal because $"LCS"("abc", "bac") = "ac"$ (length 2), so the shortest common supersequence has length $3 + 3 - 2 = 4$. +] + #problem-def("MinimumFeedbackArcSet")[ Given a directed graph $G = (V, A)$, find a minimum-size subset $A' subset.eq A$ such that $G - A'$ is a directed acyclic graph (DAG). Equivalently, $A'$ must contain at least one arc from every directed cycle in $G$. 
][ diff --git a/docs/paper/references.bib b/docs/paper/references.bib index d73afb4e3..ec2b616b7 100644 --- a/docs/paper/references.bib +++ b/docs/paper/references.bib @@ -489,6 +489,17 @@ @article{cygan2014 doi = {10.1137/140990255} } +@article{raiha1981, + author = {Kari-Jouko R{\"a}ih{\"a} and Esko Ukkonen}, + title = {The Shortest Common Supersequence Problem over Binary Alphabet is {NP}-Complete}, + journal = {Theoretical Computer Science}, + volume = {16}, + number = {2}, + pages = {187--198}, + year = {1981}, + doi = {10.1016/0304-3975(81)90075-X} +} + @article{bodlaender2012, author = {Hans L. Bodlaender and Fedor V. Fomin and Arie M. C. A. Koster and Dieter Kratsch and Dimitrios M. Thilikos}, title = {A Note on Exact Algorithms for Vertex Ordering Problems on Graphs}, diff --git a/docs/src/reductions/problem_schemas.json b/docs/src/reductions/problem_schemas.json index 03bee10c8..ee9b0ee8f 100644 --- a/docs/src/reductions/problem_schemas.json +++ b/docs/src/reductions/problem_schemas.json @@ -488,6 +488,27 @@ } ] }, + { + "name": "ShortestCommonSupersequence", + "description": "Find a common supersequence of bounded length for a set of strings", + "fields": [ + { + "name": "alphabet_size", + "type_name": "usize", + "description": "Size of the alphabet" + }, + { + "name": "strings", + "type_name": "Vec>", + "description": "Input strings over the alphabet {0, ..., alphabet_size-1}" + }, + { + "name": "bound", + "type_name": "usize", + "description": "Bound on supersequence length (configuration has exactly this many symbols)" + } + ] + }, { "name": "SpinGlass", "description": "Minimize Ising Hamiltonian on a graph", diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs index ae65be85e..5ab08a430 100644 --- a/problemreductions-cli/src/cli.rs +++ b/problemreductions-cli/src/cli.rs @@ -224,6 +224,7 @@ Flags by problem type: LCS --strings FAS --arcs [--weights] [--num-vertices] FVS --arcs [--weights] [--num-vertices] + SCS --strings, 
--bound [--alphabet-size] ILP, CircuitSAT (via reduction only) Geometry graph variants (use slash notation, e.g., MIS/KingsSubgraph): @@ -338,18 +339,21 @@ pub struct CreateArgs { /// Required edge indices for RuralPostman (comma-separated, e.g., "0,2,4") #[arg(long)] pub required_edges: Option, - /// Upper bound B for RuralPostman + /// Upper bound (for RuralPostman or SCS) #[arg(long)] - pub bound: Option, + pub bound: Option, /// Pattern graph edge list for SubgraphIsomorphism (e.g., 0-1,1-2,2-0) #[arg(long)] pub pattern: Option, - /// Input strings for LCS (semicolon-separated, e.g., "ABAC;BACA") + /// Input strings for LCS (e.g., "ABAC;BACA") or SCS (e.g., "0,1,2;1,2,0") #[arg(long)] pub strings: Option, /// Directed arcs for directed graph problems (e.g., 0>1,1>2,2>0) #[arg(long)] pub arcs: Option, + /// Alphabet size for SCS (optional; inferred from max symbol + 1 if omitted) + #[arg(long)] + pub alphabet_size: Option, } #[derive(clap::Args)] diff --git a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs index beb1113d0..a38ff2902 100644 --- a/problemreductions-cli/src/commands/create.rs +++ b/problemreductions-cli/src/commands/create.rs @@ -6,7 +6,9 @@ use crate::util; use anyhow::{bail, Context, Result}; use problemreductions::models::algebraic::{ClosestVectorProblem, BMF}; use problemreductions::models::graph::{GraphPartitioning, HamiltonianPath}; -use problemreductions::models::misc::{BinPacking, LongestCommonSubsequence, PaintShop, SubsetSum}; +use problemreductions::models::misc::{ + BinPacking, LongestCommonSubsequence, PaintShop, ShortestCommonSupersequence, SubsetSum, +}; use problemreductions::prelude::*; use problemreductions::registry::collect_schemas; use problemreductions::topology::{ @@ -52,6 +54,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool { && args.pattern.is_none() && args.strings.is_none() && args.arcs.is_none() + && args.alphabet_size.is_none() } fn type_format_hint(type_name: &str, 
graph_type: Option<&str>) -> &'static str { @@ -103,6 +106,7 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str { } "SubgraphIsomorphism" => "--graph 0-1,1-2,2-0 --pattern 0-1", "SubsetSum" => "--sizes 3,7,1,8,2,4 --target 11", + "ShortestCommonSupersequence" => "--strings \"0,1,2;1,2,0\" --bound 4", _ => "", } } @@ -280,7 +284,7 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { "RuralPostman requires --bound\n\n\ Usage: pred create RuralPostman --graph 0-1,1-2,2-3 --edge-weights 1,1,1 --required-edges 0,2 --bound 6" ) - })?; + })? as i32; ( ser(RuralPostman::new( graph, @@ -667,6 +671,57 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { ) } + // ShortestCommonSupersequence + "ShortestCommonSupersequence" => { + let usage = "Usage: pred create SCS --strings \"0,1,2;1,2,0\" --bound 4"; + let strings_str = args.strings.as_deref().ok_or_else(|| { + anyhow::anyhow!("ShortestCommonSupersequence requires --strings\n\n{usage}") + })?; + let bound = args.bound.ok_or_else(|| { + anyhow::anyhow!("ShortestCommonSupersequence requires --bound\n\n{usage}") + })? 
as usize; + let strings: Vec> = strings_str + .split(';') + .map(|s| { + let trimmed = s.trim(); + if trimmed.is_empty() { + return Ok(Vec::new()); + } + trimmed + .split(',') + .map(|v| { + v.trim() + .parse::() + .map_err(|e| anyhow::anyhow!("Invalid alphabet index: {}", e)) + }) + .collect::>>() + }) + .collect::>>()?; + let inferred = strings + .iter() + .flat_map(|s| s.iter()) + .copied() + .max() + .map(|m| m + 1) + .unwrap_or(0); + let alphabet_size = args.alphabet_size.unwrap_or(inferred); + if alphabet_size < inferred { + anyhow::bail!( + "--alphabet-size {} is smaller than the largest symbol + 1 ({}) in the strings", + alphabet_size, + inferred + ); + } + ( + ser(ShortestCommonSupersequence::new( + alphabet_size, + strings, + bound, + ))?, + resolved_variant.clone(), + ) + } + // MinimumFeedbackVertexSet "MinimumFeedbackVertexSet" => { let arcs_str = args.arcs.as_deref().ok_or_else(|| { diff --git a/problemreductions-cli/src/dispatch.rs b/problemreductions-cli/src/dispatch.rs index 480f05fc7..128d2734f 100644 --- a/problemreductions-cli/src/dispatch.rs +++ b/problemreductions-cli/src/dispatch.rs @@ -1,6 +1,8 @@ use anyhow::{bail, Context, Result}; use problemreductions::models::algebraic::{ClosestVectorProblem, ILP}; -use problemreductions::models::misc::{BinPacking, Knapsack, LongestCommonSubsequence, SubsetSum}; +use problemreductions::models::misc::{ + BinPacking, Knapsack, LongestCommonSubsequence, ShortestCommonSupersequence, SubsetSum, +}; use problemreductions::prelude::*; use problemreductions::rules::{MinimizeSteps, ReductionGraph}; use problemreductions::solvers::{BruteForce, ILPSolver, Solver}; @@ -254,6 +256,7 @@ pub fn load_problem( "LongestCommonSubsequence" => deser_opt::(data), "MinimumFeedbackVertexSet" => deser_opt::>(data), "SubsetSum" => deser_sat::(data), + "ShortestCommonSupersequence" => deser_sat::(data), "MinimumFeedbackArcSet" => deser_opt::>(data), _ => bail!("{}", crate::problem_name::unknown_problem_error(&canonical)), } @@ 
-324,6 +327,7 @@ pub fn serialize_any_problem( "LongestCommonSubsequence" => try_ser::(any), "MinimumFeedbackVertexSet" => try_ser::>(any), "SubsetSum" => try_ser::(any), + "ShortestCommonSupersequence" => try_ser::(any), "MinimumFeedbackArcSet" => try_ser::>(any), _ => bail!("{}", crate::problem_name::unknown_problem_error(&canonical)), } diff --git a/problemreductions-cli/src/problem_name.rs b/problemreductions-cli/src/problem_name.rs index b01fe0060..ffd9094a5 100644 --- a/problemreductions-cli/src/problem_name.rs +++ b/problemreductions-cli/src/problem_name.rs @@ -24,6 +24,7 @@ pub const ALIASES: &[(&str, &str)] = &[ ("LCS", "LongestCommonSubsequence"), ("MaxMatching", "MaximumMatching"), ("FVS", "MinimumFeedbackVertexSet"), + ("SCS", "ShortestCommonSupersequence"), ("FAS", "MinimumFeedbackArcSet"), ("pmedian", "MinimumSumMulticenter"), ]; @@ -66,6 +67,7 @@ pub fn resolve_alias(input: &str) -> String { "fas" | "minimumfeedbackarcset" => "MinimumFeedbackArcSet".to_string(), "minimumsummulticenter" | "pmedian" => "MinimumSumMulticenter".to_string(), "subsetsum" => "SubsetSum".to_string(), + "scs" | "shortestcommonsupersequence" => "ShortestCommonSupersequence".to_string(), "hamiltonianpath" => "HamiltonianPath".to_string(), _ => input.to_string(), // pass-through for exact names } diff --git a/src/lib.rs b/src/lib.rs index 6d9cd9a1d..b66a5bdfe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,11 +46,12 @@ pub mod prelude { pub use crate::models::graph::{ KColoring, MaxCut, MaximalIS, MaximumClique, MaximumIndependentSet, MaximumMatching, MinimumDominatingSet, MinimumFeedbackArcSet, MinimumFeedbackVertexSet, - MinimumSumMulticenter, MinimumVertexCover, - PartitionIntoTriangles, RuralPostman, TravelingSalesman, + MinimumSumMulticenter, MinimumVertexCover, PartitionIntoTriangles, + RuralPostman, TravelingSalesman, }; pub use crate::models::misc::{ - BinPacking, Factoring, Knapsack, LongestCommonSubsequence, PaintShop, SubsetSum, + BinPacking, Factoring, Knapsack, 
LongestCommonSubsequence, PaintShop, + ShortestCommonSupersequence, SubsetSum, }; pub use crate::models::set::{MaximumSetPacking, MinimumSetCovering}; diff --git a/src/models/misc/mod.rs b/src/models/misc/mod.rs index 943b758a2..0df20e720 100644 --- a/src/models/misc/mod.rs +++ b/src/models/misc/mod.rs @@ -6,6 +6,7 @@ //! - [`Knapsack`]: 0-1 Knapsack (maximize value subject to weight capacity) //! - [`LongestCommonSubsequence`]: Longest Common Subsequence //! - [`PaintShop`]: Minimize color switches in paint shop scheduling +//! - [`ShortestCommonSupersequence`]: Find a common supersequence of bounded length //! - [`SubsetSum`]: Find a subset summing to exactly a target value mod bin_packing; @@ -13,6 +14,7 @@ pub(crate) mod factoring; mod knapsack; mod longest_common_subsequence; pub(crate) mod paintshop; +pub(crate) mod shortest_common_supersequence; mod subset_sum; pub use bin_packing::BinPacking; @@ -20,4 +22,5 @@ pub use factoring::Factoring; pub use knapsack::Knapsack; pub use longest_common_subsequence::LongestCommonSubsequence; pub use paintshop::PaintShop; +pub use shortest_common_supersequence::ShortestCommonSupersequence; pub use subset_sum::SubsetSum; diff --git a/src/models/misc/shortest_common_supersequence.rs b/src/models/misc/shortest_common_supersequence.rs new file mode 100644 index 000000000..a6da920f0 --- /dev/null +++ b/src/models/misc/shortest_common_supersequence.rs @@ -0,0 +1,154 @@ +//! Shortest Common Supersequence problem implementation. +//! +//! Given a set of strings over an alphabet and a bound `B`, the problem asks +//! whether there exists a common supersequence of length at most `B`. A string +//! `w` is a supersequence of `s` if `s` is a subsequence of `w` (i.e., `s` can +//! be obtained by deleting zero or more characters from `w`). +//! +//! The configuration uses a fixed-length representation of exactly `B` symbols. +//! Since any supersequence shorter than `B` can be padded with an arbitrary +//! 
symbol to reach length `B` (when `alphabet_size > 0`), this is equivalent +//! to the standard `|w| ≤ B` formulation. This problem is NP-hard (Maier, 1978). + +use crate::registry::{FieldInfo, ProblemSchemaEntry}; +use crate::traits::{Problem, SatisfactionProblem}; +use serde::{Deserialize, Serialize}; + +inventory::submit! { + ProblemSchemaEntry { + name: "ShortestCommonSupersequence", + module_path: module_path!(), + description: "Find a common supersequence of bounded length for a set of strings", + fields: &[ + FieldInfo { name: "alphabet_size", type_name: "usize", description: "Size of the alphabet" }, + FieldInfo { name: "strings", type_name: "Vec>", description: "Input strings over the alphabet {0, ..., alphabet_size-1}" }, + FieldInfo { name: "bound", type_name: "usize", description: "Bound on supersequence length (configuration has exactly this many symbols)" }, + ], + } +} + +/// The Shortest Common Supersequence problem. +/// +/// Given an alphabet of size `k`, a set of strings over `{0, ..., k-1}`, and a +/// bound `B`, determine whether there exists a string `w` of length at most `B` +/// such that every input string is a subsequence of `w`. The configuration uses +/// exactly `B` symbols (equivalent via padding when `alphabet_size > 0`). +/// +/// # Representation +/// +/// The configuration is a vector of length `bound`, where each entry is a symbol +/// in `{0, ..., alphabet_size-1}`. The problem is satisfiable iff every input +/// string is a subsequence of the configuration. 
+/// +/// # Example +/// +/// ``` +/// use problemreductions::models::misc::ShortestCommonSupersequence; +/// use problemreductions::{Problem, Solver, BruteForce}; +/// +/// // Alphabet {0, 1}, strings [0,1] and [1,0], bound 3 +/// let problem = ShortestCommonSupersequence::new(2, vec![vec![0, 1], vec![1, 0]], 3); +/// let solver = BruteForce::new(); +/// let solution = solver.find_satisfying(&problem); +/// assert!(solution.is_some()); +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ShortestCommonSupersequence { + alphabet_size: usize, + strings: Vec>, + bound: usize, +} + +impl ShortestCommonSupersequence { + /// Create a new ShortestCommonSupersequence instance. + /// + /// # Panics + /// + /// Panics if `alphabet_size` is 0 and any input string is non-empty, or if + /// `bound > 0` and `alphabet_size == 0`. + pub fn new(alphabet_size: usize, strings: Vec>, bound: usize) -> Self { + assert!( + alphabet_size > 0 || (bound == 0 && strings.iter().all(|s| s.is_empty())), + "alphabet_size must be > 0 when bound > 0 or any input string is non-empty" + ); + Self { + alphabet_size, + strings, + bound, + } + } + + /// Returns the alphabet size. + pub fn alphabet_size(&self) -> usize { + self.alphabet_size + } + + /// Returns the input strings. + pub fn strings(&self) -> &[Vec] { + &self.strings + } + + /// Returns the bound on supersequence length. + pub fn bound(&self) -> usize { + self.bound + } + + /// Returns the number of input strings. + pub fn num_strings(&self) -> usize { + self.strings.len() + } + + /// Returns the total length of all input strings. + pub fn total_length(&self) -> usize { + self.strings.iter().map(|s| s.len()).sum() + } +} + +/// Check whether `needle` is a subsequence of `haystack` using greedy +/// left-to-right matching. 
+fn is_subsequence(needle: &[usize], haystack: &[usize]) -> bool { + let mut it = haystack.iter(); + for &ch in needle { + loop { + match it.next() { + Some(&c) if c == ch => break, + Some(_) => continue, + None => return false, + } + } + } + true +} + +impl Problem for ShortestCommonSupersequence { + const NAME: &'static str = "ShortestCommonSupersequence"; + type Metric = bool; + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } + + fn dims(&self) -> Vec { + vec![self.alphabet_size; self.bound] + } + + fn evaluate(&self, config: &[usize]) -> bool { + if config.len() != self.bound { + return false; + } + if config.iter().any(|&v| v >= self.alphabet_size) { + return false; + } + self.strings.iter().all(|s| is_subsequence(s, config)) + } +} + +impl SatisfactionProblem for ShortestCommonSupersequence {} + +crate::declare_variants! { + ShortestCommonSupersequence => "alphabet_size ^ bound", +} + +#[cfg(test)] +#[path = "../../unit_tests/models/misc/shortest_common_supersequence.rs"] +mod tests; diff --git a/src/models/mod.rs b/src/models/mod.rs index 4300b2138..9e875fa27 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -17,5 +17,8 @@ pub use graph::{ MinimumFeedbackVertexSet, MinimumSumMulticenter, MinimumVertexCover, PartitionIntoTriangles, RuralPostman, SpinGlass, SubgraphIsomorphism, TravelingSalesman, }; -pub use misc::{BinPacking, Factoring, Knapsack, LongestCommonSubsequence, PaintShop, SubsetSum}; +pub use misc::{ + BinPacking, Factoring, Knapsack, LongestCommonSubsequence, PaintShop, + ShortestCommonSupersequence, SubsetSum, +}; pub use set::{MaximumSetPacking, MinimumSetCovering}; diff --git a/src/unit_tests/models/misc/shortest_common_supersequence.rs b/src/unit_tests/models/misc/shortest_common_supersequence.rs new file mode 100644 index 000000000..00fe116e7 --- /dev/null +++ b/src/unit_tests/models/misc/shortest_common_supersequence.rs @@ -0,0 +1,114 @@ +use super::*; +use crate::solvers::{BruteForce, Solver}; 
+use crate::traits::Problem; + +#[test] +fn test_shortestcommonsupersequence_basic() { + let problem = ShortestCommonSupersequence::new( + 3, + vec![vec![0, 1, 2, 1], vec![1, 2, 0, 1], vec![0, 2, 1, 0]], + 7, + ); + assert_eq!(problem.alphabet_size(), 3); + assert_eq!(problem.num_strings(), 3); + assert_eq!(problem.bound(), 7); + assert_eq!(problem.total_length(), 12); + assert_eq!(problem.dims(), vec![3; 7]); + assert_eq!( + ::NAME, + "ShortestCommonSupersequence" + ); + assert_eq!(::variant(), vec![]); +} + +#[test] +fn test_shortestcommonsupersequence_evaluate_yes() { + // alphabet {a=0, b=1, c=2} + // strings: [0,1,2,1] "abcb", [1,2,0,1] "bcab", [0,2,1,0] "acba" + // supersequence config [0,1,2,0,2,1,0] = "abcacba" + let problem = ShortestCommonSupersequence::new( + 3, + vec![vec![0, 1, 2, 1], vec![1, 2, 0, 1], vec![0, 2, 1, 0]], + 7, + ); + // [0,1,2,1] matches at positions 0,1,2,5 + // [1,2,0,1] matches at positions 1,2,3,5 + // [0,2,1,0] matches at positions 0,2,5,6 + assert!(problem.evaluate(&[0, 1, 2, 0, 2, 1, 0])); +} + +#[test] +fn test_shortestcommonsupersequence_evaluate_no() { + let problem = ShortestCommonSupersequence::new( + 3, + vec![vec![0, 1, 2, 1], vec![1, 2, 0, 1], vec![0, 2, 1, 0]], + 7, + ); + // [0,0,0,0,0,0,0] cannot contain [0,1,2,1] as subsequence + assert!(!problem.evaluate(&[0, 0, 0, 0, 0, 0, 0])); +} + +#[test] +fn test_shortestcommonsupersequence_out_of_range() { + let problem = ShortestCommonSupersequence::new(2, vec![vec![0, 1]], 3); + // value 2 is out of range for alphabet_size=2 + assert!(!problem.evaluate(&[0, 2, 1])); +} + +#[test] +fn test_shortestcommonsupersequence_wrong_length() { + let problem = ShortestCommonSupersequence::new(2, vec![vec![0, 1]], 3); + // too short + assert!(!problem.evaluate(&[0, 1])); + // too long + assert!(!problem.evaluate(&[0, 1, 0, 1])); +} + +#[test] +fn test_shortestcommonsupersequence_brute_force() { + // alphabet {0,1}, strings [0,1] and [1,0], bound 3 + // e.g. 
[0,1,0] or [1,0,1] should work + let problem = ShortestCommonSupersequence::new(2, vec![vec![0, 1], vec![1, 0]], 3); + let solver = BruteForce::new(); + let solution = solver + .find_satisfying(&problem) + .expect("should find a solution"); + assert!(problem.evaluate(&solution)); +} + +#[test] +fn test_shortestcommonsupersequence_empty_instance() { + // No strings, bound 0: vacuously satisfied on empty config + let problem = ShortestCommonSupersequence::new(2, vec![], 0); + assert_eq!(problem.dims(), Vec::::new()); + assert!(problem.evaluate(&[])); +} + +#[test] +fn test_shortestcommonsupersequence_unsatisfiable() { + // strings [0,1] and [1,0] over binary alphabet, bound 2: impossible + // Any length-2 binary string is either "00","01","10","11" + // "01" contains [0,1] but not [1,0]; "10" contains [1,0] but not [0,1] + let problem = ShortestCommonSupersequence::new(2, vec![vec![0, 1], vec![1, 0]], 2); + let solver = BruteForce::new(); + assert!(solver.find_satisfying(&problem).is_none()); +} + +#[test] +fn test_shortestcommonsupersequence_single_string() { + // Single string [0,1,2] over ternary alphabet, bound 3: the string itself is a solution + let problem = ShortestCommonSupersequence::new(3, vec![vec![0, 1, 2]], 3); + assert!(problem.evaluate(&[0, 1, 2])); + // A different string that doesn't contain [0,1,2] as subsequence + assert!(!problem.evaluate(&[2, 1, 0])); +} + +#[test] +fn test_shortestcommonsupersequence_serialization() { + let problem = ShortestCommonSupersequence::new(3, vec![vec![0, 1, 2], vec![2, 1, 0]], 5); + let json = serde_json::to_value(&problem).unwrap(); + let restored: ShortestCommonSupersequence = serde_json::from_value(json).unwrap(); + assert_eq!(restored.alphabet_size(), problem.alphabet_size()); + assert_eq!(restored.strings(), problem.strings()); + assert_eq!(restored.bound(), problem.bound()); +} diff --git a/src/unit_tests/trait_consistency.rs b/src/unit_tests/trait_consistency.rs index 91754a81d..5c6fe34bb 100644 --- 
a/src/unit_tests/trait_consistency.rs +++ b/src/unit_tests/trait_consistency.rs @@ -98,6 +98,10 @@ fn test_all_problems_implement_trait_correctly() { &HamiltonianPath::new(SimpleGraph::new(3, vec![(0, 1), (1, 2)])), "HamiltonianPath", ); + check_problem_trait( + &ShortestCommonSupersequence::new(2, vec![vec![0, 1], vec![1, 0]], 3), + "ShortestCommonSupersequence", + ); } #[test]