diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index 53188f3f8..ce41c6400 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -82,6 +82,7 @@ "TravelingSalesman": [Traveling Salesman], "MaximumClique": [Maximum Clique], "MaximumSetPacking": [Maximum Set Packing], + "MinimumHittingSet": [Minimum Hitting Set], "MinimumSetCovering": [Minimum Set Covering], "ComparativeContainment": [Comparative Containment], "SetBasis": [Set Basis], @@ -1794,6 +1795,56 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76], ] } +#{ + let x = load-model-example("MinimumHittingSet") + let sets = x.instance.sets + let m = sets.len() + let U-size = x.instance.universe_size + let sol = (config: x.optimal_config, metric: x.optimal_value) + let selected = sol.config.enumerate().filter(((i, v)) => v == 1).map(((i, _)) => i) + let hit-size = sol.metric.Valid + let fmt-set(s) = if s.len() == 0 { + $emptyset$ + } else { + "${" + s.map(e => str(e + 1)).join(", ") + "}$" + } + let elems = ( + (-2.0, 0.7), + (-0.9, 1.4), + (-1.2, -0.4), + (0.2, 0.1), + (1.2, 1.0), + (1.5, -0.9), + ) + [ + #problem-def("MinimumHittingSet")[ + Given a finite universe $U$ and a collection $cal(S) = {S_1, dots, S_m}$ of subsets of $U$, find a subset $H subset.eq U$ minimizing $|H|$ such that $H inter S_i != emptyset$ for every $i in {1, dots, m}$. + ][ + Minimum Hitting Set is one of Karp's 21 NP-complete problems @karp1972. It is the incidence-dual of Set Covering: transposing the set-element incidence matrix swaps the choice of sets with the choice of universe elements. Vertex Cover is the special case in which every set has size $2$, so every edge is "hit" by selecting one of its endpoints. + + A direct exact algorithm enumerates all $2^n$ subsets $H subset.eq U$ for $n = |U|$ and checks whether each subset intersects every member of $cal(S)$. 
This yields an $O^*(2^n)$ exact algorithm#footnote[No exact worst-case algorithm improving on brute-force enumeration over the universe elements is recorded in the standard references used for this catalog entry.]. + + *Example.* Let $U = {1, 2, dots, #U-size}$ and $cal(S) = {#range(m).map(i => $S_#(i + 1)$).join(", ")}$ with #range(m).map(i => $S_#(i + 1) = #fmt-set(sets.at(i))$).join(", "). A minimum hitting set is $H = #fmt-set(selected)$ with $|H| = #hit-size$: every set in $cal(S)$ contains at least one of the selected elements. No subset of $U$ with fewer than $#hit-size$ elements hits all #m sets, so the optimum is exactly $#hit-size$. + + #figure( + canvas(length: 1cm, { + sregion((elems.at(0), elems.at(1), elems.at(2)), pad: 0.45, label: [$S_1$], ..sregion-dimmed) + sregion((elems.at(0), elems.at(3), elems.at(4)), pad: 0.48, label: [$S_2$], ..sregion-dimmed) + sregion((elems.at(1), elems.at(3), elems.at(5)), pad: 0.48, label: [$S_3$], ..sregion-dimmed) + sregion((elems.at(2), elems.at(4), elems.at(5)), pad: 0.48, label: [$S_4$], ..sregion-dimmed) + sregion((elems.at(0), elems.at(1), elems.at(5)), pad: 0.48, label: [$S_5$], ..sregion-dimmed) + sregion((elems.at(2), elems.at(3)), pad: 0.34, label: [$S_6$], ..sregion-dimmed) + sregion((elems.at(1), elems.at(4)), pad: 0.34, label: [$S_7$], ..sregion-dimmed) + for (k, pos) in elems.enumerate() { + selem(pos, label: [#(k + 1)], fill: if selected.contains(k) { graph-colors.at(0) } else { black }) + } + }), + caption: [Minimum hitting set: the blue elements $#fmt-set(selected)$ intersect every set region $S_1, dots, S_#m$, so they hit the entire collection $cal(S)$.] 
+ ) + ] + ] +} + #{ let x = load-model-example("ConsecutiveSets") let m = x.instance.alphabet_size diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs index 9fdeea3da..43deafec3 100644 --- a/problemreductions-cli/src/cli.rs +++ b/problemreductions-cli/src/cli.rs @@ -241,6 +241,7 @@ Flags by problem type: SumOfSquaresPartition --sizes, --num-groups, --bound PaintShop --sequence MaximumSetPacking --sets [--weights] + MinimumHittingSet --universe, --sets MinimumSetCovering --universe, --sets [--weights] ComparativeContainment --universe, --r-sets, --s-sets [--r-weights] [--s-weights] X3C (ExactCoverBy3Sets) --universe, --sets (3 elements each) @@ -434,7 +435,7 @@ pub struct CreateArgs { /// Car paint sequence for PaintShop (comma-separated, each label appears exactly twice, e.g., "a,b,a,c,c,b") #[arg(long)] pub sequence: Option, - /// Sets for SetPacking/SetCovering (semicolon-separated, e.g., "0,1;1,2;0,2") + /// Sets for set-system problems such as SetPacking, MinimumHittingSet, and SetCovering (semicolon-separated, e.g., "0,1;1,2;0,2") #[arg(long)] pub sets: Option, /// R-family sets for ComparativeContainment (semicolon-separated, e.g., "0,1;1,2") @@ -452,7 +453,7 @@ pub struct CreateArgs { /// Partition groups for arc-index partitions (semicolon-separated, e.g., "0,1;2,3") #[arg(long)] pub partition: Option, - /// Universe size for set-system problems such as MinimumSetCovering and ComparativeContainment + /// Universe size for set-system problems such as MinimumHittingSet, MinimumSetCovering, and ComparativeContainment #[arg(long)] pub universe: Option, /// Bipartite graph edges for BicliqueCover / BalancedCompleteBipartiteSubgraph (e.g., "0-0,0-1,1-2" for left-right pairs) diff --git a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs index f04afb8cb..a5000e00a 100644 --- a/problemreductions-cli/src/commands/create.rs +++ b/problemreductions-cli/src/commands/create.rs @@ -1744,6 +1744,33 @@ 
pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> {
             )
         }
 
+        // MinimumHittingSet: bounds-check every element here so bad input becomes a CLI error rather than tripping the constructor's assert
+        "MinimumHittingSet" => {
+            let universe = args.universe.ok_or_else(|| {
+                anyhow::anyhow!(
+                    "MinimumHittingSet requires --universe and --sets\n\n\
+                     Usage: pred create MinimumHittingSet --universe 6 --sets \"0,1,2;0,3,4;1,3,5;2,4,5;0,1,5;2,3;1,4\""
+                )
+            })?;
+            let sets = parse_sets(args)?;
+            for (i, set) in sets.iter().enumerate() {
+                for &element in set {
+                    if element >= universe {
+                        bail!(
+                            "Set {} contains element {} which is outside universe of size {}",
+                            i,
+                            element,
+                            universe
+                        );
+                    }
+                }
+            }
+            (
+                ser(MinimumHittingSet::new(universe, sets))?,
+                resolved_variant.clone(),
+            )
+        }
+
         // MinimumSetCovering
         "MinimumSetCovering" => {
             let universe = args.universe.ok_or_else(|| {
diff --git a/problemreductions-cli/tests/cli_tests.rs b/problemreductions-cli/tests/cli_tests.rs
index a6aa1b924..f7a5b8cf8 100644
--- a/problemreductions-cli/tests/cli_tests.rs
+++ b/problemreductions-cli/tests/cli_tests.rs
@@ -1425,6 +1425,91 @@ fn test_create_comparative_containment_no_flags_shows_help() {
     assert!(!stderr.contains("--universe-size"), "stderr: {stderr}");
 }
 
+/// `pred create MinimumHittingSet` succeeds and writes the expected instance JSON to the `-o` path.
+#[test]
+fn test_create_minimum_hitting_set() {
+    let output_file = std::env::temp_dir().join("pred_test_create_minimum_hitting_set.json");
+    // Drop any leftover from an aborted earlier run so the checks below cannot pass on a stale file.
+    std::fs::remove_file(&output_file).ok();
+    let output = pred()
+        .args([
+            "-o",
+            output_file.to_str().unwrap(),
+            "create",
+            "MinimumHittingSet",
+            "--universe",
+            "6",
+            "--sets",
+            "0,1,2;0,3,4;1,3,5;2,4,5;0,1,5;2,3;1,4",
+        ])
+        .output()
+        .unwrap();
+    assert!(
+        output.status.success(),
+        "stderr: {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+    assert!(output_file.exists());
+
+    let content = std::fs::read_to_string(&output_file).unwrap();
+    let json: serde_json::Value = serde_json::from_str(&content).unwrap();
+    assert_eq!(json["type"], "MinimumHittingSet");
+    assert_eq!(json["data"]["universe_size"], 6);
+    assert_eq!(
+        json["data"]["sets"],
+        serde_json::json!([
+            [0, 1, 2],
+            [0, 3, 4],
+            [1, 3, 5],
+            [2, 4, 5],
+            [0, 1, 5],
+            [2, 3],
+            [1, 4]
+        ])
+    );
+
+    std::fs::remove_file(&output_file).ok();
+}
+
+/// An element `>= --universe` must fail with a readable error message, not a panic.
+#[test]
+fn test_create_minimum_hitting_set_rejects_out_of_range_elements_without_panicking() {
+    let output = pred()
+        .args([
+            "create",
+            "MinimumHittingSet",
+            "--universe",
+            "4",
+            "--sets",
+            "0,1,4;1,2",
+        ])
+        .output()
+        .unwrap();
+    assert!(!output.status.success());
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    assert!(
+        stderr.contains("outside universe of size 4"),
+        "stderr: {stderr}"
+    );
+    assert!(!stderr.contains("panicked at"), "stderr: {stderr}");
+}
+
+/// `pred create --help` mentions MinimumHittingSet and the `--universe, --sets` flag pair.
+#[test]
+fn test_create_help_lists_minimum_hitting_set_flags() {
+    let output = pred().args(["create", "--help"]).output().unwrap();
+    assert!(
+        output.status.success(),
+        "stderr: {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+    let stdout = String::from_utf8(output.stdout).unwrap();
+    assert!(
+        stdout.contains("MinimumHittingSet") && stdout.contains("--universe, --sets"),
+        "stdout: {stdout}"
+    );
+}
+
 #[test]
 fn test_create_set_basis_requires_k() {
     let output = pred()
diff --git a/src/lib.rs b/src/lib.rs
index 973c8476e..232b55940 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -76,7 +76,7 @@ pub mod prelude {
     };
     pub use crate::models::set::{
         ComparativeContainment, ConsecutiveSets, ExactCoverBy3Sets, MaximumSetPacking,
-        MinimumCardinalityKey, MinimumSetCovering, PrimeAttributeName, SetBasis,
+        MinimumCardinalityKey, MinimumHittingSet, MinimumSetCovering, PrimeAttributeName, SetBasis,
     };
 
     // Core traits
diff --git a/src/models/mod.rs b/src/models/mod.rs
index 2415b1f9c..35d4beb18 100644
--- a/src/models/mod.rs
+++ b/src/models/mod.rs
@@ -43,6 +43,6 @@ pub use misc::{
 };
 pub use set::{
     ComparativeContainment, ConsecutiveSets, ExactCoverBy3Sets, MaximumSetPacking,
-    MinimumCardinalityKey, MinimumSetCovering, PrimeAttributeName, SetBasis,
+    MinimumCardinalityKey, MinimumHittingSet, MinimumSetCovering, PrimeAttributeName, SetBasis,
     TwoDimensionalConsecutiveSets,
 };
diff --git 
a/src/models/set/minimum_hitting_set.rs b/src/models/set/minimum_hitting_set.rs new file mode 100644 index 000000000..06c362382 --- /dev/null +++ b/src/models/set/minimum_hitting_set.rs @@ -0,0 +1,181 @@ +//! Minimum Hitting Set problem implementation. +//! +//! The Minimum Hitting Set problem asks for a minimum-size subset of universe +//! elements that intersects every set in a collection. + +use crate::registry::{FieldInfo, ProblemSchemaEntry, ProblemSizeFieldEntry}; +use crate::traits::{OptimizationProblem, Problem}; +use crate::types::{Direction, SolutionSize}; +use serde::{Deserialize, Serialize}; + +inventory::submit! { + ProblemSchemaEntry { + name: "MinimumHittingSet", + display_name: "Minimum Hitting Set", + aliases: &[], + dimensions: &[], + module_path: module_path!(), + description: "Find a minimum-size subset of universe elements that hits every set", + fields: &[ + FieldInfo { name: "universe_size", type_name: "usize", description: "Size of the universe U" }, + FieldInfo { name: "sets", type_name: "Vec>", description: "Collection of subsets of U that must each be hit" }, + ], + } +} + +inventory::submit! { + ProblemSizeFieldEntry { + name: "MinimumHittingSet", + fields: &["num_sets", "universe_size"], + } +} + +/// The Minimum Hitting Set problem. +/// +/// Given a universe `U` and a collection of subsets of `U`, find a minimum-size +/// subset `H ⊆ U` such that `H` intersects every set in the collection. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MinimumHittingSet { + universe_size: usize, + sets: Vec>, +} + +impl MinimumHittingSet { + /// Create a new Minimum Hitting Set instance. + /// + /// # Panics + /// + /// Panics if any set contains an element outside `0..universe_size`. 
+ pub fn new(universe_size: usize, sets: Vec>) -> Self { + let mut sets = sets; + for (set_index, set) in sets.iter_mut().enumerate() { + set.sort_unstable(); + set.dedup(); + for &element in set.iter() { + assert!( + element < universe_size, + "Set {set_index} contains element {element} which is outside universe of size {universe_size}" + ); + } + } + + Self { + universe_size, + sets, + } + } + + /// Get the universe size. + pub fn universe_size(&self) -> usize { + self.universe_size + } + + /// Get the number of sets. + pub fn num_sets(&self) -> usize { + self.sets.len() + } + + /// Get the sets. + pub fn sets(&self) -> &[Vec] { + &self.sets + } + + /// Get a specific set. + pub fn get_set(&self, index: usize) -> Option<&Vec> { + self.sets.get(index) + } + + /// Decode the selected universe elements from a binary configuration. + pub fn selected_elements(&self, config: &[usize]) -> Option> { + if config.len() != self.universe_size { + return None; + } + + let mut selected = Vec::new(); + for (element, &value) in config.iter().enumerate() { + match value { + 0 => {} + 1 => selected.push(element), + _ => return None, + } + } + Some(selected) + } + + /// Check whether a configuration hits every set in the collection. 
+ pub fn is_valid_solution(&self, config: &[usize]) -> bool { + let Some(selected) = self.selected_elements(config) else { + return false; + }; + + self.sets.iter().all(|set| { + set.iter() + .any(|element| selected.binary_search(element).is_ok()) + }) + } +} + +impl Problem for MinimumHittingSet { + const NAME: &'static str = "MinimumHittingSet"; + type Metric = SolutionSize; + + fn dims(&self) -> Vec { + vec![2; self.universe_size] + } + + fn evaluate(&self, config: &[usize]) -> SolutionSize { + let Some(selected) = self.selected_elements(config) else { + return SolutionSize::Invalid; + }; + + if self.sets.iter().all(|set| { + set.iter() + .any(|element| selected.binary_search(element).is_ok()) + }) { + SolutionSize::Valid(selected.len()) + } else { + SolutionSize::Invalid + } + } + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } +} + +impl OptimizationProblem for MinimumHittingSet { + type Value = usize; + + fn direction(&self) -> Direction { + Direction::Minimize + } +} + +crate::declare_variants! { + default opt MinimumHittingSet => "2^universe_size", +} + +#[cfg(feature = "example-db")] +pub(crate) fn canonical_model_example_specs() -> Vec { + vec![crate::example_db::specs::ModelExampleSpec { + id: "minimum_hitting_set", + instance: Box::new(MinimumHittingSet::new( + 6, + vec![ + vec![0, 1, 2], + vec![0, 3, 4], + vec![1, 3, 5], + vec![2, 4, 5], + vec![0, 1, 5], + vec![2, 3], + vec![1, 4], + ], + )), + optimal_config: vec![0, 1, 0, 1, 1, 0], + optimal_value: serde_json::json!({"Valid": 3}), + }] +} + +#[cfg(test)] +#[path = "../../unit_tests/models/set/minimum_hitting_set.rs"] +mod tests; diff --git a/src/models/set/mod.rs b/src/models/set/mod.rs index b0a101a42..d96b2d3e2 100644 --- a/src/models/set/mod.rs +++ b/src/models/set/mod.rs @@ -5,6 +5,7 @@ //! - [`ExactCoverBy3Sets`]: Exact cover by 3-element subsets (X3C) //! - [`ComparativeContainment`]: Compare containment-weight sums for two set families //! 
- [`MaximumSetPacking`]: Maximum weight set packing +//! - [`MinimumHittingSet`]: Minimum-size universe subset hitting every set //! - [`MinimumSetCovering`]: Minimum weight set cover //! - [`PrimeAttributeName`]: Determine if an attribute belongs to any candidate key @@ -13,6 +14,7 @@ pub(crate) mod consecutive_sets; pub(crate) mod exact_cover_by_3_sets; pub(crate) mod maximum_set_packing; pub(crate) mod minimum_cardinality_key; +pub(crate) mod minimum_hitting_set; pub(crate) mod minimum_set_covering; pub(crate) mod prime_attribute_name; pub(crate) mod set_basis; @@ -23,6 +25,7 @@ pub use consecutive_sets::ConsecutiveSets; pub use exact_cover_by_3_sets::ExactCoverBy3Sets; pub use maximum_set_packing::MaximumSetPacking; pub use minimum_cardinality_key::MinimumCardinalityKey; +pub use minimum_hitting_set::MinimumHittingSet; pub use minimum_set_covering::MinimumSetCovering; pub use prime_attribute_name::PrimeAttributeName; pub use set_basis::SetBasis; @@ -35,8 +38,9 @@ pub(crate) fn canonical_model_example_specs() -> Vec MinimumHittingSet { + MinimumHittingSet::new( + 6, + vec![ + vec![0, 1, 2], + vec![0, 3, 4], + vec![1, 3, 5], + vec![2, 4, 5], + vec![0, 1, 5], + vec![2, 3], + vec![1, 4], + ], + ) +} + +fn issue_example_config() -> Vec { + vec![0, 1, 0, 1, 1, 0] +} + +#[test] +fn test_minimum_hitting_set_creation_accessors_and_dimensions() { + let problem = MinimumHittingSet::new(4, vec![vec![2, 1, 1], vec![3]]); + + assert_eq!(problem.universe_size(), 4); + assert_eq!(problem.num_sets(), 2); + assert_eq!(problem.num_variables(), 4); + assert_eq!(problem.dims(), vec![2; 4]); + assert_eq!(problem.sets(), &[vec![1, 2], vec![3]]); + assert_eq!(problem.get_set(0), Some(&vec![1, 2])); + assert_eq!(problem.get_set(1), Some(&vec![3])); + assert_eq!(problem.get_set(2), None); +} + +#[test] +fn test_minimum_hitting_set_evaluate_valid_and_invalid() { + let problem = MinimumHittingSet::new(4, vec![vec![0, 1], vec![1, 2], vec![2, 3]]); + + 
assert_eq!(problem.selected_elements(&[0, 1, 0, 1]), Some(vec![1, 3])); + assert_eq!(problem.selected_elements(&[0, 2, 0, 1]), None); + assert_eq!(problem.evaluate(&[0, 1, 0, 1]), SolutionSize::Valid(2)); + assert_eq!(problem.evaluate(&[1, 0, 0, 0]), SolutionSize::Invalid); + assert_eq!(problem.evaluate(&[0, 2, 0, 1]), SolutionSize::Invalid); + assert!(problem.is_valid_solution(&[0, 1, 0, 1])); + assert!(!problem.is_valid_solution(&[1, 0, 0, 0])); + assert!(!problem.is_valid_solution(&[0, 2, 0, 1])); +} + +#[test] +fn test_minimum_hitting_set_empty_set_is_always_invalid() { + let problem = MinimumHittingSet::new(3, vec![vec![0, 1], vec![]]); + + assert_eq!(problem.evaluate(&[1, 1, 1]), SolutionSize::Invalid); + assert_eq!(problem.evaluate(&[0, 0, 0]), SolutionSize::Invalid); +} + +#[test] +fn test_minimum_hitting_set_constructor_normalizes_sets() { + let problem = MinimumHittingSet::new(5, vec![vec![3, 1, 3, 2], vec![4, 0, 0], vec![]]); + + assert_eq!(problem.sets(), &[vec![1, 2, 3], vec![0, 4], vec![]]); +} + +#[test] +#[should_panic(expected = "outside universe")] +fn test_minimum_hitting_set_rejects_out_of_range_elements() { + MinimumHittingSet::new(3, vec![vec![0, 3]]); +} + +#[test] +fn test_minimum_hitting_set_bruteforce_optimum_issue_example() { + let problem = issue_example_problem(); + let solver = BruteForce::new(); + + let best = solver.find_best(&problem).unwrap(); + assert_eq!(problem.evaluate(&best), SolutionSize::Valid(3)); + + let best_solutions = solver.find_all_best(&problem); + let best_solution_set: HashSet> = best_solutions.iter().cloned().collect(); + assert!(best_solution_set.contains(&issue_example_config())); + assert!(best_solutions + .iter() + .all(|config| problem.evaluate(config) == SolutionSize::Valid(3))); +} + +#[test] +fn test_minimum_hitting_set_serialization_round_trip() { + let problem = MinimumHittingSet::new(4, vec![vec![2, 1, 1], vec![3, 0]]); + let json = serde_json::to_string(&problem).unwrap(); + let deserialized: 
MinimumHittingSet = serde_json::from_str(&json).unwrap(); + + assert_eq!(deserialized.universe_size(), problem.universe_size()); + assert_eq!(deserialized.num_sets(), problem.num_sets()); + assert_eq!(deserialized.sets(), problem.sets()); + assert_eq!( + deserialized.evaluate(&[1, 1, 0, 0]), + problem.evaluate(&[1, 1, 0, 0]) + ); +} + +#[test] +fn test_minimum_hitting_set_paper_example_consistency() { + let problem = issue_example_problem(); + + assert_eq!( + problem.evaluate(&issue_example_config()), + SolutionSize::Valid(3) + ); +} + +#[test] +fn test_minimum_hitting_set_direction() { + let problem = MinimumHittingSet::new(3, vec![vec![0, 1], vec![1, 2]]); + assert_eq!(problem.direction(), Direction::Minimize); +} + +#[test] +fn test_minimum_hitting_set_declares_problem_size_fields() { + let fields: HashSet<&'static str> = declared_size_fields("MinimumHittingSet") + .into_iter() + .collect(); + assert_eq!(fields, HashSet::from(["num_sets", "universe_size"]),); +} + +#[cfg(feature = "example-db")] +#[test] +fn test_minimum_hitting_set_canonical_example_spec() { + let specs = canonical_model_example_specs(); + assert_eq!(specs.len(), 1); + let spec = &specs[0]; + + assert_eq!(spec.id, "minimum_hitting_set"); + assert_eq!(spec.optimal_config, issue_example_config()); + assert_eq!(spec.optimal_value, serde_json::json!({"Valid": 3})); + + let problem: MinimumHittingSet = + serde_json::from_value(spec.instance.serialize_json()).unwrap(); + assert_eq!(problem.universe_size(), 6); + assert_eq!(problem.sets().len(), 7); + + let solver = BruteForce::new(); + let best = solver.find_best(&problem).unwrap(); + assert_eq!(problem.evaluate(&best), SolutionSize::Valid(3)); +}