diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index 57dceb9b9..749a3c178 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -108,6 +108,7 @@ "BoundedComponentSpanningForest": [Bounded Component Spanning Forest], "BinPacking": [Bin Packing], "BoyceCoddNormalFormViolation": [Boyce-Codd Normal Form Violation], + "ConsistencyOfDatabaseFrequencyTables": [Consistency of Database Frequency Tables], "ClosestVectorProblem": [Closest Vector Problem], "ConsecutiveSets": [Consecutive Sets], "MinimumMultiwayCut": [Minimum Multiway Cut], @@ -3201,6 +3202,77 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76], A relation satisfies _Boyce-Codd Normal Form_ (BCNF) if every non-trivial functional dependency $X arrow.r Y$ has $X$ as a superkey --- that is, $X^+$ = $A'$. This classical NP-complete problem from database theory asks whether the given attribute subset $A'$ violates BCNF. The NP-completeness was established by Beeri and Bernstein (1979) via reduction from Hitting Set. It appears as problem SR29 in Garey and Johnson's compendium (category A4: Storage and Retrieval). 
] +#{ + let x = load-model-example("ConsistencyOfDatabaseFrequencyTables") + let num_objects = x.instance.num_objects + let num_attrs = x.instance.attribute_domains.len() + let domains = x.instance.attribute_domains + let table01 = x.instance.frequency_tables.at(0).counts + let table12 = x.instance.frequency_tables.at(1).counts + let config = x.optimal_config + let value = (object, attr) => config.at(object * num_attrs + attr) + [ + #problem-def("ConsistencyOfDatabaseFrequencyTables")[ + Given a finite set $V$ of objects, a finite set $A$ of attributes, a domain $D_a$ for each $a in A$, a collection of pairwise frequency tables $f_(a,b): D_a times D_b -> ZZ^(>=0)$ whose entries sum to $|V|$, and a set $K subset.eq V times A times union_(a in A) D_a$ of known triples $(v, a, x)$, determine whether there exist functions $g_a: V -> D_a$ such that $g_a(v) = x$ for every $(v, a, x) in K$ and, for every published table $f_(a,b)$, exactly $f_(a,b)(x, y)$ objects satisfy $(g_a(v), g_b(v)) = (x, y)$. + ][ + Consistency of Database Frequency Tables is Garey and Johnson's storage-and-retrieval problem SR35 @garey1979. It asks whether released pairwise marginals can come from some hidden microdata table while respecting already known individual attribute values, making it a natural decision problem in statistical disclosure control. The direct witness space implemented in this crate assigns one categorical variable to each object-attribute pair, so exhaustive search runs in $O^*((product_(a in A) |D_a|)^(|V|))$. #footnote[This is the exact search bound induced by the implementation's configuration space; no faster general exact worst-case algorithm is claimed here.] + + *Example.* Let $|V| = #num_objects$ with attributes $a_0, a_1, a_2$ having domain sizes $#domains.at(0)$, $#domains.at(1)$, and $#domains.at(2)$ respectively. 
Publish the pairwise tables + + #align(center, table( + columns: 4, + align: center, + table.header([$f_(a_0, a_1)$], [$0$], [$1$], [$2$]), + [$0$], [#table01.at(0).at(0)], [#table01.at(0).at(1)], [#table01.at(0).at(2)], + [$1$], [#table01.at(1).at(0)], [#table01.at(1).at(1)], [#table01.at(1).at(2)], + )) + + and + + #align(center, table( + columns: 3, + align: center, + table.header([$f_(a_1, a_2)$], [$0$], [$1$]), + [$0$], [#table12.at(0).at(0)], [#table12.at(0).at(1)], + [$1$], [#table12.at(1).at(0)], [#table12.at(1).at(1)], + [$2$], [#table12.at(2).at(0)], [#table12.at(2).at(1)], + )) + + together with the known values $K = {(v_0, a_0, 0), (v_3, a_0, 1), (v_1, a_2, 1)}$. One consistent completion is: + + #align(center, table( + columns: 4, + align: center, + table.header([object], [$a_0$], [$a_1$], [$a_2$]), + [$v_0$], [#value(0, 0)], [#value(0, 1)], [#value(0, 2)], + [$v_1$], [#value(1, 0)], [#value(1, 1)], [#value(1, 2)], + [$v_2$], [#value(2, 0)], [#value(2, 1)], [#value(2, 2)], + [$v_3$], [#value(3, 0)], [#value(3, 1)], [#value(3, 2)], + [$v_4$], [#value(4, 0)], [#value(4, 1)], [#value(4, 2)], + [$v_5$], [#value(5, 0)], [#value(5, 1)], [#value(5, 2)], + )) + + This witness satisfies every published count: in $f_(a_0, a_1)$ each of the six cells appears exactly once, while in $f_(a_1, a_2)$ the five occupied cells have multiplicities $1, 1, 2, 1, 1$ exactly as listed above. It also respects all three known triples, so the answer is YES. + ] + ] +} + +#reduction-rule("ConsistencyOfDatabaseFrequencyTables", "ILP")[ + Each object-attribute pair is encoded by a one-hot binary vector over its domain, and each pairwise frequency count becomes a linear equality over McCormick auxiliary variables that linearize the product of two one-hot indicators. Known values are fixed by pinning the corresponding indicator to 1. The resulting ILP is a pure feasibility problem (trivial objective). 
+][ + _Construction._ Let $V$ be the set of objects, $A$ the set of attributes with domains $D_a$, $cal(T)$ the set of published frequency tables, and $K$ the set of known triples $(v, a, x)$. + + _Variables:_ (1) Binary one-hot indicators $y_(v,a,x) in {0, 1}$ for each object $v in V$, attribute $a in A$, and value $x in D_a$: $y_(v,a,x) = 1$ iff object $v$ takes value $x$ for attribute $a$. (2) Binary auxiliary variables $z_(t,v,x,x') in {0, 1}$ for each table $t in cal(T)$ (with attribute pair $(a, b)$), object $v in V$, and cell $(x, x') in D_a times D_b$: $z_(t,v,x,x') = 1$ iff object $v$ realizes cell $(x, x')$ in table $t$. + + _Constraints:_ (1) One-hot: $sum_(x in D_a) y_(v,a,x) = 1$ for all $v in V$, $a in A$. (2) Known values: $y_(v,a,x) = 1$ for each $(v, a, x) in K$. (3) McCormick linearization for $z_(t,v,x,x') = y_(v,a,x) dot y_(v,b,x')$: $z_(t,v,x,x') lt.eq y_(v,a,x)$, $z_(t,v,x,x') lt.eq y_(v,b,x')$, $z_(t,v,x,x') gt.eq y_(v,a,x) + y_(v,b,x') - 1$. (4) Frequency counts: $sum_(v in V) z_(t,v,x,x') = f_t (x, x')$ for each table $t$ and cell $(x, x')$. + + _Objective:_ Minimize $0$ (feasibility problem). + + _Correctness._ ($arrow.r.double$) A consistent assignment defines one-hot indicators and their products; all constraints hold by construction, and the frequency equalities match the published counts. ($arrow.l.double$) Any feasible binary solution assigns exactly one value per object-attribute (one-hot), respects known values, and the McCormick constraints force $z_(t,v,x,x') = y_(v,a,x) dot y_(v,b,x')$ for binary variables, so the frequency equalities certify consistency. + + _Solution extraction._ For each object $v$ and attribute $a$, find $x$ with $y_(v,a,x) = 1$; assign value $x$ to $(v, a)$. 
+] + #problem-def("SumOfSquaresPartition")[ Given a finite set $A = {a_0, dots, a_(n-1)}$ with sizes $s(a_i) in ZZ^+$, a positive integer $K lt.eq |A|$ (number of groups), and a positive integer $J$ (bound), determine whether $A$ can be partitioned into $K$ disjoint sets $A_1, dots, A_K$ such that $sum_(i=1)^K (sum_(a in A_i) s(a))^2 lt.eq J$. ][ diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs index 93f245868..d931e6b3a 100644 --- a/problemreductions-cli/src/cli.rs +++ b/problemreductions-cli/src/cli.rs @@ -265,6 +265,7 @@ Flags by problem type: RuralPostman (RPP) --graph, --edge-weights, --required-edges, --bound MultipleChoiceBranching --arcs [--weights] --partition --bound [--num-vertices] AdditionalKey --num-attributes, --dependencies, --relation-attrs [--known-keys] + ConsistencyOfDatabaseFrequencyTables --num-objects, --attribute-domains, --frequency-tables [--known-values] SubgraphIsomorphism --graph (host), --pattern (pattern) LCS --strings, --bound [--alphabet-size] FAS --arcs [--weights] [--num-vertices] @@ -312,6 +313,7 @@ Examples: pred create MIS/UnitDiskGraph --positions \"0,0;1,0;0.5,0.8\" --radius 1.5 pred create MIS --random --num-vertices 10 --edge-prob 0.3 pred create MultiprocessorScheduling --lengths 4,5,3,2,6 --num-processors 2 --deadline 10 + pred create ConsistencyOfDatabaseFrequencyTables --num-objects 6 --attribute-domains \"2,3,2\" --frequency-tables \"0,1:1,1,1|1,1,1;1,2:1,1|0,2|1,1\" --known-values \"0,0,0;3,0,1;1,2,1\" pred create BiconnectivityAugmentation --graph 0-1,1-2,2-3 --potential-edges 0-2:3,0-3:4,1-3:2 --budget 5 pred create FVS --arcs \"0>1,1>2,2>0\" --weights 1,1,1 pred create UndirectedTwoCommodityIntegralFlow --graph 0-2,1-2,2-3 --capacities 1,1,2 --source-1 0 --sink-1 3 --source-2 1 --sink-2 3 --requirement-1 1 --requirement-2 1 @@ -608,6 +610,18 @@ pub struct CreateArgs { /// Known candidate keys for AdditionalKey (e.g., "0,1;2,3") #[arg(long)] pub known_keys: Option, + /// Number of 
objects for ConsistencyOfDatabaseFrequencyTables + #[arg(long)] + pub num_objects: Option, + /// Attribute-domain sizes for ConsistencyOfDatabaseFrequencyTables (comma-separated, e.g., "2,3,2") + #[arg(long)] + pub attribute_domains: Option, + /// Pairwise frequency tables for ConsistencyOfDatabaseFrequencyTables (e.g., "0,1:1,1,1|1,1,1;1,2:1,1|0,2|1,1") + #[arg(long)] + pub frequency_tables: Option, + /// Known value triples for ConsistencyOfDatabaseFrequencyTables (e.g., "0,0,0;3,1,2") + #[arg(long)] + pub known_values: Option, /// Domain size for ConjunctiveBooleanQuery #[arg(long)] pub domain_size: Option, diff --git a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs index 47beecf12..6b311f7d0 100644 --- a/problemreductions-cli/src/commands/create.rs +++ b/problemreductions-cli/src/commands/create.rs @@ -18,13 +18,14 @@ use problemreductions::models::graph::{ }; use problemreductions::models::misc::{ AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CbqRelation, ConjunctiveBooleanQuery, - FlowShopScheduling, LongestCommonSubsequence, MinimumTardinessSequencing, - MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack, QueryArg, - RectilinearPictureCompression, ResourceConstrainedScheduling, - SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost, - SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness, - SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence, - StringToStringCorrection, SubsetSum, SumOfSquaresPartition, TimetableDesign, + ConsistencyOfDatabaseFrequencyTables, FlowShopScheduling, FrequencyTable, KnownValue, + LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, + PartiallyOrderedKnapsack, QueryArg, RectilinearPictureCompression, + ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines, + SequencingToMinimizeMaximumCumulativeCost,
SequencingToMinimizeWeightedCompletionTime, + SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines, + SequencingWithinIntervals, ShortestCommonSupersequence, StringToStringCorrection, SubsetSum, + SumOfSquaresPartition, TimetableDesign, }; use problemreductions::models::BiconnectivityAugmentation; use problemreductions::prelude::*; @@ -149,6 +150,10 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool { && args.dependencies.is_none() && args.relation_attrs.is_none() && args.known_keys.is_none() + && args.num_objects.is_none() + && args.attribute_domains.is_none() + && args.frequency_tables.is_none() + && args.known_values.is_none() && args.domain_size.is_none() && args.relations.is_none() && args.conjuncts_spec.is_none() @@ -197,6 +202,126 @@ fn ensure_attribute_indices_in_range( Ok(()) } +fn parse_cdft_frequency_tables( + raw: &str, + attribute_domains: &[usize], + num_objects: usize, +) -> Result> { + let num_attributes = attribute_domains.len(); + let mut seen_pairs = BTreeSet::new(); + + raw.split(';') + .filter(|entry| !entry.trim().is_empty()) + .map(|entry| { + let (pair_str, counts_str) = entry.trim().split_once(':').ok_or_else(|| { + anyhow::anyhow!( + "Invalid frequency table '{entry}', expected 'a,b:row0|row1|...'" + ) + })?; + let pair: Vec = util::parse_comma_list(pair_str.trim())?; + anyhow::ensure!( + pair.len() == 2, + "Frequency table '{entry}' must start with exactly two attribute indices" + ); + + let attribute_a = pair[0]; + let attribute_b = pair[1]; + ensure_attribute_indices_in_range( + &[attribute_a, attribute_b], + num_attributes, + &format!("Frequency table '{entry}'"), + )?; + anyhow::ensure!( + attribute_a != attribute_b, + "Frequency table '{entry}' must use two distinct attributes" + ); + + let pair_key = if attribute_a < attribute_b { + (attribute_a, attribute_b) + } else { + (attribute_b, attribute_a) + }; + anyhow::ensure!( + seen_pairs.insert(pair_key), + "Duplicate frequency table pair ({}, {})", + 
pair_key.0, + pair_key.1 + ); + + let rows: Vec> = counts_str + .split('|') + .map(|row| util::parse_comma_list(row.trim())) + .collect::>()?; + + let expected_rows = attribute_domains[attribute_a]; + anyhow::ensure!( + rows.len() == expected_rows, + "Frequency table '{entry}' has {} rows but attribute {attribute_a} has domain size {expected_rows}", + rows.len() + ); + + let expected_cols = attribute_domains[attribute_b]; + for (row_index, row) in rows.iter().enumerate() { + anyhow::ensure!( + row.len() == expected_cols, + "Frequency table '{entry}' row {row_index} has {} columns but attribute {attribute_b} has domain size {expected_cols}", + row.len() + ); + } + + let total: usize = rows.iter().flatten().copied().sum(); + anyhow::ensure!( + total == num_objects, + "Frequency table '{entry}' sums to {total}, expected num_objects={num_objects}" + ); + + Ok(FrequencyTable::new(attribute_a, attribute_b, rows)) + }) + .collect() +} + +fn parse_cdft_known_values( + raw: Option<&str>, + num_objects: usize, + attribute_domains: &[usize], +) -> Result> { + let num_attributes = attribute_domains.len(); + match raw { + None => Ok(vec![]), + Some(s) if s.trim().is_empty() => Ok(vec![]), + Some(s) => s + .split(';') + .filter(|entry| !entry.trim().is_empty()) + .map(|entry| { + let triple: Vec = util::parse_comma_list(entry.trim())?; + anyhow::ensure!( + triple.len() == 3, + "Known value '{entry}' must be an 'object,attribute,value' triple" + ); + let object = triple[0]; + let attribute = triple[1]; + let value = triple[2]; + + anyhow::ensure!( + object < num_objects, + "Known value '{entry}' has object index {object} out of range for num_objects={num_objects}" + ); + anyhow::ensure!( + attribute < num_attributes, + "Known value '{entry}' has attribute index {attribute} out of range for {num_attributes} attributes" + ); + let domain_size = attribute_domains[attribute]; + anyhow::ensure!( + value < domain_size, + "Known value '{entry}' has value {value} out of range for 
attribute {attribute} with domain size {domain_size}" + ); + + Ok(KnownValue::new(object, attribute, value)) + }) + .collect(), + } +} + fn resolve_example_problem_ref( input: &str, rgraph: &problemreductions::rules::ReductionGraph, @@ -468,6 +593,9 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str { "--arcs \"0>1,0>2,1>3,2>3,1>4,3>5,4>5,2>4\" --weights 3,2,4,1,2,3,1,3 --partition \"0,1;2,3;4,7;5,6\" --bound 10" } "AdditionalKey" => "--num-attributes 6 --dependencies \"0,1:2,3;2,3:4,5;4,5:0,1\" --relation-attrs 0,1,2,3,4,5 --known-keys \"0,1;2,3;4,5\"", + "ConsistencyOfDatabaseFrequencyTables" => { + "--num-objects 6 --attribute-domains \"2,3,2\" --frequency-tables \"0,1:1,1,1|1,1,1;1,2:1,1|0,2|1,1\" --known-values \"0,0,0;3,0,1;1,2,1\"" + } "SubgraphIsomorphism" => "--graph 0-1,1-2,2-0 --pattern 0-1", "RectilinearPictureCompression" => { "--matrix \"1,1,0,0;1,1,0,0;0,0,1,1;0,0,1,1\" --bound 2" @@ -598,6 +726,15 @@ fn help_flag_hint( } ("ShortestCommonSupersequence", "strings") => "symbol lists: \"0,1,2;1,2,0\"", ("MultipleChoiceBranching", "partition") => "semicolon-separated groups: \"0,1;2,3\"", + ("ConsistencyOfDatabaseFrequencyTables", "attribute_domains") => { + "comma-separated domain sizes: 2,3,2" + } + ("ConsistencyOfDatabaseFrequencyTables", "frequency_tables") => { + "semicolon-separated tables: \"0,1:1,1,1|1,1,1;1,2:1,1|0,2|1,1\"" + } + ("ConsistencyOfDatabaseFrequencyTables", "known_values") => { + "semicolon-separated triples: \"0,0,0;3,0,1;1,2,1\"" + } ("ConsecutiveOnesSubmatrix", "matrix") => "semicolon-separated 0/1 rows: \"1,0;0,1\"", ("TimetableDesign", "craftsman_avail") | ("TimetableDesign", "task_avail") => { "semicolon-separated 0/1 rows: \"1,1,0;0,1,1\"" @@ -1692,6 +1829,52 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { ) } + "ConsistencyOfDatabaseFrequencyTables" => { + let usage = "Usage: pred create ConsistencyOfDatabaseFrequencyTables --num-objects 6 --attribute-domains \"2,3,2\" 
--frequency-tables \"0,1:1,1,1|1,1,1;1,2:1,1|0,2|1,1\" --known-values \"0,0,0;3,0,1;1,2,1\""; + let num_objects = args.num_objects.ok_or_else(|| { + anyhow::anyhow!( + "ConsistencyOfDatabaseFrequencyTables requires --num-objects\n\n{usage}" + ) + })?; + let attribute_domains_str = args.attribute_domains.as_deref().ok_or_else(|| { + anyhow::anyhow!( + "ConsistencyOfDatabaseFrequencyTables requires --attribute-domains\n\n{usage}" + ) + })?; + let frequency_tables_str = args.frequency_tables.as_deref().ok_or_else(|| { + anyhow::anyhow!( + "ConsistencyOfDatabaseFrequencyTables requires --frequency-tables\n\n{usage}" + ) + })?; + + let attribute_domains: Vec = util::parse_comma_list(attribute_domains_str)?; + for (index, &domain_size) in attribute_domains.iter().enumerate() { + anyhow::ensure!( + domain_size > 0, + "attribute domain at index {index} must be positive\n\n{usage}" + ); + } + let frequency_tables = + parse_cdft_frequency_tables(frequency_tables_str, &attribute_domains, num_objects) + .map_err(|e| anyhow::anyhow!("{e}\n\n{usage}"))?; + let known_values = parse_cdft_known_values( + args.known_values.as_deref(), + num_objects, + &attribute_domains, + ) + .map_err(|e| anyhow::anyhow!("{e}\n\n{usage}"))?; + + ( + ser(ConsistencyOfDatabaseFrequencyTables::new( + num_objects, + attribute_domains, + frequency_tables, + known_values, + ))?, + resolved_variant.clone(), + ) + } + // SubsetSum "SubsetSum" => { let sizes_str = args.sizes.as_deref().ok_or_else(|| { @@ -5470,6 +5653,10 @@ mod tests { conjuncts_spec: None, relation_attrs: None, known_keys: None, + num_objects: None, + attribute_domains: None, + frequency_tables: None, + known_values: None, costs: None, cut_bound: None, size_bound: None, diff --git a/problemreductions-cli/tests/cli_tests.rs b/problemreductions-cli/tests/cli_tests.rs index a6aa1b924..6229ac3d1 100644 --- a/problemreductions-cli/tests/cli_tests.rs +++ b/problemreductions-cli/tests/cli_tests.rs @@ -5768,6 +5768,67 @@ fn 
test_create_bcnf_rejects_out_of_range_target_attribute_indices() { ); } +#[test] +fn test_create_consistency_of_database_frequency_tables() { + let output = pred() + .args([ + "create", + "ConsistencyOfDatabaseFrequencyTables", + "--num-objects", + "6", + "--attribute-domains", + "2,3,2", + "--frequency-tables", + "0,1:1,1,1|1,1,1;1,2:1,1|0,2|1,1", + "--known-values", + "0,0,0;3,0,1;1,2,1", + ]) + .output() + .unwrap(); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8(output.stdout).unwrap(); + let json: serde_json::Value = serde_json::from_str(&stdout).unwrap(); + assert_eq!(json["type"], "ConsistencyOfDatabaseFrequencyTables"); + assert_eq!(json["data"]["num_objects"], 6); + assert_eq!( + json["data"]["attribute_domains"], + serde_json::json!([2, 3, 2]) + ); + assert_eq!(json["data"]["frequency_tables"][0]["attribute_a"], 0); + assert_eq!(json["data"]["frequency_tables"][0]["attribute_b"], 1); + assert_eq!( + json["data"]["frequency_tables"][0]["counts"], + serde_json::json!([[1, 1, 1], [1, 1, 1]]) + ); + assert_eq!( + json["data"]["known_values"], + serde_json::json!([ + {"object": 0, "attribute": 0, "value": 0}, + {"object": 3, "attribute": 0, "value": 1}, + {"object": 1, "attribute": 2, "value": 1} + ]) + ); +} + +#[test] +fn test_create_consistency_of_database_frequency_tables_problem_help_uses_supported_flags() { + let output = pred() + .args(["create", "ConsistencyOfDatabaseFrequencyTables"]) + .output() + .unwrap(); + assert!(!output.status.success()); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!(stderr.contains("--num-objects"), "stderr: {stderr}"); + assert!(stderr.contains("--attribute-domains"), "stderr: {stderr}"); + assert!(stderr.contains("--frequency-tables"), "stderr: {stderr}"); + assert!(stderr.contains("--known-values"), "stderr: {stderr}"); +} + #[test] fn test_create_multiple_copy_file_allocation() { let output = pred() @@ -7286,6 
+7347,28 @@ fn test_create_model_example_multiprocessor_scheduling() { assert_eq!(json["type"], "MultiprocessorScheduling"); } +#[test] +fn test_create_model_example_consistency_of_database_frequency_tables() { + let output = pred() + .args([ + "create", + "--example", + "ConsistencyOfDatabaseFrequencyTables", + ]) + .output() + .unwrap(); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8(output.stdout).unwrap(); + let json: serde_json::Value = serde_json::from_str(&stdout).unwrap(); + assert_eq!(json["type"], "ConsistencyOfDatabaseFrequencyTables"); + assert_eq!(json["data"]["num_objects"], 6); +} + #[test] fn test_create_model_example_minimum_multiway_cut() { let output = pred() diff --git a/src/lib.rs b/src/lib.rs index 564d234b5..7997c6981 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,14 +65,15 @@ pub mod prelude { }; pub use crate::models::misc::{ AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CbqRelation, - ConjunctiveBooleanQuery, ConjunctiveQueryFoldability, Factoring, FlowShopScheduling, - Knapsack, LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, - PaintShop, Partition, QueryArg, RectilinearPictureCompression, - ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines, - SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime, - SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines, - SequencingWithinIntervals, ShortestCommonSupersequence, StaffScheduling, - StringToStringCorrection, SubsetSum, SumOfSquaresPartition, Term, TimetableDesign, + ConjunctiveBooleanQuery, ConjunctiveQueryFoldability, ConsistencyOfDatabaseFrequencyTables, + Factoring, FlowShopScheduling, Knapsack, LongestCommonSubsequence, + MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, Partition, QueryArg, + RectilinearPictureCompression, ResourceConstrainedScheduling, + 
SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost, + SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness, + SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, + ShortestCommonSupersequence, StaffScheduling, StringToStringCorrection, SubsetSum, + SumOfSquaresPartition, Term, TimetableDesign, }; pub use crate::models::set::{ ComparativeContainment, ConsecutiveSets, ExactCoverBy3Sets, MaximumSetPacking, diff --git a/src/models/misc/consistency_of_database_frequency_tables.rs b/src/models/misc/consistency_of_database_frequency_tables.rs new file mode 100644 index 000000000..f052d42e3 --- /dev/null +++ b/src/models/misc/consistency_of_database_frequency_tables.rs @@ -0,0 +1,366 @@ +//! Consistency of Database Frequency Tables problem implementation. +//! +//! Given a finite set of objects, categorical attributes with finite domains, +//! pairwise frequency tables for selected attribute pairs, and some known +//! object-attribute values, determine whether there exists a complete +//! assignment of attribute values to all objects that matches every published +//! frequency table and every known value. + +use crate::registry::{FieldInfo, ProblemSchemaEntry}; +use crate::traits::{Problem, SatisfactionProblem}; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeSet; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct FrequencyTable { + attribute_a: usize, + attribute_b: usize, + counts: Vec>, +} + +impl FrequencyTable { + /// Create a new pairwise frequency table. + pub fn new(attribute_a: usize, attribute_b: usize, counts: Vec>) -> Self { + Self { + attribute_a, + attribute_b, + counts, + } + } + + /// Returns the first attribute index. + pub fn attribute_a(&self) -> usize { + self.attribute_a + } + + /// Returns the second attribute index. + pub fn attribute_b(&self) -> usize { + self.attribute_b + } + + /// Returns the table counts. 
+ pub fn counts(&self) -> &[Vec] { + &self.counts + } + + /// Returns the number of table cells. + pub fn num_cells(&self) -> usize { + self.counts.iter().map(Vec::len).sum() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct KnownValue { + object: usize, + attribute: usize, + value: usize, +} + +impl KnownValue { + /// Create a new known object-attribute value. + pub fn new(object: usize, attribute: usize, value: usize) -> Self { + Self { + object, + attribute, + value, + } + } + + /// Returns the object index. + pub fn object(&self) -> usize { + self.object + } + + /// Returns the attribute index. + pub fn attribute(&self) -> usize { + self.attribute + } + + /// Returns the fixed categorical value. + pub fn value(&self) -> usize { + self.value + } +} + +inventory::submit! { + ProblemSchemaEntry { + name: "ConsistencyOfDatabaseFrequencyTables", + display_name: "Consistency of Database Frequency Tables", + aliases: &[], + dimensions: &[], + module_path: module_path!(), + description: "Determine whether pairwise frequency tables and known values admit a consistent complete database assignment", + fields: &[ + FieldInfo { name: "num_objects", type_name: "usize", description: "Number of objects in the database" }, + FieldInfo { name: "attribute_domains", type_name: "Vec", description: "Domain size for each attribute" }, + FieldInfo { name: "frequency_tables", type_name: "Vec", description: "Published pairwise frequency tables" }, + FieldInfo { name: "known_values", type_name: "Vec", description: "Known object-attribute-value triples" }, + ], + } +} + +/// The Consistency of Database Frequency Tables decision problem. 
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ConsistencyOfDatabaseFrequencyTables { + num_objects: usize, + attribute_domains: Vec, + frequency_tables: Vec, + known_values: Vec, +} + +impl ConsistencyOfDatabaseFrequencyTables { + /// Create a new consistency-of-database-frequency-tables instance. + pub fn new( + num_objects: usize, + attribute_domains: Vec, + frequency_tables: Vec, + known_values: Vec, + ) -> Self { + for (attribute, &domain_size) in attribute_domains.iter().enumerate() { + assert!( + domain_size > 0, + "attribute domain size at index {attribute} must be positive" + ); + } + + let num_attributes = attribute_domains.len(); + let mut seen_pairs = BTreeSet::new(); + for table in &frequency_tables { + let attribute_a = table.attribute_a(); + let attribute_b = table.attribute_b(); + assert!( + attribute_a < num_attributes, + "frequency table attribute_a {attribute_a} out of range for {num_attributes} attributes" + ); + assert!( + attribute_b < num_attributes, + "frequency table attribute_b {attribute_b} out of range for {num_attributes} attributes" + ); + assert!( + attribute_a != attribute_b, + "frequency table attributes must be distinct" + ); + + let pair = if attribute_a < attribute_b { + (attribute_a, attribute_b) + } else { + (attribute_b, attribute_a) + }; + assert!( + seen_pairs.insert(pair), + "duplicate frequency table pair ({}, {})", + pair.0, + pair.1 + ); + + let expected_rows = attribute_domains[attribute_a]; + assert_eq!( + table.counts().len(), + expected_rows, + "frequency table rows ({}) must equal attribute_domains[{attribute_a}] ({expected_rows})", + table.counts().len() + ); + + let expected_cols = attribute_domains[attribute_b]; + for (row, row_counts) in table.counts().iter().enumerate() { + assert_eq!( + row_counts.len(), + expected_cols, + "frequency table columns ({}) in row {row} must equal attribute_domains[{attribute_b}] ({expected_cols})", + row_counts.len() + ); + } + + let total: usize = 
table.counts().iter().flatten().copied().sum(); + assert_eq!( + total, num_objects, + "frequency table total ({total}) must equal num_objects ({num_objects})" + ); + } + + for known_value in &known_values { + assert!( + known_value.object() < num_objects, + "known value object {} out of range for num_objects {}", + known_value.object(), + num_objects + ); + assert!( + known_value.attribute() < num_attributes, + "known value attribute {} out of range for {num_attributes} attributes", + known_value.attribute() + ); + let domain_size = attribute_domains[known_value.attribute()]; + assert!( + known_value.value() < domain_size, + "known value value {} out of range for attribute {} with domain size {}", + known_value.value(), + known_value.attribute(), + domain_size + ); + } + + Self { + num_objects, + attribute_domains, + frequency_tables, + known_values, + } + } + + /// Returns the number of objects. + pub fn num_objects(&self) -> usize { + self.num_objects + } + + /// Returns the number of attributes. + pub fn num_attributes(&self) -> usize { + self.attribute_domains.len() + } + + /// Returns the attribute-domain sizes. + pub fn attribute_domains(&self) -> &[usize] { + &self.attribute_domains + } + + /// Returns the published frequency tables. + pub fn frequency_tables(&self) -> &[FrequencyTable] { + &self.frequency_tables + } + + /// Returns the known values. + pub fn known_values(&self) -> &[KnownValue] { + &self.known_values + } + + /// Returns the product of attribute domain sizes. + pub fn domain_size_product(&self) -> usize { + self.attribute_domains.iter().copied().product() + } + + /// Returns the number of object-attribute assignment variables in the direct encoding. + pub fn num_assignment_variables(&self) -> usize { + self.num_objects * self.num_attributes() + } + + /// Returns the number of published frequency tables. + pub fn num_frequency_tables(&self) -> usize { + self.frequency_tables.len() + } + + /// Returns the number of known value constraints. 
+ pub fn num_known_values(&self) -> usize { + self.known_values.len() + } + + /// Returns the number of one-hot assignment indicators used by the ILP reduction. + pub fn num_assignment_indicators(&self) -> usize { + self.num_objects * self.attribute_domains.iter().sum::() + } + + /// Returns the total number of published frequency-table cells. + pub fn num_frequency_cells(&self) -> usize { + self.frequency_tables + .iter() + .map(FrequencyTable::num_cells) + .sum() + } + + /// Returns the number of auxiliary ILP indicators used for frequency-cell counting. + pub fn num_auxiliary_frequency_indicators(&self) -> usize { + self.num_objects * self.num_frequency_cells() + } + + fn config_index(&self, object: usize, attribute: usize) -> usize { + object * self.num_attributes() + attribute + } +} + +impl Problem for ConsistencyOfDatabaseFrequencyTables { + const NAME: &'static str = "ConsistencyOfDatabaseFrequencyTables"; + type Metric = bool; + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } + + fn dims(&self) -> Vec { + let mut dims = Vec::with_capacity(self.num_assignment_variables()); + for _ in 0..self.num_objects { + dims.extend(self.attribute_domains.iter().copied()); + } + dims + } + + fn evaluate(&self, config: &[usize]) -> bool { + if config.len() != self.num_assignment_variables() { + return false; + } + + for object in 0..self.num_objects { + for (attribute, &domain_size) in self.attribute_domains.iter().enumerate() { + if config[self.config_index(object, attribute)] >= domain_size { + return false; + } + } + } + + for known_value in &self.known_values { + if config[self.config_index(known_value.object(), known_value.attribute())] + != known_value.value() + { + return false; + } + } + + for table in &self.frequency_tables { + let rows = self.attribute_domains[table.attribute_a()]; + let cols = self.attribute_domains[table.attribute_b()]; + let mut observed = vec![vec![0usize; cols]; rows]; + + for object in 
0..self.num_objects { + let value_a = config[self.config_index(object, table.attribute_a())]; + let value_b = config[self.config_index(object, table.attribute_b())]; + observed[value_a][value_b] += 1; + } + + if observed != table.counts { + return false; + } + } + + true + } +} + +impl SatisfactionProblem for ConsistencyOfDatabaseFrequencyTables {} + +crate::declare_variants! { + default sat ConsistencyOfDatabaseFrequencyTables => "domain_size_product^num_objects", +} + +#[cfg(feature = "example-db")] +pub(crate) fn canonical_model_example_specs() -> Vec { + vec![crate::example_db::specs::ModelExampleSpec { + id: "consistency_of_database_frequency_tables", + instance: Box::new(ConsistencyOfDatabaseFrequencyTables::new( + 6, + vec![2, 3, 2], + vec![ + FrequencyTable::new(0, 1, vec![vec![1, 1, 1], vec![1, 1, 1]]), + FrequencyTable::new(1, 2, vec![vec![1, 1], vec![0, 2], vec![1, 1]]), + ], + vec![ + KnownValue::new(0, 0, 0), + KnownValue::new(3, 0, 1), + KnownValue::new(1, 2, 1), + ], + )), + optimal_config: vec![0, 0, 0, 0, 1, 1, 0, 2, 1, 1, 0, 1, 1, 1, 1, 1, 2, 0], + optimal_value: serde_json::json!(true), + }] +} + +#[cfg(test)] +#[path = "../../unit_tests/models/misc/consistency_of_database_frequency_tables.rs"] +mod tests; diff --git a/src/models/misc/mod.rs b/src/models/misc/mod.rs index b3233305e..19f95b2ce 100644 --- a/src/models/misc/mod.rs +++ b/src/models/misc/mod.rs @@ -4,6 +4,7 @@ //! - [`AdditionalKey`]: Determine whether a relational schema has an additional candidate key //! - [`BinPacking`]: Bin Packing (minimize bins) //! - [`BoyceCoddNormalFormViolation`]: Boyce-Codd Normal Form Violation (BCNF) +//! - [`ConsistencyOfDatabaseFrequencyTables`]: Pairwise frequency-table consistency //! - [`ConjunctiveBooleanQuery`]: Evaluate a conjunctive Boolean query over relations //! - [`ConjunctiveQueryFoldability`]: Conjunctive Query Foldability //! 
- [`Factoring`]: Integer factorization @@ -35,6 +36,7 @@ mod bin_packing; mod boyce_codd_normal_form_violation; pub(crate) mod conjunctive_boolean_query; pub(crate) mod conjunctive_query_foldability; +mod consistency_of_database_frequency_tables; pub(crate) mod factoring; mod flow_shop_scheduling; mod knapsack; @@ -65,6 +67,9 @@ pub use bin_packing::BinPacking; pub use boyce_codd_normal_form_violation::BoyceCoddNormalFormViolation; pub use conjunctive_boolean_query::{ConjunctiveBooleanQuery, QueryArg, Relation as CbqRelation}; pub use conjunctive_query_foldability::{ConjunctiveQueryFoldability, Term}; +pub use consistency_of_database_frequency_tables::{ + ConsistencyOfDatabaseFrequencyTables, FrequencyTable, KnownValue, +}; pub use factoring::Factoring; pub use flow_shop_scheduling::FlowShopScheduling; pub use knapsack::Knapsack; @@ -94,6 +99,7 @@ pub use timetable_design::TimetableDesign; pub(crate) fn canonical_model_example_specs() -> Vec { let mut specs = Vec::new(); specs.extend(boyce_codd_normal_form_violation::canonical_model_example_specs()); + specs.extend(consistency_of_database_frequency_tables::canonical_model_example_specs()); specs.extend(conjunctive_boolean_query::canonical_model_example_specs()); specs.extend(conjunctive_query_foldability::canonical_model_example_specs()); specs.extend(factoring::canonical_model_example_specs()); diff --git a/src/models/mod.rs b/src/models/mod.rs index c30ecce0f..a6a427a21 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -33,14 +33,14 @@ pub use graph::{ pub use misc::PartiallyOrderedKnapsack; pub use misc::{ AdditionalKey, BinPacking, CbqRelation, ConjunctiveBooleanQuery, ConjunctiveQueryFoldability, - Factoring, FlowShopScheduling, Knapsack, LongestCommonSubsequence, MinimumTardinessSequencing, - MultiprocessorScheduling, PaintShop, Partition, PrecedenceConstrainedScheduling, QueryArg, - RectilinearPictureCompression, ResourceConstrainedScheduling, - SchedulingWithIndividualDeadlines, 
SequencingToMinimizeMaximumCumulativeCost, - SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness, - SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence, - StaffScheduling, StringToStringCorrection, SubsetSum, SumOfSquaresPartition, Term, - TimetableDesign, + ConsistencyOfDatabaseFrequencyTables, Factoring, FlowShopScheduling, Knapsack, + LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, + Partition, PrecedenceConstrainedScheduling, QueryArg, RectilinearPictureCompression, + ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines, + SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime, + SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines, + SequencingWithinIntervals, ShortestCommonSupersequence, StaffScheduling, + StringToStringCorrection, SubsetSum, SumOfSquaresPartition, Term, TimetableDesign, }; pub use set::{ ComparativeContainment, ConsecutiveSets, ExactCoverBy3Sets, MaximumSetPacking, diff --git a/src/rules/consistencyofdatabasefrequencytables_ilp.rs b/src/rules/consistencyofdatabasefrequencytables_ilp.rs new file mode 100644 index 000000000..9461ac9f5 --- /dev/null +++ b/src/rules/consistencyofdatabasefrequencytables_ilp.rs @@ -0,0 +1,242 @@ +//! Reduction from ConsistencyOfDatabaseFrequencyTables to ILP. +//! +//! The reduction uses a binary one-hot encoding: +//! - `y_{v,a,x}` is 1 iff object `v` receives value `x` for attribute `a` +//! - `z_{t,v,x,y}` is 1 iff, for table `t`, object `v` realizes cell `(x, y)` +//! +//! The pair-count equalities are linearized with standard McCormick constraints. 
+ +use crate::models::algebraic::{LinearConstraint, ObjectiveSense, ILP}; +use crate::models::misc::ConsistencyOfDatabaseFrequencyTables; +use crate::reduction; +use crate::rules::traits::{ReduceTo, ReductionResult}; + +/// Result of reducing ConsistencyOfDatabaseFrequencyTables to ILP. +#[derive(Debug, Clone)] +pub struct ReductionCDFTToILP { + target: ILP, + source: ConsistencyOfDatabaseFrequencyTables, +} + +impl ReductionCDFTToILP { + fn assignment_block_size(&self) -> usize { + self.source.attribute_domains().iter().sum() + } + + fn attribute_offset(&self, attribute: usize) -> usize { + self.source.attribute_domains()[..attribute].iter().sum() + } + + fn assignment_var_index(&self, object: usize, attribute: usize, value: usize) -> usize { + object * self.assignment_block_size() + self.attribute_offset(attribute) + value + } + + fn auxiliary_block_start(&self, table_index: usize) -> usize { + self.source.num_assignment_indicators() + + self.source.frequency_tables()[..table_index] + .iter() + .map(|table| self.source.num_objects() * table.num_cells()) + .sum::() + } + + fn auxiliary_var_index( + &self, + table_index: usize, + object: usize, + value_a: usize, + value_b: usize, + ) -> usize { + let table = &self.source.frequency_tables()[table_index]; + let cols = self.source.attribute_domains()[table.attribute_b()]; + self.auxiliary_block_start(table_index) + + object * table.num_cells() + + value_a * cols + + value_b + } + + /// Encode a satisfying source assignment as a concrete ILP variable vector. 
+ #[cfg_attr(not(any(test, feature = "example-db")), allow(dead_code))] + pub(crate) fn encode_source_solution(&self, source_solution: &[usize]) -> Vec { + let mut target_solution = vec![0usize; self.target.num_vars]; + let num_attributes = self.source.num_attributes(); + + for object in 0..self.source.num_objects() { + for attribute in 0..num_attributes { + let source_index = object * num_attributes + attribute; + let value = source_solution[source_index]; + let var = self.assignment_var_index(object, attribute, value); + target_solution[var] = 1; + } + } + + for (table_index, table) in self.source.frequency_tables().iter().enumerate() { + for object in 0..self.source.num_objects() { + let value_a = source_solution[object * num_attributes + table.attribute_a()]; + let value_b = source_solution[object * num_attributes + table.attribute_b()]; + let var = self.auxiliary_var_index(table_index, object, value_a, value_b); + target_solution[var] = 1; + } + } + + target_solution + } +} + +impl ReductionResult for ReductionCDFTToILP { + type Source = ConsistencyOfDatabaseFrequencyTables; + type Target = ILP; + + fn target_problem(&self) -> &ILP { + &self.target + } + + fn extract_solution(&self, target_solution: &[usize]) -> Vec { + let mut source_solution = Vec::with_capacity(self.source.num_assignment_variables()); + for object in 0..self.source.num_objects() { + for (attribute, &domain_size) in self.source.attribute_domains().iter().enumerate() { + let value = (0..domain_size) + .find(|&candidate| { + target_solution + .get(self.assignment_var_index(object, attribute, candidate)) + .copied() + .unwrap_or(0) + == 1 + }) + .unwrap_or(0); + source_solution.push(value); + } + } + source_solution + } +} + +#[reduction( + overhead = { + num_vars = "num_assignment_indicators + num_auxiliary_frequency_indicators", + num_constraints = "num_assignment_variables + num_known_values + num_frequency_cells + 3 * num_auxiliary_frequency_indicators", + } +)] +impl ReduceTo> for 
ConsistencyOfDatabaseFrequencyTables { + type Result = ReductionCDFTToILP; + + fn reduce_to(&self) -> Self::Result { + let source = self.clone(); + let helper = ReductionCDFTToILP { + target: ILP::empty(), + source: source.clone(), + }; + + let mut constraints = Vec::with_capacity( + source.num_assignment_variables() + + source.num_known_values() + + source.num_frequency_cells() + + 3 * source.num_auxiliary_frequency_indicators(), + ); + + for object in 0..source.num_objects() { + for (attribute, &domain_size) in source.attribute_domains().iter().enumerate() { + let terms = (0..domain_size) + .map(|value| (helper.assignment_var_index(object, attribute, value), 1.0)) + .collect(); + constraints.push(LinearConstraint::eq(terms, 1.0)); + } + } + + for known_value in source.known_values() { + constraints.push(LinearConstraint::eq( + vec![( + helper.assignment_var_index( + known_value.object(), + known_value.attribute(), + known_value.value(), + ), + 1.0, + )], + 1.0, + )); + } + + for (table_index, table) in source.frequency_tables().iter().enumerate() { + let rows = source.attribute_domains()[table.attribute_a()]; + let cols = source.attribute_domains()[table.attribute_b()]; + + for value_a in 0..rows { + for value_b in 0..cols { + let count_terms = (0..source.num_objects()) + .map(|object| { + ( + helper.auxiliary_var_index(table_index, object, value_a, value_b), + 1.0, + ) + }) + .collect(); + constraints.push(LinearConstraint::eq( + count_terms, + table.counts()[value_a][value_b] as f64, + )); + + for object in 0..source.num_objects() { + let z = helper.auxiliary_var_index(table_index, object, value_a, value_b); + let y_a = helper.assignment_var_index(object, table.attribute_a(), value_a); + let y_b = helper.assignment_var_index(object, table.attribute_b(), value_b); + + constraints.push(LinearConstraint::le(vec![(z, 1.0), (y_a, -1.0)], 0.0)); + constraints.push(LinearConstraint::le(vec![(z, 1.0), (y_b, -1.0)], 0.0)); + constraints.push(LinearConstraint::ge( + 
vec![(z, 1.0), (y_a, -1.0), (y_b, -1.0)], + -1.0, + )); + } + } + } + } + + let target = ILP::new( + source.num_assignment_indicators() + source.num_auxiliary_frequency_indicators(), + constraints, + vec![], + ObjectiveSense::Minimize, + ); + + ReductionCDFTToILP { target, source } + } +} + +#[cfg(feature = "example-db")] +pub(crate) fn canonical_rule_example_specs() -> Vec { + use crate::export::SolutionPair; + use crate::models::misc::{FrequencyTable, KnownValue}; + + vec![crate::example_db::specs::RuleExampleSpec { + id: "consistencyofdatabasefrequencytables_to_ilp", + build: || { + let source = ConsistencyOfDatabaseFrequencyTables::new( + 6, + vec![2, 3, 2], + vec![ + FrequencyTable::new(0, 1, vec![vec![1, 1, 1], vec![1, 1, 1]]), + FrequencyTable::new(1, 2, vec![vec![1, 1], vec![0, 2], vec![1, 1]]), + ], + vec![ + KnownValue::new(0, 0, 0), + KnownValue::new(3, 0, 1), + KnownValue::new(1, 2, 1), + ], + ); + let source_config = vec![0, 0, 0, 0, 1, 1, 0, 2, 1, 1, 0, 1, 1, 1, 1, 1, 2, 0]; + let reduction = source.clone().reduce_to(); + let target_config = reduction.encode_source_solution(&source_config); + crate::example_db::specs::rule_example_with_witness::<_, ILP>( + source, + SolutionPair { + source_config, + target_config, + }, + ) + }, + }] +} + +#[cfg(test)] +#[path = "../unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs"] +mod tests; diff --git a/src/rules/mod.rs b/src/rules/mod.rs index f5e15a4a1..d7929687b 100644 --- a/src/rules/mod.rs +++ b/src/rules/mod.rs @@ -54,6 +54,8 @@ pub(crate) mod circuit_ilp; #[cfg(feature = "ilp-solver")] pub(crate) mod coloring_ilp; #[cfg(feature = "ilp-solver")] +pub(crate) mod consistencyofdatabasefrequencytables_ilp; +#[cfg(feature = "ilp-solver")] pub(crate) mod factoring_ilp; #[cfg(feature = "ilp-solver")] pub(crate) mod graphpartitioning_ilp; @@ -128,6 +130,7 @@ pub(crate) fn canonical_rule_example_specs() -> Vec ConsistencyOfDatabaseFrequencyTables { + ConsistencyOfDatabaseFrequencyTables::new( + 6, + 
vec![2, 3, 2], + vec![ + FrequencyTable::new(0, 1, vec![vec![1, 1, 1], vec![1, 1, 1]]), + FrequencyTable::new(1, 2, vec![vec![1, 1], vec![0, 2], vec![1, 1]]), + ], + vec![ + KnownValue::new(0, 0, 0), + KnownValue::new(3, 0, 1), + KnownValue::new(1, 2, 1), + ], + ) +} + +fn issue_yes_witness() -> Vec { + vec![0, 0, 0, 0, 1, 1, 0, 2, 1, 1, 0, 1, 1, 1, 1, 1, 2, 0] +} + +fn small_yes_instance() -> ConsistencyOfDatabaseFrequencyTables { + ConsistencyOfDatabaseFrequencyTables::new( + 2, + vec![2, 2], + vec![FrequencyTable::new(0, 1, vec![vec![1, 0], vec![0, 1]])], + vec![KnownValue::new(0, 0, 0)], + ) +} + +fn small_no_instance() -> ConsistencyOfDatabaseFrequencyTables { + ConsistencyOfDatabaseFrequencyTables::new( + 2, + vec![2, 2], + vec![FrequencyTable::new(0, 1, vec![vec![1, 0], vec![0, 1]])], + vec![KnownValue::new(0, 0, 0), KnownValue::new(1, 1, 0)], + ) +} + +#[test] +fn test_cdft_creation_and_getters() { + let problem = issue_yes_instance(); + assert_eq!(problem.num_objects(), 6); + assert_eq!(problem.num_attributes(), 3); + assert_eq!(problem.domain_size_product(), 12); + assert_eq!(problem.num_assignment_variables(), 18); + assert_eq!(problem.attribute_domains(), &[2, 3, 2]); + assert_eq!(problem.frequency_tables().len(), 2); + assert_eq!(problem.known_values().len(), 3); + assert_eq!( + ::NAME, + "ConsistencyOfDatabaseFrequencyTables" + ); + assert_eq!( + ::variant(), + vec![] + ); +} + +#[test] +fn test_cdft_dims_repeat_attribute_domains_for_each_object() { + let problem = issue_yes_instance(); + assert_eq!( + problem.dims(), + vec![2, 3, 2, 2, 3, 2, 2, 3, 2, 2, 3, 2, 2, 3, 2, 2, 3, 2] + ); +} + +#[test] +fn test_cdft_evaluate_issue_witness() { + let problem = issue_yes_instance(); + assert!(problem.evaluate(&issue_yes_witness())); +} + +#[test] +fn test_cdft_evaluate_rejects_wrong_length() { + let problem = issue_yes_instance(); + assert!(!problem.evaluate(&[0, 0, 0])); + let mut too_long = issue_yes_witness(); + too_long.push(0); + 
assert!(!problem.evaluate(&too_long)); +} + +#[test] +fn test_cdft_evaluate_rejects_out_of_range_value() { + let problem = issue_yes_instance(); + let mut bad = issue_yes_witness(); + bad[1] = 3; + assert!(!problem.evaluate(&bad)); +} + +#[test] +fn test_cdft_evaluate_rejects_known_value_violation() { + let problem = issue_yes_instance(); + let mut bad = issue_yes_witness(); + bad[0] = 1; + assert!(!problem.evaluate(&bad)); +} + +#[test] +fn test_cdft_evaluate_rejects_frequency_table_mismatch() { + let problem = issue_yes_instance(); + let mut bad = issue_yes_witness(); + bad[17] = 1; + assert!(!problem.evaluate(&bad)); +} + +#[test] +fn test_cdft_bruteforce_finds_small_satisfying_assignment() { + let problem = small_yes_instance(); + let solver = BruteForce::new(); + let solution = solver + .find_satisfying(&problem) + .expect("small instance should be satisfiable"); + assert!(problem.evaluate(&solution)); +} + +#[test] +fn test_cdft_bruteforce_detects_small_unsat_instance() { + let problem = small_no_instance(); + let solver = BruteForce::new(); + assert!(solver.find_satisfying(&problem).is_none()); +} + +#[test] +fn test_cdft_serialization_round_trip() { + let problem = issue_yes_instance(); + let json = serde_json::to_value(&problem).unwrap(); + let restored: ConsistencyOfDatabaseFrequencyTables = serde_json::from_value(json).unwrap(); + assert_eq!(restored.num_objects(), problem.num_objects()); + assert_eq!(restored.attribute_domains(), problem.attribute_domains()); + assert_eq!(restored.frequency_tables(), problem.frequency_tables()); + assert_eq!(restored.known_values(), problem.known_values()); + assert!(restored.evaluate(&issue_yes_witness())); +} + +#[test] +fn test_cdft_paper_example_matches_issue_witness() { + let problem = issue_yes_instance(); + assert!(problem.evaluate(&issue_yes_witness())); +} + +#[test] +#[should_panic(expected = "frequency table rows")] +fn test_cdft_constructor_rejects_wrong_row_count() { + let _ = 
ConsistencyOfDatabaseFrequencyTables::new( + 2, + vec![2, 2], + vec![FrequencyTable::new(0, 1, vec![vec![1, 0]])], + vec![], + ); +} + +#[test] +#[should_panic(expected = "known value value")] +fn test_cdft_constructor_rejects_out_of_range_known_value() { + let _ = ConsistencyOfDatabaseFrequencyTables::new( + 2, + vec![2, 2], + vec![FrequencyTable::new(0, 1, vec![vec![1, 0], vec![0, 1]])], + vec![KnownValue::new(0, 1, 2)], + ); +} diff --git a/src/unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs b/src/unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs new file mode 100644 index 000000000..966febe3b --- /dev/null +++ b/src/unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs @@ -0,0 +1,123 @@ +use super::*; +use crate::models::algebraic::{ObjectiveSense, ILP}; +use crate::models::misc::{ConsistencyOfDatabaseFrequencyTables, FrequencyTable, KnownValue}; +use crate::rules::test_helpers::assert_satisfaction_round_trip_from_optimization_target; +use crate::rules::{ReduceTo, ReductionResult}; +use crate::solvers::ILPSolver; +use crate::traits::Problem; + +fn small_yes_instance() -> ConsistencyOfDatabaseFrequencyTables { + ConsistencyOfDatabaseFrequencyTables::new( + 2, + vec![2, 2], + vec![FrequencyTable::new(0, 1, vec![vec![1, 0], vec![0, 1]])], + vec![KnownValue::new(0, 0, 0)], + ) +} + +fn small_yes_witness() -> Vec { + vec![0, 0, 1, 1] +} + +fn small_no_instance() -> ConsistencyOfDatabaseFrequencyTables { + ConsistencyOfDatabaseFrequencyTables::new( + 2, + vec![2, 2], + vec![FrequencyTable::new(0, 1, vec![vec![1, 0], vec![0, 1]])], + vec![KnownValue::new(0, 0, 0), KnownValue::new(1, 1, 0)], + ) +} + +#[test] +fn test_cdft_to_ilp_structure() { + let problem = small_yes_instance(); + let reduction: ReductionCDFTToILP = ReduceTo::>::reduce_to(&problem); + let ilp = reduction.target_problem(); + + assert_eq!(ilp.num_vars, 16); + assert_eq!(ilp.constraints.len(), 33); + assert_eq!(ilp.sense, ObjectiveSense::Minimize); + 
assert!(ilp.objective.is_empty()); +} + +#[test] +fn test_cdft_to_ilp_closed_loop() { + let problem = small_yes_instance(); + let reduction: ReductionCDFTToILP = ReduceTo::>::reduce_to(&problem); + assert_satisfaction_round_trip_from_optimization_target( + &problem, + &reduction, + "ConsistencyOfDatabaseFrequencyTables->ILP closed loop", + ); +} + +#[test] +fn test_cdft_to_ilp_solution_encoding_round_trip() { + let problem = small_yes_instance(); + let reduction: ReductionCDFTToILP = ReduceTo::>::reduce_to(&problem); + let ilp_solution = reduction.encode_source_solution(&small_yes_witness()); + let extracted = reduction.extract_solution(&ilp_solution); + assert_eq!(extracted, small_yes_witness()); +} + +#[test] +fn test_cdft_to_ilp_unsat_instance_is_infeasible() { + let problem = small_no_instance(); + let reduction: ReductionCDFTToILP = ReduceTo::>::reduce_to(&problem); + let solver = ILPSolver::new(); + assert!(solver.solve(reduction.target_problem()).is_none()); +} + +#[test] +fn test_cdft_to_ilp_solve_reduced() { + let problem = small_yes_instance(); + let solver = ILPSolver::new(); + let solution = solver + .solve_reduced(&problem) + .expect("solve_reduced should find a satisfying assignment"); + assert!(problem.evaluate(&solution)); +} + +fn issue_instance() -> ConsistencyOfDatabaseFrequencyTables { + ConsistencyOfDatabaseFrequencyTables::new( + 6, + vec![2, 3, 2], + vec![ + FrequencyTable::new(0, 1, vec![vec![1, 1, 1], vec![1, 1, 1]]), + FrequencyTable::new(1, 2, vec![vec![1, 1], vec![0, 2], vec![1, 1]]), + ], + vec![ + KnownValue::new(0, 0, 0), + KnownValue::new(3, 0, 1), + KnownValue::new(1, 2, 1), + ], + ) +} + +fn issue_witness() -> Vec { + vec![0, 0, 0, 0, 1, 1, 0, 2, 1, 1, 0, 1, 1, 1, 1, 1, 2, 0] +} + +#[test] +fn test_cdft_to_ilp_issue_instance_closed_loop() { + let problem = issue_instance(); + let reduction: ReductionCDFTToILP = ReduceTo::>::reduce_to(&problem); + let solver = ILPSolver::new(); + let target_solution = solver + 
.solve(reduction.target_problem()) + .expect("ILP solver should find a feasible solution for the issue instance"); + let source_solution = reduction.extract_solution(&target_solution); + assert!( + problem.evaluate(&source_solution), + "extracted source solution must satisfy the original CDFT instance" + ); +} + +#[test] +fn test_cdft_to_ilp_issue_instance_encoding_round_trip() { + let problem = issue_instance(); + let reduction: ReductionCDFTToILP = ReduceTo::>::reduce_to(&problem); + let ilp_solution = reduction.encode_source_solution(&issue_witness()); + let extracted = reduction.extract_solution(&ilp_solution); + assert_eq!(extracted, issue_witness()); +}