From 472ecebddbbc5caeae819764bd4683d6f1539dca Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Sat, 21 Mar 2026 04:10:43 +0800 Subject: [PATCH 1/9] Add plan for #452: [Model] ConsistencyOfDatabaseFrequencyTables --- ...onsistency-of-database-frequency-tables.md | 216 ++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 docs/plans/2026-03-21-consistency-of-database-frequency-tables.md diff --git a/docs/plans/2026-03-21-consistency-of-database-frequency-tables.md b/docs/plans/2026-03-21-consistency-of-database-frequency-tables.md new file mode 100644 index 000000000..d4051a6d6 --- /dev/null +++ b/docs/plans/2026-03-21-consistency-of-database-frequency-tables.md @@ -0,0 +1,216 @@ +# ConsistencyOfDatabaseFrequencyTables Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Add the `ConsistencyOfDatabaseFrequencyTables` satisfaction model, its canonical example/docs/CLI support, and an ILP reduction so the problem is discoverable, testable, and solvable through the existing pipeline. + +**Architecture:** Model the problem as one categorical variable per `(object, attribute)` pair, with per-variable domain sizes taken from the attribute domains. Keep the core model in `src/models/misc/consistency_of_database_frequency_tables.rs`, register it through the normal schema/variant/example-db paths, then add a binary `ILP` reduction using one-hot assignment variables plus McCormick-linearized pair-count constraints for each published frequency table. Prefer explicit helper structs for frequency tables and known values so the JSON schema, CLI parsing, and paper example stay readable. + +**Tech Stack:** Rust workspace, serde, inventory schema registry, reduction macros, example-db exporters, Typst paper, existing CLI `pred create` infrastructure. 
+ +--- + +### Task 1: Add the model with TDD + +**Files:** +- Create: `src/models/misc/consistency_of_database_frequency_tables.rs` +- Create: `src/unit_tests/models/misc/consistency_of_database_frequency_tables.rs` +- Modify: `src/models/misc/mod.rs` +- Modify: `src/models/mod.rs` +- Modify: `src/lib.rs` + +**Step 1: Write the failing model tests** + +Add tests in `src/unit_tests/models/misc/consistency_of_database_frequency_tables.rs` for: +- constructor/getters/schema-facing shape on the issue’s YES instance +- `dims()` returning one variable per `(object, attribute)` with the correct per-attribute domain sizes +- `evaluate()` accepting the flattened YES witness from the issue +- `evaluate()` rejecting wrong config length, out-of-range values, conflicting known values, and table-mismatch assignments +- `BruteForce::find_satisfying()` finding a satisfying assignment for the YES instance +- `BruteForce::find_satisfying()` returning `None` on a minimally inconsistent instance +- serde round-trip + +Use the issue’s worked example as the paper/example-db source of truth. Flatten the witness object-major: +`[0,0,0, 0,1,1, 0,2,1, 1,0,1, 1,1,1, 1,2,0]`. + +**Step 2: Run the new test file and confirm RED** + +Run: +```bash +cargo test test_consistency_of_database_frequency_tables -- --nocapture +``` + +Expected: compile failure because the model does not exist yet. 
+ +**Step 3: Implement the model minimally** + +Create `src/models/misc/consistency_of_database_frequency_tables.rs` with: +- `ProblemSchemaEntry` metadata +- explicit serializable helper structs for frequency tables / known values if that makes JSON and docs clearer +- constructor validation: + - `attribute_domains` non-empty and every domain size positive + - frequency-table attribute indices distinct and in range + - each table has row count `attribute_domains[a]`, each row has column count `attribute_domains[b]` + - each table sum equals `num_objects` + - known values are in range +- getters needed for complexity / overhead: + - `num_objects()` + - `num_attributes()` + - `domain_size_product()` or equivalent exact brute-force complexity helper + - any extra count getters needed by the ILP reduction overhead +- `dims()` returning `attribute_domains` repeated once per object +- `evaluate()` that: + - rejects malformed configs + - checks all known values + - counts each published pair table exactly +- `SatisfactionProblem` impl +- `declare_variants!` with an exact multi-domain brute-force expression such as `domain_size_product^num_objects` rather than the issue’s binary-only shorthand if needed for correctness +- canonical model example spec using the issue’s YES instance + +Register the model in the misc/module/prelude exports. + +**Step 4: Run the model tests and get GREEN** + +Run: +```bash +cargo test test_consistency_of_database_frequency_tables -- --nocapture +``` + +Expected: the new model tests pass. + +**Step 5: Refactor only after green** + +Extract small helpers only if they reduce duplication in evaluation/validation, then re-run the same test command. 
+ +### Task 2: Add CLI and example-db support with TDD + +**Files:** +- Modify: `problemreductions-cli/src/cli.rs` +- Modify: `problemreductions-cli/src/commands/create.rs` +- Modify: `src/example_db/model_builders.rs` + +**Step 1: Write the failing CLI / example-db tests** + +Add or extend the most local existing tests for: +- alias/discovery/help exposure if covered in this repo’s CLI tests +- `pred create ConsistencyOfDatabaseFrequencyTables ...` producing the expected JSON payload +- `pred create --example ConsistencyOfDatabaseFrequencyTables` using the canonical example + +If no focused CLI unit test file exists yet for `create`, add the smallest relevant coverage next to current create-command tests instead of inventing a new harness. + +**Step 2: Run the focused tests and confirm RED** + +Run the narrowest affected command, for example: +```bash +cargo test create:: --package problemreductions-cli -- --nocapture +``` + +Expected: failure because the problem is not yet wired into CLI construction/help. + +**Step 3: Implement minimal CLI support** + +Add create-flag support using explicit, parseable fields: +- `--num-objects 6` +- `--attribute-domains "2,3,2"` +- `--frequency-tables "0,1:1,1,1|1,1,1;1,2:1,1|0,2|1,1"` +- `--known-values "0,0,0;3,0,1;1,2,1"` (optional) + +Update: +- `CreateArgs` with the new fields +- `all_data_flags_empty()` +- help tables / usage examples / problem-specific examples +- `create()` match arm to parse and validate the compact table syntax into the model structs + +Also ensure `print_problem_help` shows a realistic usage example for this problem. + +**Step 4: Run the focused tests and get GREEN** + +Repeat the CLI/example-db test command until it passes. 
+ +### Task 3: Add the ILP reduction with TDD + +**Files:** +- Create: `src/rules/consistencyofdatabasefrequencytables_ilp.rs` +- Create: `src/unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs` +- Modify: `src/rules/mod.rs` + +**Step 1: Write the failing reduction tests** + +Follow the project’s closed-loop reduction reference and add tests for: +- reduction shape on the issue’s YES example +- ILP feasibility witness extracts back to the original categorical assignment +- a closed-loop test using brute force on the source and brute force / direct feasibility on the reduced ILP +- the inconsistent instance reducing to an infeasible ILP +- canonical rule example registration if needed + +**Step 2: Run the reduction tests and confirm RED** + +Run: +```bash +cargo test test_consistencyofdatabasefrequencytables_to_ilp --features ilp-solver -- --nocapture +``` + +Expected: compile failure because the rule does not exist. + +**Step 3: Implement the reduction minimally** + +Create a binary `ILP` reduction with: +- assignment variables `y_{v,a,x}` for each object/attribute/value +- auxiliary McCormick variables `z_{t,v,x,y}` for each frequency-table cell and object +- constraints: + - exactly one value for each `(v,a)` + - known values fixed to `1` + - McCormick linearization for every `z` + - exact frequency-table counts via `sum_v z = f_{a,b}(x,y)` +- zero objective with `ObjectiveSense::Minimize` +- solution extraction that reads the chosen `y_{v,a,x}` variables back into the flattened source config +- reduction overhead expressed via concrete getters added in Task 1 +- canonical rule example based on the same issue example + +Register the rule in `src/rules/mod.rs` under `#[cfg(feature = "ilp-solver")]`. + +**Step 4: Run the reduction tests and get GREEN** + +Run the focused reduction test command again until it passes. 
+ +### Task 4: Add paper coverage and final verification + +**Files:** +- Modify: `docs/paper/reductions.typ` + +**Step 1: Add the paper entry after code is green** + +Add: +- `display-name` entry for `ConsistencyOfDatabaseFrequencyTables` +- `problem-def("ConsistencyOfDatabaseFrequencyTables")` with the formal database-frequency-table definition +- short background and cited complexity discussion +- the issue’s worked example in tutorial style, including the satisfying assignment and explicit verification narrative + +Use prose and a table-based presentation rather than forcing a graph-style figure. + +**Step 2: Run paper verification** + +Run: +```bash +make paper +``` + +Expected: clean Typst build. + +**Step 3: Run fresh repo verification before commit/push** + +Run: +```bash +make test +make clippy +``` + +If the ILP reduction or paper/export paths require additional targeted commands, run them too and record the exact outcome before claiming completion. + +**Step 4: Review issue compliance and cleanup** + +Confirm before the implementation summary comment / push: +- the canonical example matches the issue’s Expected Outcome +- the model is exported/discoverable +- the ILP reduction exists so `pred solve` has a path +- the plan file is removed in the cleanup commit From e16aa86c43f9121c35444d186d059d447711ac80 Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Sat, 21 Mar 2026 04:21:25 +0800 Subject: [PATCH 2/9] Add ConsistencyOfDatabaseFrequencyTables model --- src/lib.rs | 17 +- ...onsistency_of_database_frequency_tables.rs | 363 ++++++++++++++++++ src/models/misc/mod.rs | 6 + src/models/mod.rs | 15 +- ...onsistency_of_database_frequency_tables.rs | 163 ++++++++ 5 files changed, 549 insertions(+), 15 deletions(-) create mode 100644 src/models/misc/consistency_of_database_frequency_tables.rs create mode 100644 src/unit_tests/models/misc/consistency_of_database_frequency_tables.rs diff --git a/src/lib.rs b/src/lib.rs index 34ca7f66a..6a3f84a87 100644 --- 
a/src/lib.rs +++ b/src/lib.rs @@ -63,14 +63,15 @@ pub mod prelude { }; pub use crate::models::misc::{ AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CbqRelation, - ConjunctiveBooleanQuery, ConjunctiveQueryFoldability, Factoring, FlowShopScheduling, - Knapsack, LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, - PaintShop, Partition, QueryArg, RectilinearPictureCompression, - ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines, - SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime, - SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines, - SequencingWithinIntervals, ShortestCommonSupersequence, StaffScheduling, - StringToStringCorrection, SubsetSum, SumOfSquaresPartition, Term, + ConjunctiveBooleanQuery, ConjunctiveQueryFoldability, + ConsistencyOfDatabaseFrequencyTables, Factoring, FlowShopScheduling, Knapsack, + LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, + Partition, QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling, + SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost, + SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness, + SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, + ShortestCommonSupersequence, StaffScheduling, StringToStringCorrection, SubsetSum, + SumOfSquaresPartition, Term, }; pub use crate::models::set::{ ComparativeContainment, ConsecutiveSets, ExactCoverBy3Sets, MaximumSetPacking, diff --git a/src/models/misc/consistency_of_database_frequency_tables.rs b/src/models/misc/consistency_of_database_frequency_tables.rs new file mode 100644 index 000000000..1c5022dc2 --- /dev/null +++ b/src/models/misc/consistency_of_database_frequency_tables.rs @@ -0,0 +1,363 @@ +//! Consistency of Database Frequency Tables problem implementation. +//! +//! 
Given a finite set of objects, categorical attributes with finite domains, +//! pairwise frequency tables for selected attribute pairs, and some known +//! object-attribute values, determine whether there exists a complete +//! assignment of attribute values to all objects that matches every published +//! frequency table and every known value. + +use crate::registry::{FieldInfo, ProblemSchemaEntry}; +use crate::traits::{Problem, SatisfactionProblem}; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeSet; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct FrequencyTable { + attribute_a: usize, + attribute_b: usize, + counts: Vec>, +} + +impl FrequencyTable { + /// Create a new pairwise frequency table. + pub fn new(attribute_a: usize, attribute_b: usize, counts: Vec>) -> Self { + Self { + attribute_a, + attribute_b, + counts, + } + } + + /// Returns the first attribute index. + pub fn attribute_a(&self) -> usize { + self.attribute_a + } + + /// Returns the second attribute index. + pub fn attribute_b(&self) -> usize { + self.attribute_b + } + + /// Returns the table counts. + pub fn counts(&self) -> &[Vec] { + &self.counts + } + + /// Returns the number of table cells. + pub fn num_cells(&self) -> usize { + self.counts.iter().map(Vec::len).sum() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct KnownValue { + object: usize, + attribute: usize, + value: usize, +} + +impl KnownValue { + /// Create a new known object-attribute value. + pub fn new(object: usize, attribute: usize, value: usize) -> Self { + Self { + object, + attribute, + value, + } + } + + /// Returns the object index. + pub fn object(&self) -> usize { + self.object + } + + /// Returns the attribute index. + pub fn attribute(&self) -> usize { + self.attribute + } + + /// Returns the fixed categorical value. + pub fn value(&self) -> usize { + self.value + } +} + +inventory::submit! 
{ + ProblemSchemaEntry { + name: "ConsistencyOfDatabaseFrequencyTables", + display_name: "Consistency of Database Frequency Tables", + aliases: &[], + dimensions: &[], + module_path: module_path!(), + description: "Determine whether pairwise frequency tables and known values admit a consistent complete database assignment", + fields: &[ + FieldInfo { name: "num_objects", type_name: "usize", description: "Number of objects in the database" }, + FieldInfo { name: "attribute_domains", type_name: "Vec", description: "Domain size for each attribute" }, + FieldInfo { name: "frequency_tables", type_name: "Vec", description: "Published pairwise frequency tables" }, + FieldInfo { name: "known_values", type_name: "Vec", description: "Known object-attribute-value triples" }, + ], + } +} + +/// The Consistency of Database Frequency Tables decision problem. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ConsistencyOfDatabaseFrequencyTables { + num_objects: usize, + attribute_domains: Vec, + frequency_tables: Vec, + known_values: Vec, +} + +impl ConsistencyOfDatabaseFrequencyTables { + /// Create a new consistency-of-database-frequency-tables instance. 
+ pub fn new( + num_objects: usize, + attribute_domains: Vec, + frequency_tables: Vec, + known_values: Vec, + ) -> Self { + for (attribute, &domain_size) in attribute_domains.iter().enumerate() { + assert!( + domain_size > 0, + "attribute domain size at index {attribute} must be positive" + ); + } + + let num_attributes = attribute_domains.len(); + let mut seen_pairs = BTreeSet::new(); + for table in &frequency_tables { + let attribute_a = table.attribute_a(); + let attribute_b = table.attribute_b(); + assert!( + attribute_a < num_attributes, + "frequency table attribute_a {attribute_a} out of range for {num_attributes} attributes" + ); + assert!( + attribute_b < num_attributes, + "frequency table attribute_b {attribute_b} out of range for {num_attributes} attributes" + ); + assert!( + attribute_a != attribute_b, + "frequency table attributes must be distinct" + ); + + let pair = if attribute_a < attribute_b { + (attribute_a, attribute_b) + } else { + (attribute_b, attribute_a) + }; + assert!( + seen_pairs.insert(pair), + "duplicate frequency table pair ({}, {})", + pair.0, + pair.1 + ); + + let expected_rows = attribute_domains[attribute_a]; + assert_eq!( + table.counts().len(), + expected_rows, + "frequency table rows ({}) must equal attribute_domains[{attribute_a}] ({expected_rows})", + table.counts().len() + ); + + let expected_cols = attribute_domains[attribute_b]; + for (row, row_counts) in table.counts().iter().enumerate() { + assert_eq!( + row_counts.len(), + expected_cols, + "frequency table columns ({}) in row {row} must equal attribute_domains[{attribute_b}] ({expected_cols})", + row_counts.len() + ); + } + + let total: usize = table.counts().iter().flatten().copied().sum(); + assert_eq!( + total, num_objects, + "frequency table total ({total}) must equal num_objects ({num_objects})" + ); + } + + for known_value in &known_values { + assert!( + known_value.object() < num_objects, + "known value object {} out of range for num_objects {}", + 
known_value.object(), + num_objects + ); + assert!( + known_value.attribute() < num_attributes, + "known value attribute {} out of range for {num_attributes} attributes", + known_value.attribute() + ); + let domain_size = attribute_domains[known_value.attribute()]; + assert!( + known_value.value() < domain_size, + "known value value {} out of range for attribute {} with domain size {}", + known_value.value(), + known_value.attribute(), + domain_size + ); + } + + Self { + num_objects, + attribute_domains, + frequency_tables, + known_values, + } + } + + /// Returns the number of objects. + pub fn num_objects(&self) -> usize { + self.num_objects + } + + /// Returns the number of attributes. + pub fn num_attributes(&self) -> usize { + self.attribute_domains.len() + } + + /// Returns the attribute-domain sizes. + pub fn attribute_domains(&self) -> &[usize] { + &self.attribute_domains + } + + /// Returns the published frequency tables. + pub fn frequency_tables(&self) -> &[FrequencyTable] { + &self.frequency_tables + } + + /// Returns the known values. + pub fn known_values(&self) -> &[KnownValue] { + &self.known_values + } + + /// Returns the product of attribute domain sizes. + pub fn domain_size_product(&self) -> usize { + self.attribute_domains.iter().copied().product() + } + + /// Returns the number of object-attribute assignment variables in the direct encoding. + pub fn num_assignment_variables(&self) -> usize { + self.num_objects * self.num_attributes() + } + + /// Returns the number of published frequency tables. + pub fn num_frequency_tables(&self) -> usize { + self.frequency_tables.len() + } + + /// Returns the number of known value constraints. + pub fn num_known_values(&self) -> usize { + self.known_values.len() + } + + /// Returns the number of one-hot assignment indicators used by the ILP reduction. 
+ pub fn num_assignment_indicators(&self) -> usize { + self.num_objects * self.attribute_domains.iter().sum::() + } + + /// Returns the total number of published frequency-table cells. + pub fn num_frequency_cells(&self) -> usize { + self.frequency_tables.iter().map(FrequencyTable::num_cells).sum() + } + + /// Returns the number of auxiliary ILP indicators used for frequency-cell counting. + pub fn num_auxiliary_frequency_indicators(&self) -> usize { + self.num_objects * self.num_frequency_cells() + } + + fn config_index(&self, object: usize, attribute: usize) -> usize { + object * self.num_attributes() + attribute + } +} + +impl Problem for ConsistencyOfDatabaseFrequencyTables { + const NAME: &'static str = "ConsistencyOfDatabaseFrequencyTables"; + type Metric = bool; + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } + + fn dims(&self) -> Vec { + let mut dims = Vec::with_capacity(self.num_assignment_variables()); + for _ in 0..self.num_objects { + dims.extend(self.attribute_domains.iter().copied()); + } + dims + } + + fn evaluate(&self, config: &[usize]) -> bool { + if config.len() != self.num_assignment_variables() { + return false; + } + + for object in 0..self.num_objects { + for (attribute, &domain_size) in self.attribute_domains.iter().enumerate() { + if config[self.config_index(object, attribute)] >= domain_size { + return false; + } + } + } + + for known_value in &self.known_values { + if config[self.config_index(known_value.object(), known_value.attribute())] + != known_value.value() + { + return false; + } + } + + for table in &self.frequency_tables { + let rows = self.attribute_domains[table.attribute_a()]; + let cols = self.attribute_domains[table.attribute_b()]; + let mut observed = vec![vec![0usize; cols]; rows]; + + for object in 0..self.num_objects { + let value_a = config[self.config_index(object, table.attribute_a())]; + let value_b = config[self.config_index(object, table.attribute_b())]; + 
observed[value_a][value_b] += 1; + } + + if observed != table.counts { + return false; + } + } + + true + } +} + +impl SatisfactionProblem for ConsistencyOfDatabaseFrequencyTables {} + +crate::declare_variants! { + default sat ConsistencyOfDatabaseFrequencyTables => "domain_size_product^num_objects", +} + +#[cfg(feature = "example-db")] +pub(crate) fn canonical_model_example_specs() -> Vec { + vec![crate::example_db::specs::ModelExampleSpec { + id: "consistency_of_database_frequency_tables", + instance: Box::new(ConsistencyOfDatabaseFrequencyTables::new( + 6, + vec![2, 3, 2], + vec![ + FrequencyTable::new(0, 1, vec![vec![1, 1, 1], vec![1, 1, 1]]), + FrequencyTable::new(1, 2, vec![vec![1, 1], vec![0, 2], vec![1, 1]]), + ], + vec![ + KnownValue::new(0, 0, 0), + KnownValue::new(3, 0, 1), + KnownValue::new(1, 2, 1), + ], + )), + optimal_config: vec![0, 0, 0, 0, 1, 1, 0, 2, 1, 1, 0, 1, 1, 1, 1, 1, 2, 0], + optimal_value: serde_json::json!(true), + }] +} + +#[cfg(test)] +#[path = "../../unit_tests/models/misc/consistency_of_database_frequency_tables.rs"] +mod tests; diff --git a/src/models/misc/mod.rs b/src/models/misc/mod.rs index 37f421cb3..7942193c2 100644 --- a/src/models/misc/mod.rs +++ b/src/models/misc/mod.rs @@ -4,6 +4,7 @@ //! - [`AdditionalKey`]: Determine whether a relational schema has an additional candidate key //! - [`BinPacking`]: Bin Packing (minimize bins) //! - [`BoyceCoddNormalFormViolation`]: Boyce-Codd Normal Form Violation (BCNF) +//! - [`ConsistencyOfDatabaseFrequencyTables`]: Pairwise frequency-table consistency //! - [`ConjunctiveBooleanQuery`]: Evaluate a conjunctive Boolean query over relations //! - [`ConjunctiveQueryFoldability`]: Conjunctive Query Foldability //! 
- [`Factoring`]: Integer factorization @@ -32,6 +33,7 @@ pub(crate) mod additional_key; mod bin_packing; mod boyce_codd_normal_form_violation; +mod consistency_of_database_frequency_tables; pub(crate) mod conjunctive_boolean_query; pub(crate) mod conjunctive_query_foldability; pub(crate) mod factoring; @@ -61,6 +63,9 @@ pub(crate) mod sum_of_squares_partition; pub use additional_key::AdditionalKey; pub use bin_packing::BinPacking; pub use boyce_codd_normal_form_violation::BoyceCoddNormalFormViolation; +pub use consistency_of_database_frequency_tables::{ + ConsistencyOfDatabaseFrequencyTables, FrequencyTable, KnownValue, +}; pub use conjunctive_boolean_query::{ConjunctiveBooleanQuery, QueryArg, Relation as CbqRelation}; pub use conjunctive_query_foldability::{ConjunctiveQueryFoldability, Term}; pub use factoring::Factoring; @@ -91,6 +96,7 @@ pub use sum_of_squares_partition::SumOfSquaresPartition; pub(crate) fn canonical_model_example_specs() -> Vec { let mut specs = Vec::new(); specs.extend(boyce_codd_normal_form_violation::canonical_model_example_specs()); + specs.extend(consistency_of_database_frequency_tables::canonical_model_example_specs()); specs.extend(conjunctive_boolean_query::canonical_model_example_specs()); specs.extend(conjunctive_query_foldability::canonical_model_example_specs()); specs.extend(factoring::canonical_model_example_specs()); diff --git a/src/models/mod.rs b/src/models/mod.rs index a3eb9ef14..9ecd3b720 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -31,13 +31,14 @@ pub use graph::{ pub use misc::PartiallyOrderedKnapsack; pub use misc::{ AdditionalKey, BinPacking, CbqRelation, ConjunctiveBooleanQuery, ConjunctiveQueryFoldability, - Factoring, FlowShopScheduling, Knapsack, LongestCommonSubsequence, MinimumTardinessSequencing, - MultiprocessorScheduling, PaintShop, Partition, PrecedenceConstrainedScheduling, QueryArg, - RectilinearPictureCompression, ResourceConstrainedScheduling, - SchedulingWithIndividualDeadlines, 
SequencingToMinimizeMaximumCumulativeCost, - SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness, - SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence, - StaffScheduling, StringToStringCorrection, SubsetSum, SumOfSquaresPartition, Term, + ConsistencyOfDatabaseFrequencyTables, Factoring, FlowShopScheduling, Knapsack, + LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, + Partition, PrecedenceConstrainedScheduling, QueryArg, RectilinearPictureCompression, + ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines, + SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime, + SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines, + SequencingWithinIntervals, ShortestCommonSupersequence, StaffScheduling, + StringToStringCorrection, SubsetSum, SumOfSquaresPartition, Term, }; pub use set::{ ComparativeContainment, ConsecutiveSets, ExactCoverBy3Sets, MaximumSetPacking, diff --git a/src/unit_tests/models/misc/consistency_of_database_frequency_tables.rs b/src/unit_tests/models/misc/consistency_of_database_frequency_tables.rs new file mode 100644 index 000000000..6c5d99d36 --- /dev/null +++ b/src/unit_tests/models/misc/consistency_of_database_frequency_tables.rs @@ -0,0 +1,163 @@ +use super::*; +use crate::solvers::{BruteForce, Solver}; +use crate::traits::Problem; + +fn issue_yes_instance() -> ConsistencyOfDatabaseFrequencyTables { + ConsistencyOfDatabaseFrequencyTables::new( + 6, + vec![2, 3, 2], + vec![ + FrequencyTable::new(0, 1, vec![vec![1, 1, 1], vec![1, 1, 1]]), + FrequencyTable::new(1, 2, vec![vec![1, 1], vec![0, 2], vec![1, 1]]), + ], + vec![ + KnownValue::new(0, 0, 0), + KnownValue::new(3, 0, 1), + KnownValue::new(1, 2, 1), + ], + ) +} + +fn issue_yes_witness() -> Vec { + vec![0, 0, 0, 0, 1, 1, 0, 2, 1, 1, 0, 1, 1, 1, 1, 1, 2, 0] +} + +fn small_yes_instance() -> 
ConsistencyOfDatabaseFrequencyTables { + ConsistencyOfDatabaseFrequencyTables::new( + 2, + vec![2, 2], + vec![FrequencyTable::new(0, 1, vec![vec![1, 0], vec![0, 1]])], + vec![KnownValue::new(0, 0, 0)], + ) +} + +fn small_no_instance() -> ConsistencyOfDatabaseFrequencyTables { + ConsistencyOfDatabaseFrequencyTables::new( + 2, + vec![2, 2], + vec![FrequencyTable::new(0, 1, vec![vec![1, 0], vec![0, 1]])], + vec![KnownValue::new(0, 0, 0), KnownValue::new(1, 1, 0)], + ) +} + +#[test] +fn test_cdft_creation_and_getters() { + let problem = issue_yes_instance(); + assert_eq!(problem.num_objects(), 6); + assert_eq!(problem.num_attributes(), 3); + assert_eq!(problem.domain_size_product(), 12); + assert_eq!(problem.num_assignment_variables(), 18); + assert_eq!(problem.attribute_domains(), &[2, 3, 2]); + assert_eq!(problem.frequency_tables().len(), 2); + assert_eq!(problem.known_values().len(), 3); + assert_eq!( + ::NAME, + "ConsistencyOfDatabaseFrequencyTables" + ); + assert_eq!( + ::variant(), + vec![] + ); +} + +#[test] +fn test_cdft_dims_repeat_attribute_domains_for_each_object() { + let problem = issue_yes_instance(); + assert_eq!(problem.dims(), vec![2, 3, 2, 2, 3, 2, 2, 3, 2, 2, 3, 2, 2, 3, 2, 2, 3, 2]); +} + +#[test] +fn test_cdft_evaluate_issue_witness() { + let problem = issue_yes_instance(); + assert!(problem.evaluate(&issue_yes_witness())); +} + +#[test] +fn test_cdft_evaluate_rejects_wrong_length() { + let problem = issue_yes_instance(); + assert!(!problem.evaluate(&[0, 0, 0])); + let mut too_long = issue_yes_witness(); + too_long.push(0); + assert!(!problem.evaluate(&too_long)); +} + +#[test] +fn test_cdft_evaluate_rejects_out_of_range_value() { + let problem = issue_yes_instance(); + let mut bad = issue_yes_witness(); + bad[1] = 3; + assert!(!problem.evaluate(&bad)); +} + +#[test] +fn test_cdft_evaluate_rejects_known_value_violation() { + let problem = issue_yes_instance(); + let mut bad = issue_yes_witness(); + bad[0] = 1; + assert!(!problem.evaluate(&bad)); +} 
+ +#[test] +fn test_cdft_evaluate_rejects_frequency_table_mismatch() { + let problem = issue_yes_instance(); + let mut bad = issue_yes_witness(); + bad[17] = 1; + assert!(!problem.evaluate(&bad)); +} + +#[test] +fn test_cdft_bruteforce_finds_small_satisfying_assignment() { + let problem = small_yes_instance(); + let solver = BruteForce::new(); + let solution = solver + .find_satisfying(&problem) + .expect("small instance should be satisfiable"); + assert!(problem.evaluate(&solution)); +} + +#[test] +fn test_cdft_bruteforce_detects_small_unsat_instance() { + let problem = small_no_instance(); + let solver = BruteForce::new(); + assert!(solver.find_satisfying(&problem).is_none()); +} + +#[test] +fn test_cdft_serialization_round_trip() { + let problem = issue_yes_instance(); + let json = serde_json::to_value(&problem).unwrap(); + let restored: ConsistencyOfDatabaseFrequencyTables = serde_json::from_value(json).unwrap(); + assert_eq!(restored.num_objects(), problem.num_objects()); + assert_eq!(restored.attribute_domains(), problem.attribute_domains()); + assert_eq!(restored.frequency_tables(), problem.frequency_tables()); + assert_eq!(restored.known_values(), problem.known_values()); + assert!(restored.evaluate(&issue_yes_witness())); +} + +#[test] +fn test_cdft_paper_example_matches_issue_witness() { + let problem = issue_yes_instance(); + assert!(problem.evaluate(&issue_yes_witness())); +} + +#[test] +#[should_panic(expected = "frequency table rows")] +fn test_cdft_constructor_rejects_wrong_row_count() { + let _ = ConsistencyOfDatabaseFrequencyTables::new( + 2, + vec![2, 2], + vec![FrequencyTable::new(0, 1, vec![vec![1, 0]])], + vec![], + ); +} + +#[test] +#[should_panic(expected = "known value value")] +fn test_cdft_constructor_rejects_out_of_range_known_value() { + let _ = ConsistencyOfDatabaseFrequencyTables::new( + 2, + vec![2, 2], + vec![FrequencyTable::new(0, 1, vec![vec![1, 0], vec![0, 1]])], + vec![KnownValue::new(0, 1, 2)], + ); +} From 
427d39bbbeaa771fd14e58ace5b90fecf654e183 Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Sat, 21 Mar 2026 04:25:12 +0800 Subject: [PATCH 3/9] Add CLI creation for frequency table consistency --- problemreductions-cli/src/cli.rs | 14 ++ problemreductions-cli/src/commands/create.rs | 190 ++++++++++++++++++- problemreductions-cli/tests/cli_tests.rs | 76 ++++++++ 3 files changed, 277 insertions(+), 3 deletions(-) diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs index 5c3bae2d1..34879ea5f 100644 --- a/problemreductions-cli/src/cli.rs +++ b/problemreductions-cli/src/cli.rs @@ -261,6 +261,7 @@ Flags by problem type: RuralPostman (RPP) --graph, --edge-weights, --required-edges, --bound MultipleChoiceBranching --arcs [--weights] --partition --bound [--num-vertices] AdditionalKey --num-attributes, --dependencies, --relation-attrs [--known-keys] + ConsistencyOfDatabaseFrequencyTables --num-objects, --attribute-domains, --frequency-tables [--known-values] SubgraphIsomorphism --graph (host), --pattern (pattern) LCS --strings, --bound [--alphabet-size] FAS --arcs [--weights] [--num-vertices] @@ -307,6 +308,7 @@ Examples: pred create MIS/UnitDiskGraph --positions \"0,0;1,0;0.5,0.8\" --radius 1.5 pred create MIS --random --num-vertices 10 --edge-prob 0.3 pred create MultiprocessorScheduling --lengths 4,5,3,2,6 --num-processors 2 --deadline 10 + pred create ConsistencyOfDatabaseFrequencyTables --num-objects 6 --attribute-domains \"2,3,2\" --frequency-tables \"0,1:1,1,1|1,1,1;1,2:1,1|0,2|1,1\" --known-values \"0,0,0;3,0,1;1,2,1\" pred create BiconnectivityAugmentation --graph 0-1,1-2,2-3 --potential-edges 0-2:3,0-3:4,1-3:2 --budget 5 pred create FVS --arcs \"0>1,1>2,2>0\" --weights 1,1,1 pred create UndirectedTwoCommodityIntegralFlow --graph 0-2,1-2,2-3 --capacities 1,1,2 --source-1 0 --sink-1 3 --source-2 1 --sink-2 3 --requirement-1 1 --requirement-2 1 @@ -573,6 +575,18 @@ pub struct CreateArgs { /// Known candidate keys for AdditionalKey (e.g., 
"0,1;2,3")
     #[arg(long)]
     pub known_keys: Option<String>,
+    /// Number of objects for ConsistencyOfDatabaseFrequencyTables
+    #[arg(long)]
+    pub num_objects: Option<usize>,
+    /// Attribute-domain sizes for ConsistencyOfDatabaseFrequencyTables (comma-separated, e.g., "2,3,2")
+    #[arg(long)]
+    pub attribute_domains: Option<String>,
+    /// Pairwise frequency tables for ConsistencyOfDatabaseFrequencyTables (e.g., "0,1:1,1|0,1;1,2:1,0|0,1")
+    #[arg(long)]
+    pub frequency_tables: Option<String>,
+    /// Known value triples for ConsistencyOfDatabaseFrequencyTables (e.g., "0,0,0;3,1,2")
+    #[arg(long)]
+    pub known_values: Option<String>,
     /// Domain size for ConjunctiveBooleanQuery
     #[arg(long)]
     pub domain_size: Option<usize>,
diff --git a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs
index 85fb53849..f74224be9 100644
--- a/problemreductions-cli/src/commands/create.rs
+++ b/problemreductions-cli/src/commands/create.rs
@@ -18,9 +18,10 @@ use problemreductions::models::graph::{
 };
 use problemreductions::models::misc::{
     AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CbqRelation, ConjunctiveBooleanQuery,
-    FlowShopScheduling, LongestCommonSubsequence, MinimumTardinessSequencing,
-    MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack, QueryArg,
-    RectilinearPictureCompression, ResourceConstrainedScheduling,
+    ConsistencyOfDatabaseFrequencyTables, FlowShopScheduling, FrequencyTable, KnownValue,
+    LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop,
+    PartiallyOrderedKnapsack, QueryArg, RectilinearPictureCompression,
+    ResourceConstrainedScheduling,
     SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost,
     SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness,
     SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence,
@@ -139,6 +140,10 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool {
         && args.dependencies.is_none()
         &&
args.relation_attrs.is_none()
         && args.known_keys.is_none()
+        && args.num_objects.is_none()
+        && args.attribute_domains.is_none()
+        && args.frequency_tables.is_none()
+        && args.known_values.is_none()
         && args.domain_size.is_none()
         && args.relations.is_none()
         && args.conjuncts_spec.is_none()
@@ -187,6 +192,126 @@ fn ensure_attribute_indices_in_range(
     Ok(())
 }
 
+fn parse_cdft_frequency_tables(
+    raw: &str,
+    attribute_domains: &[usize],
+    num_objects: usize,
+) -> Result<Vec<FrequencyTable>> {
+    let num_attributes = attribute_domains.len();
+    let mut seen_pairs = BTreeSet::new();
+
+    raw.split(';')
+        .filter(|entry| !entry.trim().is_empty())
+        .map(|entry| {
+            let (pair_str, counts_str) = entry.trim().split_once(':').ok_or_else(|| {
+                anyhow::anyhow!(
+                    "Invalid frequency table '{entry}', expected 'a,b:row0|row1|...'"
+                )
+            })?;
+            let pair: Vec<usize> = util::parse_comma_list(pair_str.trim())?;
+            anyhow::ensure!(
+                pair.len() == 2,
+                "Frequency table '{entry}' must start with exactly two attribute indices"
+            );
+
+            let attribute_a = pair[0];
+            let attribute_b = pair[1];
+            ensure_attribute_indices_in_range(
+                &[attribute_a, attribute_b],
+                num_attributes,
+                &format!("Frequency table '{entry}'"),
+            )?;
+            anyhow::ensure!(
+                attribute_a != attribute_b,
+                "Frequency table '{entry}' must use two distinct attributes"
+            );
+
+            let pair_key = if attribute_a < attribute_b {
+                (attribute_a, attribute_b)
+            } else {
+                (attribute_b, attribute_a)
+            };
+            anyhow::ensure!(
+                seen_pairs.insert(pair_key),
+                "Duplicate frequency table pair ({}, {})",
+                pair_key.0,
+                pair_key.1
+            );
+
+            let rows: Vec<Vec<usize>> = counts_str
+                .split('|')
+                .map(|row| util::parse_comma_list(row.trim()))
+                .collect::<Result<_>>()?;
+
+            let expected_rows = attribute_domains[attribute_a];
+            anyhow::ensure!(
+                rows.len() == expected_rows,
+                "Frequency table '{entry}' has {} rows but attribute {attribute_a} has domain size {expected_rows}",
+                rows.len()
+            );
+
+            let expected_cols = attribute_domains[attribute_b];
+            for (row_index, row) in
rows.iter().enumerate() {
+                anyhow::ensure!(
+                    row.len() == expected_cols,
+                    "Frequency table '{entry}' row {row_index} has {} columns but attribute {attribute_b} has domain size {expected_cols}",
+                    row.len()
+                );
+            }
+
+            let total: usize = rows.iter().flatten().copied().sum();
+            anyhow::ensure!(
+                total == num_objects,
+                "Frequency table '{entry}' sums to {total}, expected num_objects={num_objects}"
+            );
+
+            Ok(FrequencyTable::new(attribute_a, attribute_b, rows))
+        })
+        .collect()
+}
+
+fn parse_cdft_known_values(
+    raw: Option<&str>,
+    num_objects: usize,
+    attribute_domains: &[usize],
+) -> Result<Vec<KnownValue>> {
+    let num_attributes = attribute_domains.len();
+    match raw {
+        None => Ok(vec![]),
+        Some(s) if s.trim().is_empty() => Ok(vec![]),
+        Some(s) => s
+            .split(';')
+            .filter(|entry| !entry.trim().is_empty())
+            .map(|entry| {
+                let triple: Vec<usize> = util::parse_comma_list(entry.trim())?;
+                anyhow::ensure!(
+                    triple.len() == 3,
+                    "Known value '{entry}' must be an 'object,attribute,value' triple"
+                );
+                let object = triple[0];
+                let attribute = triple[1];
+                let value = triple[2];
+
+                anyhow::ensure!(
+                    object < num_objects,
+                    "Known value '{entry}' has object index {object} out of range for num_objects={num_objects}"
+                );
+                anyhow::ensure!(
+                    attribute < num_attributes,
+                    "Known value '{entry}' has attribute index {attribute} out of range for {num_attributes} attributes"
+                );
+                let domain_size = attribute_domains[attribute];
+                anyhow::ensure!(
+                    value < domain_size,
+                    "Known value '{entry}' has value {value} out of range for attribute {attribute} with domain size {domain_size}"
+                );
+
+                Ok(KnownValue::new(object, attribute, value))
+            })
+            .collect(),
+    }
+}
+
 fn resolve_example_problem_ref(
     input: &str,
     rgraph: &problemreductions::rules::ReductionGraph,
@@ -438,6 +563,9 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str {
         "--arcs \"0>1,0>2,1>3,2>3,1>4,3>5,4>5,2>4\" --weights 3,2,4,1,2,3,1,3 --partition \"0,1;2,3;4,7;5,6\" --bound 10"
     }
"AdditionalKey" => "--num-attributes 6 --dependencies \"0,1:2,3;2,3:4,5;4,5:0,1\" --relation-attrs 0,1,2,3,4,5 --known-keys \"0,1;2,3;4,5\"", + "ConsistencyOfDatabaseFrequencyTables" => { + "--num-objects 6 --attribute-domains \"2,3,2\" --frequency-tables \"0,1:1,1,1|1,1,1;1,2:1,1|0,2|1,1\" --known-values \"0,0,0;3,0,1;1,2,1\"" + } "SubgraphIsomorphism" => "--graph 0-1,1-2,2-0 --pattern 0-1", "RectilinearPictureCompression" => { "--matrix \"1,1,0,0;1,1,0,0;0,0,1,1;0,0,1,1\" --bound 2" @@ -567,6 +695,15 @@ fn help_flag_hint( } ("ShortestCommonSupersequence", "strings") => "symbol lists: \"0,1,2;1,2,0\"", ("MultipleChoiceBranching", "partition") => "semicolon-separated groups: \"0,1;2,3\"", + ("ConsistencyOfDatabaseFrequencyTables", "attribute_domains") => { + "comma-separated domain sizes: 2,3,2" + } + ("ConsistencyOfDatabaseFrequencyTables", "frequency_tables") => { + "semicolon-separated tables: \"0,1:1,1,1|1,1,1;1,2:1,1|0,2|1,1\"" + } + ("ConsistencyOfDatabaseFrequencyTables", "known_values") => { + "semicolon-separated triples: \"0,0,0;3,0,1;1,2,1\"" + } ("ConsecutiveOnesSubmatrix", "matrix") => "semicolon-separated 0/1 rows: \"1,0;0,1\"", _ => type_format_hint(type_name, graph_type), } @@ -1573,6 +1710,49 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { ) } + "ConsistencyOfDatabaseFrequencyTables" => { + let usage = "Usage: pred create ConsistencyOfDatabaseFrequencyTables --num-objects 6 --attribute-domains \"2,3,2\" --frequency-tables \"0,1:1,1,1|1,1,1;1,2:1,1|0,2|1,1\" --known-values \"0,0,0;3,0,1;1,2,1\""; + let num_objects = args.num_objects.ok_or_else(|| { + anyhow::anyhow!( + "ConsistencyOfDatabaseFrequencyTables requires --num-objects\n\n{usage}" + ) + })?; + let attribute_domains_str = args.attribute_domains.as_deref().ok_or_else(|| { + anyhow::anyhow!( + "ConsistencyOfDatabaseFrequencyTables requires --attribute-domains\n\n{usage}" + ) + })?; + let frequency_tables_str = args.frequency_tables.as_deref().ok_or_else(|| { + 
anyhow::anyhow!( + "ConsistencyOfDatabaseFrequencyTables requires --frequency-tables\n\n{usage}" + ) + })?; + + let attribute_domains: Vec = util::parse_comma_list(attribute_domains_str)?; + for (index, &domain_size) in attribute_domains.iter().enumerate() { + anyhow::ensure!( + domain_size > 0, + "attribute domain at index {index} must be positive\n\n{usage}" + ); + } + let frequency_tables = + parse_cdft_frequency_tables(frequency_tables_str, &attribute_domains, num_objects) + .map_err(|e| anyhow::anyhow!("{e}\n\n{usage}"))?; + let known_values = + parse_cdft_known_values(args.known_values.as_deref(), num_objects, &attribute_domains) + .map_err(|e| anyhow::anyhow!("{e}\n\n{usage}"))?; + + ( + ser(ConsistencyOfDatabaseFrequencyTables::new( + num_objects, + attribute_domains, + frequency_tables, + known_values, + ))?, + resolved_variant.clone(), + ) + } + // SubsetSum "SubsetSum" => { let sizes_str = args.sizes.as_deref().ok_or_else(|| { @@ -4971,6 +5151,10 @@ mod tests { conjuncts_spec: None, relation_attrs: None, known_keys: None, + num_objects: None, + attribute_domains: None, + frequency_tables: None, + known_values: None, costs: None, cut_bound: None, size_bound: None, diff --git a/problemreductions-cli/tests/cli_tests.rs b/problemreductions-cli/tests/cli_tests.rs index a2f6a22fe..fc696e864 100644 --- a/problemreductions-cli/tests/cli_tests.rs +++ b/problemreductions-cli/tests/cli_tests.rs @@ -5538,6 +5538,64 @@ fn test_create_bcnf_rejects_out_of_range_target_attribute_indices() { ); } +#[test] +fn test_create_consistency_of_database_frequency_tables() { + let output = pred() + .args([ + "create", + "ConsistencyOfDatabaseFrequencyTables", + "--num-objects", + "6", + "--attribute-domains", + "2,3,2", + "--frequency-tables", + "0,1:1,1,1|1,1,1;1,2:1,1|0,2|1,1", + "--known-values", + "0,0,0;3,0,1;1,2,1", + ]) + .output() + .unwrap(); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = 
String::from_utf8(output.stdout).unwrap(); + let json: serde_json::Value = serde_json::from_str(&stdout).unwrap(); + assert_eq!(json["type"], "ConsistencyOfDatabaseFrequencyTables"); + assert_eq!(json["data"]["num_objects"], 6); + assert_eq!(json["data"]["attribute_domains"], serde_json::json!([2, 3, 2])); + assert_eq!(json["data"]["frequency_tables"][0]["attribute_a"], 0); + assert_eq!(json["data"]["frequency_tables"][0]["attribute_b"], 1); + assert_eq!( + json["data"]["frequency_tables"][0]["counts"], + serde_json::json!([[1, 1, 1], [1, 1, 1]]) + ); + assert_eq!( + json["data"]["known_values"], + serde_json::json!([ + {"object": 0, "attribute": 0, "value": 0}, + {"object": 3, "attribute": 0, "value": 1}, + {"object": 1, "attribute": 2, "value": 1} + ]) + ); +} + +#[test] +fn test_create_consistency_of_database_frequency_tables_problem_help_uses_supported_flags() { + let output = pred() + .args(["create", "ConsistencyOfDatabaseFrequencyTables"]) + .output() + .unwrap(); + assert!(!output.status.success()); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!(stderr.contains("--num-objects"), "stderr: {stderr}"); + assert!(stderr.contains("--attribute-domains"), "stderr: {stderr}"); + assert!(stderr.contains("--frequency-tables"), "stderr: {stderr}"); + assert!(stderr.contains("--known-values"), "stderr: {stderr}"); +} + #[test] fn test_create_multiple_copy_file_allocation() { let output = pred() @@ -6782,6 +6840,24 @@ fn test_create_model_example_multiprocessor_scheduling() { assert_eq!(json["type"], "MultiprocessorScheduling"); } +#[test] +fn test_create_model_example_consistency_of_database_frequency_tables() { + let output = pred() + .args(["create", "--example", "ConsistencyOfDatabaseFrequencyTables"]) + .output() + .unwrap(); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8(output.stdout).unwrap(); + let json: serde_json::Value = 
serde_json::from_str(&stdout).unwrap(); + assert_eq!(json["type"], "ConsistencyOfDatabaseFrequencyTables"); + assert_eq!(json["data"]["num_objects"], 6); +} + #[test] fn test_create_model_example_minimum_multiway_cut() { let output = pred() From 635d9f9864ff4bd5e76ca8ce478ff5fc4b250ad4 Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Sat, 21 Mar 2026 04:28:56 +0800 Subject: [PATCH 4/9] Add ILP reduction for frequency table consistency --- ...onsistencyofdatabasefrequencytables_ilp.rs | 244 ++++++++++++++++++ src/rules/mod.rs | 3 + ...onsistencyofdatabasefrequencytables_ilp.rs | 81 ++++++ 3 files changed, 328 insertions(+) create mode 100644 src/rules/consistencyofdatabasefrequencytables_ilp.rs create mode 100644 src/unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs diff --git a/src/rules/consistencyofdatabasefrequencytables_ilp.rs b/src/rules/consistencyofdatabasefrequencytables_ilp.rs new file mode 100644 index 000000000..0bea7e8c3 --- /dev/null +++ b/src/rules/consistencyofdatabasefrequencytables_ilp.rs @@ -0,0 +1,244 @@ +//! Reduction from ConsistencyOfDatabaseFrequencyTables to ILP. +//! +//! The reduction uses a binary one-hot encoding: +//! - `y_{v,a,x}` is 1 iff object `v` receives value `x` for attribute `a` +//! - `z_{t,v,x,y}` is 1 iff, for table `t`, object `v` realizes cell `(x, y)` +//! +//! The pair-count equalities are linearized with standard McCormick constraints. + +use crate::models::algebraic::{ILP, LinearConstraint, ObjectiveSense}; +use crate::models::misc::ConsistencyOfDatabaseFrequencyTables; +use crate::reduction; +use crate::rules::traits::{ReduceTo, ReductionResult}; + +/// Result of reducing ConsistencyOfDatabaseFrequencyTables to ILP. 
+#[derive(Debug, Clone)]
+pub struct ReductionCDFTToILP {
+    target: ILP<f64>,
+    source: ConsistencyOfDatabaseFrequencyTables,
+}
+
+impl ReductionCDFTToILP {
+    fn assignment_block_size(&self) -> usize {
+        self.source.attribute_domains().iter().sum()
+    }
+
+    fn attribute_offset(&self, attribute: usize) -> usize {
+        self.source.attribute_domains()[..attribute].iter().sum()
+    }
+
+    fn assignment_var_index(&self, object: usize, attribute: usize, value: usize) -> usize {
+        object * self.assignment_block_size() + self.attribute_offset(attribute) + value
+    }
+
+    fn auxiliary_block_start(&self, table_index: usize) -> usize {
+        self.source.num_assignment_indicators()
+            + self.source.frequency_tables()[..table_index]
+                .iter()
+                .map(|table| self.source.num_objects() * table.num_cells())
+                .sum::<usize>()
+    }
+
+    fn auxiliary_var_index(
+        &self,
+        table_index: usize,
+        object: usize,
+        value_a: usize,
+        value_b: usize,
+    ) -> usize {
+        let table = &self.source.frequency_tables()[table_index];
+        let cols = self.source.attribute_domains()[table.attribute_b()];
+        self.auxiliary_block_start(table_index)
+            + object * table.num_cells()
+            + value_a * cols
+            + value_b
+    }
+
+    /// Encode a satisfying source assignment as a concrete ILP variable vector.
+    #[cfg_attr(not(any(test, feature = "example-db")), allow(dead_code))]
+    pub(crate) fn encode_source_solution(&self, source_solution: &[usize]) -> Vec<usize> {
+        let mut target_solution = vec![0usize; self.target.num_vars];
+        let num_attributes = self.source.num_attributes();
+
+        for object in 0..self.source.num_objects() {
+            for attribute in 0..num_attributes {
+                let source_index = object * num_attributes + attribute;
+                let value = source_solution[source_index];
+                let var = self.assignment_var_index(object, attribute, value);
+                target_solution[var] = 1;
+            }
+        }
+
+        for (table_index, table) in self.source.frequency_tables().iter().enumerate() {
+            for object in 0..self.source.num_objects() {
+                let value_a = source_solution[object * num_attributes + table.attribute_a()];
+                let value_b = source_solution[object * num_attributes + table.attribute_b()];
+                let var = self.auxiliary_var_index(table_index, object, value_a, value_b);
+                target_solution[var] = 1;
+            }
+        }
+
+        target_solution
+    }
+}
+
+impl ReductionResult for ReductionCDFTToILP {
+    type Source = ConsistencyOfDatabaseFrequencyTables;
+    type Target = ILP<f64>;
+
+    fn target_problem(&self) -> &ILP<f64> {
+        &self.target
+    }
+
+    fn extract_solution(&self, target_solution: &[usize]) -> Vec<usize> {
+        let mut source_solution = Vec::with_capacity(self.source.num_assignment_variables());
+        for object in 0..self.source.num_objects() {
+            for (attribute, &domain_size) in self.source.attribute_domains().iter().enumerate() {
+                let value = (0..domain_size)
+                    .find(|&candidate| {
+                        target_solution
+                            .get(self.assignment_var_index(object, attribute, candidate))
+                            .copied()
+                            .unwrap_or(0)
+                            == 1
+                    })
+                    .unwrap_or(0);
+                source_solution.push(value);
+            }
+        }
+        source_solution
+    }
+}
+
+#[reduction(
+    overhead = {
+        num_vars = "num_assignment_indicators + num_auxiliary_frequency_indicators",
+        num_constraints = "num_assignment_variables + num_known_values + num_frequency_cells + 3 * num_auxiliary_frequency_indicators",
+    }
+)]
+impl ReduceTo<ILP<f64>> for 
ConsistencyOfDatabaseFrequencyTables { + type Result = ReductionCDFTToILP; + + fn reduce_to(&self) -> Self::Result { + let source = self.clone(); + let helper = ReductionCDFTToILP { + target: ILP::empty(), + source: source.clone(), + }; + + let mut constraints = Vec::with_capacity( + source.num_assignment_variables() + + source.num_known_values() + + source.num_frequency_cells() + + 3 * source.num_auxiliary_frequency_indicators(), + ); + + for object in 0..source.num_objects() { + for (attribute, &domain_size) in source.attribute_domains().iter().enumerate() { + let terms = (0..domain_size) + .map(|value| (helper.assignment_var_index(object, attribute, value), 1.0)) + .collect(); + constraints.push(LinearConstraint::eq(terms, 1.0)); + } + } + + for known_value in source.known_values() { + constraints.push(LinearConstraint::eq( + vec![( + helper.assignment_var_index( + known_value.object(), + known_value.attribute(), + known_value.value(), + ), + 1.0, + )], + 1.0, + )); + } + + for (table_index, table) in source.frequency_tables().iter().enumerate() { + let rows = source.attribute_domains()[table.attribute_a()]; + let cols = source.attribute_domains()[table.attribute_b()]; + + for value_a in 0..rows { + for value_b in 0..cols { + let count_terms = (0..source.num_objects()) + .map(|object| { + ( + helper.auxiliary_var_index(table_index, object, value_a, value_b), + 1.0, + ) + }) + .collect(); + constraints.push(LinearConstraint::eq( + count_terms, + table.counts()[value_a][value_b] as f64, + )); + + for object in 0..source.num_objects() { + let z = helper.auxiliary_var_index(table_index, object, value_a, value_b); + let y_a = + helper.assignment_var_index(object, table.attribute_a(), value_a); + let y_b = + helper.assignment_var_index(object, table.attribute_b(), value_b); + + constraints.push(LinearConstraint::le(vec![(z, 1.0), (y_a, -1.0)], 0.0)); + constraints.push(LinearConstraint::le(vec![(z, 1.0), (y_b, -1.0)], 0.0)); + constraints.push(LinearConstraint::ge( + 
vec![(z, 1.0), (y_a, -1.0), (y_b, -1.0)], + -1.0, + )); + } + } + } + } + + let target = ILP::new( + source.num_assignment_indicators() + source.num_auxiliary_frequency_indicators(), + constraints, + vec![], + ObjectiveSense::Minimize, + ); + + ReductionCDFTToILP { target, source } + } +} + +#[cfg(feature = "example-db")] +pub(crate) fn canonical_rule_example_specs() -> Vec { + use crate::export::SolutionPair; + use crate::models::misc::{FrequencyTable, KnownValue}; + + vec![crate::example_db::specs::RuleExampleSpec { + id: "consistencyofdatabasefrequencytables_to_ilp", + build: || { + let source = ConsistencyOfDatabaseFrequencyTables::new( + 6, + vec![2, 3, 2], + vec![ + FrequencyTable::new(0, 1, vec![vec![1, 1, 1], vec![1, 1, 1]]), + FrequencyTable::new(1, 2, vec![vec![1, 1], vec![0, 2], vec![1, 1]]), + ], + vec![ + KnownValue::new(0, 0, 0), + KnownValue::new(3, 0, 1), + KnownValue::new(1, 2, 1), + ], + ); + let source_config = vec![0, 0, 0, 0, 1, 1, 0, 2, 1, 1, 0, 1, 1, 1, 1, 1, 2, 0]; + let reduction = source.clone().reduce_to(); + let target_config = reduction.encode_source_solution(&source_config); + crate::example_db::specs::rule_example_with_witness::<_, ILP>( + source, + SolutionPair { + source_config, + target_config, + }, + ) + }, + }] +} + +#[cfg(test)] +#[path = "../unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs"] +mod tests; diff --git a/src/rules/mod.rs b/src/rules/mod.rs index 4b55c9aae..33efd4f52 100644 --- a/src/rules/mod.rs +++ b/src/rules/mod.rs @@ -50,6 +50,8 @@ pub(crate) mod binpacking_ilp; #[cfg(feature = "ilp-solver")] pub(crate) mod circuit_ilp; #[cfg(feature = "ilp-solver")] +pub(crate) mod consistencyofdatabasefrequencytables_ilp; +#[cfg(feature = "ilp-solver")] pub(crate) mod coloring_ilp; #[cfg(feature = "ilp-solver")] pub(crate) mod factoring_ilp; @@ -124,6 +126,7 @@ pub(crate) fn canonical_rule_example_specs() -> Vec ConsistencyOfDatabaseFrequencyTables { + ConsistencyOfDatabaseFrequencyTables::new( + 2, + vec![2, 2], + 
vec![FrequencyTable::new(0, 1, vec![vec![1, 0], vec![0, 1]])],
+        vec![KnownValue::new(0, 0, 0)],
+    )
+}
+
+fn small_yes_witness() -> Vec<usize> {
+    vec![0, 0, 1, 1]
+}
+
+fn small_no_instance() -> ConsistencyOfDatabaseFrequencyTables {
+    ConsistencyOfDatabaseFrequencyTables::new(
+        2,
+        vec![2, 2],
+        vec![FrequencyTable::new(0, 1, vec![vec![1, 0], vec![0, 1]])],
+        vec![KnownValue::new(0, 0, 0), KnownValue::new(1, 1, 0)],
+    )
+}
+
+#[test]
+fn test_cdft_to_ilp_structure() {
+    let problem = small_yes_instance();
+    let reduction: ReductionCDFTToILP = ReduceTo::<ILP<f64>>::reduce_to(&problem);
+    let ilp = reduction.target_problem();
+
+    assert_eq!(ilp.num_vars, 16);
+    assert_eq!(ilp.constraints.len(), 33);
+    assert_eq!(ilp.sense, ObjectiveSense::Minimize);
+    assert!(ilp.objective.is_empty());
+}
+
+#[test]
+fn test_cdft_to_ilp_closed_loop() {
+    let problem = small_yes_instance();
+    let reduction: ReductionCDFTToILP = ReduceTo::<ILP<f64>>::reduce_to(&problem);
+    assert_satisfaction_round_trip_from_optimization_target(
+        &problem,
+        &reduction,
+        "ConsistencyOfDatabaseFrequencyTables->ILP closed loop",
+    );
+}
+
+#[test]
+fn test_cdft_to_ilp_solution_encoding_round_trip() {
+    let problem = small_yes_instance();
+    let reduction: ReductionCDFTToILP = ReduceTo::<ILP<f64>>::reduce_to(&problem);
+    let ilp_solution = reduction.encode_source_solution(&small_yes_witness());
+    let extracted = reduction.extract_solution(&ilp_solution);
+    assert_eq!(extracted, small_yes_witness());
+}
+
+#[test]
+fn test_cdft_to_ilp_unsat_instance_is_infeasible() {
+    let problem = small_no_instance();
+    let reduction: ReductionCDFTToILP = ReduceTo::<ILP<f64>>::reduce_to(&problem);
+    let solver = ILPSolver::new();
+    assert!(solver.solve(reduction.target_problem()).is_none());
+}
+
+#[test]
+fn test_cdft_to_ilp_solve_reduced() {
+    let problem = small_yes_instance();
+    let solver = ILPSolver::new();
+    let solution = solver
+        .solve_reduced(&problem)
+        .expect("solve_reduced should find a satisfying assignment");
+    
assert!(problem.evaluate(&solution)); +} From 29ec7df9e5f23b40ac199be00ab277f9e6be4897 Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Sat, 21 Mar 2026 04:34:58 +0800 Subject: [PATCH 5/9] Add paper entry for frequency table consistency --- docs/paper/reductions.typ | 56 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index a412a3a29..e6c29ed33 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -105,6 +105,7 @@ "BoundedComponentSpanningForest": [Bounded Component Spanning Forest], "BinPacking": [Bin Packing], "BoyceCoddNormalFormViolation": [Boyce-Codd Normal Form Violation], + "ConsistencyOfDatabaseFrequencyTables": [Consistency of Database Frequency Tables], "ClosestVectorProblem": [Closest Vector Problem], "ConsecutiveSets": [Consecutive Sets], "MinimumMultiwayCut": [Minimum Multiway Cut], @@ -3092,6 +3093,61 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76], A relation satisfies _Boyce-Codd Normal Form_ (BCNF) if every non-trivial functional dependency $X arrow.r Y$ has $X$ as a superkey --- that is, $X^+$ = $A'$. This classical NP-complete problem from database theory asks whether the given attribute subset $A'$ violates BCNF. The NP-completeness was established by Beeri and Bernstein (1979) via reduction from Hitting Set. It appears as problem SR29 in Garey and Johnson's compendium (category A4: Storage and Retrieval). 
] +#{ + let x = load-model-example("ConsistencyOfDatabaseFrequencyTables") + let num_objects = x.instance.num_objects + let num_attrs = x.instance.attribute_domains.len() + let domains = x.instance.attribute_domains + let table01 = x.instance.frequency_tables.at(0).counts + let table12 = x.instance.frequency_tables.at(1).counts + let config = x.optimal_config + let value = (object, attr) => config.at(object * num_attrs + attr) + [ + #problem-def("ConsistencyOfDatabaseFrequencyTables")[ + Given a finite set $V$ of objects, a finite set $A$ of attributes, a domain $D_a$ for each $a in A$, a collection of pairwise frequency tables $f_(a,b): D_a times D_b -> ZZ^(>=0)$ whose entries sum to $|V|$, and a set $K subset.eq V times A times union_(a in A) D_a$ of known triples $(v, a, x)$, determine whether there exist functions $g_a: V -> D_a$ such that $g_a(v) = x$ for every $(v, a, x) in K$ and, for every published table $f_(a,b)$, exactly $f_(a,b)(x, y)$ objects satisfy $(g_a(v), g_b(v)) = (x, y)$. + ][ + Consistency of Database Frequency Tables is Garey and Johnson's storage-and-retrieval problem SR35 @garey1979. It asks whether released pairwise marginals can come from some hidden microdata table while respecting already known individual attribute values, making it a natural decision problem in statistical disclosure control. The direct witness space implemented in this crate assigns one categorical variable to each object-attribute pair, so exhaustive search runs in $O^*((product_(a in A) |D_a|)^(|V|))$. #footnote[This is the exact search bound induced by the implementation's configuration space; no faster general exact worst-case algorithm is claimed here.] + + *Example.* Let $|V| = #num_objects$ with attributes $a_0, a_1, a_2$ having domain sizes $#domains.at(0)$, $#domains.at(1)$, and $#domains.at(2)$ respectively. 
Publish the pairwise tables + + #align(center, table( + columns: 4, + align: center, + table.header([$f_(a_0, a_1)$], [$0$], [$1$], [$2$]), + [$0$], [#table01.at(0).at(0)], [#table01.at(0).at(1)], [#table01.at(0).at(2)], + [$1$], [#table01.at(1).at(0)], [#table01.at(1).at(1)], [#table01.at(1).at(2)], + )) + + and + + #align(center, table( + columns: 3, + align: center, + table.header([$f_(a_1, a_2)$], [$0$], [$1$]), + [$0$], [#table12.at(0).at(0)], [#table12.at(0).at(1)], + [$1$], [#table12.at(1).at(0)], [#table12.at(1).at(1)], + [$2$], [#table12.at(2).at(0)], [#table12.at(2).at(1)], + )) + + together with the known values $K = {(v_0, a_0, 0), (v_3, a_0, 1), (v_1, a_2, 1)}$. One consistent completion is: + + #align(center, table( + columns: 4, + align: center, + table.header([object], [$a_0$], [$a_1$], [$a_2$]), + [$v_0$], [#value(0, 0)], [#value(0, 1)], [#value(0, 2)], + [$v_1$], [#value(1, 0)], [#value(1, 1)], [#value(1, 2)], + [$v_2$], [#value(2, 0)], [#value(2, 1)], [#value(2, 2)], + [$v_3$], [#value(3, 0)], [#value(3, 1)], [#value(3, 2)], + [$v_4$], [#value(4, 0)], [#value(4, 1)], [#value(4, 2)], + [$v_5$], [#value(5, 0)], [#value(5, 1)], [#value(5, 2)], + )) + + This witness satisfies every published count: in $f_(a_0, a_1)$ each of the six cells appears exactly once, while in $f_(a_1, a_2)$ the five occupied cells have multiplicities $1, 1, 2, 1, 1$ exactly as listed above. It also respects all three known triples, so the answer is YES. + ] + ] +} + #problem-def("SumOfSquaresPartition")[ Given a finite set $A = {a_0, dots, a_(n-1)}$ with sizes $s(a_i) in ZZ^+$, a positive integer $K lt.eq |A|$ (number of groups), and a positive integer $J$ (bound), determine whether $A$ can be partitioned into $K$ disjoint sets $A_1, dots, A_K$ such that $sum_(i=1)^K (sum_(a in A_i) s(a))^2 lt.eq J$. 
][ From 0c991bf580746006a5845918c1d7f149a9756594 Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Sat, 21 Mar 2026 04:35:06 +0800 Subject: [PATCH 6/9] chore: remove plan file after implementation --- ...onsistency-of-database-frequency-tables.md | 216 ------------------ 1 file changed, 216 deletions(-) delete mode 100644 docs/plans/2026-03-21-consistency-of-database-frequency-tables.md diff --git a/docs/plans/2026-03-21-consistency-of-database-frequency-tables.md b/docs/plans/2026-03-21-consistency-of-database-frequency-tables.md deleted file mode 100644 index d4051a6d6..000000000 --- a/docs/plans/2026-03-21-consistency-of-database-frequency-tables.md +++ /dev/null @@ -1,216 +0,0 @@ -# ConsistencyOfDatabaseFrequencyTables Implementation Plan - -> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. - -**Goal:** Add the `ConsistencyOfDatabaseFrequencyTables` satisfaction model, its canonical example/docs/CLI support, and an ILP reduction so the problem is discoverable, testable, and solvable through the existing pipeline. - -**Architecture:** Model the problem as one categorical variable per `(object, attribute)` pair, with per-variable domain sizes taken from the attribute domains. Keep the core model in `src/models/misc/consistency_of_database_frequency_tables.rs`, register it through the normal schema/variant/example-db paths, then add a binary `ILP` reduction using one-hot assignment variables plus McCormick-linearized pair-count constraints for each published frequency table. Prefer explicit helper structs for frequency tables and known values so the JSON schema, CLI parsing, and paper example stay readable. - -**Tech Stack:** Rust workspace, serde, inventory schema registry, reduction macros, example-db exporters, Typst paper, existing CLI `pred create` infrastructure. 
- ---- - -### Task 1: Add the model with TDD - -**Files:** -- Create: `src/models/misc/consistency_of_database_frequency_tables.rs` -- Create: `src/unit_tests/models/misc/consistency_of_database_frequency_tables.rs` -- Modify: `src/models/misc/mod.rs` -- Modify: `src/models/mod.rs` -- Modify: `src/lib.rs` - -**Step 1: Write the failing model tests** - -Add tests in `src/unit_tests/models/misc/consistency_of_database_frequency_tables.rs` for: -- constructor/getters/schema-facing shape on the issue’s YES instance -- `dims()` returning one variable per `(object, attribute)` with the correct per-attribute domain sizes -- `evaluate()` accepting the flattened YES witness from the issue -- `evaluate()` rejecting wrong config length, out-of-range values, conflicting known values, and table-mismatch assignments -- `BruteForce::find_satisfying()` finding a satisfying assignment for the YES instance -- `BruteForce::find_satisfying()` returning `None` on a minimally inconsistent instance -- serde round-trip - -Use the issue’s worked example as the paper/example-db source of truth. Flatten the witness object-major: -`[0,0,0, 0,1,1, 0,2,1, 1,0,1, 1,1,1, 1,2,0]`. - -**Step 2: Run the new test file and confirm RED** - -Run: -```bash -cargo test test_consistency_of_database_frequency_tables -- --nocapture -``` - -Expected: compile failure because the model does not exist yet. 
- -**Step 3: Implement the model minimally** - -Create `src/models/misc/consistency_of_database_frequency_tables.rs` with: -- `ProblemSchemaEntry` metadata -- explicit serializable helper structs for frequency tables / known values if that makes JSON and docs clearer -- constructor validation: - - `attribute_domains` non-empty and every domain size positive - - frequency-table attribute indices distinct and in range - - each table has row count `attribute_domains[a]`, each row has column count `attribute_domains[b]` - - each table sum equals `num_objects` - - known values are in range -- getters needed for complexity / overhead: - - `num_objects()` - - `num_attributes()` - - `domain_size_product()` or equivalent exact brute-force complexity helper - - any extra count getters needed by the ILP reduction overhead -- `dims()` returning `attribute_domains` repeated once per object -- `evaluate()` that: - - rejects malformed configs - - checks all known values - - counts each published pair table exactly -- `SatisfactionProblem` impl -- `declare_variants!` with an exact multi-domain brute-force expression such as `domain_size_product^num_objects` rather than the issue’s binary-only shorthand if needed for correctness -- canonical model example spec using the issue’s YES instance - -Register the model in the misc/module/prelude exports. - -**Step 4: Run the model tests and get GREEN** - -Run: -```bash -cargo test test_consistency_of_database_frequency_tables -- --nocapture -``` - -Expected: the new model tests pass. - -**Step 5: Refactor only after green** - -Extract small helpers only if they reduce duplication in evaluation/validation, then re-run the same test command. 
- -### Task 2: Add CLI and example-db support with TDD - -**Files:** -- Modify: `problemreductions-cli/src/cli.rs` -- Modify: `problemreductions-cli/src/commands/create.rs` -- Modify: `src/example_db/model_builders.rs` - -**Step 1: Write the failing CLI / example-db tests** - -Add or extend the most local existing tests for: -- alias/discovery/help exposure if covered in this repo’s CLI tests -- `pred create ConsistencyOfDatabaseFrequencyTables ...` producing the expected JSON payload -- `pred create --example ConsistencyOfDatabaseFrequencyTables` using the canonical example - -If no focused CLI unit test file exists yet for `create`, add the smallest relevant coverage next to current create-command tests instead of inventing a new harness. - -**Step 2: Run the focused tests and confirm RED** - -Run the narrowest affected command, for example: -```bash -cargo test create:: --package problemreductions-cli -- --nocapture -``` - -Expected: failure because the problem is not yet wired into CLI construction/help. - -**Step 3: Implement minimal CLI support** - -Add create-flag support using explicit, parseable fields: -- `--num-objects 6` -- `--attribute-domains "2,3,2"` -- `--frequency-tables "0,1:1,1,1|1,1,1;1,2:1,1|0,2|1,1"` -- `--known-values "0,0,0;3,0,1;1,2,1"` (optional) - -Update: -- `CreateArgs` with the new fields -- `all_data_flags_empty()` -- help tables / usage examples / problem-specific examples -- `create()` match arm to parse and validate the compact table syntax into the model structs - -Also ensure `print_problem_help` shows a realistic usage example for this problem. - -**Step 4: Run the focused tests and get GREEN** - -Repeat the CLI/example-db test command until it passes. 
- -### Task 3: Add the ILP reduction with TDD - -**Files:** -- Create: `src/rules/consistencyofdatabasefrequencytables_ilp.rs` -- Create: `src/unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs` -- Modify: `src/rules/mod.rs` - -**Step 1: Write the failing reduction tests** - -Follow the project’s closed-loop reduction reference and add tests for: -- reduction shape on the issue’s YES example -- ILP feasibility witness extracts back to the original categorical assignment -- a closed-loop test using brute force on the source and brute force / direct feasibility on the reduced ILP -- the inconsistent instance reducing to an infeasible ILP -- canonical rule example registration if needed - -**Step 2: Run the reduction tests and confirm RED** - -Run: -```bash -cargo test test_consistencyofdatabasefrequencytables_to_ilp --features ilp-solver -- --nocapture -``` - -Expected: compile failure because the rule does not exist. - -**Step 3: Implement the reduction minimally** - -Create a binary `ILP` reduction with: -- assignment variables `y_{v,a,x}` for each object/attribute/value -- auxiliary McCormick variables `z_{t,v,x,y}` for each frequency-table cell and object -- constraints: - - exactly one value for each `(v,a)` - - known values fixed to `1` - - McCormick linearization for every `z` - - exact frequency-table counts via `sum_v z = f_{a,b}(x,y)` -- zero objective with `ObjectiveSense::Minimize` -- solution extraction that reads the chosen `y_{v,a,x}` variables back into the flattened source config -- reduction overhead expressed via concrete getters added in Task 1 -- canonical rule example based on the same issue example - -Register the rule in `src/rules/mod.rs` under `#[cfg(feature = "ilp-solver")]`. - -**Step 4: Run the reduction tests and get GREEN** - -Run the focused reduction test command again until it passes. 
- -### Task 4: Add paper coverage and final verification - -**Files:** -- Modify: `docs/paper/reductions.typ` - -**Step 1: Add the paper entry after code is green** - -Add: -- `display-name` entry for `ConsistencyOfDatabaseFrequencyTables` -- `problem-def("ConsistencyOfDatabaseFrequencyTables")` with the formal database-frequency-table definition -- short background and cited complexity discussion -- the issue’s worked example in tutorial style, including the satisfying assignment and explicit verification narrative - -Use prose and a table-based presentation rather than forcing a graph-style figure. - -**Step 2: Run paper verification** - -Run: -```bash -make paper -``` - -Expected: clean Typst build. - -**Step 3: Run fresh repo verification before commit/push** - -Run: -```bash -make test -make clippy -``` - -If the ILP reduction or paper/export paths require additional targeted commands, run them too and record the exact outcome before claiming completion. - -**Step 4: Review issue compliance and cleanup** - -Confirm before the implementation summary comment / push: -- the canonical example matches the issue’s Expected Outcome -- the model is exported/discoverable -- the ILP reduction exists so `pred solve` has a path -- the plan file is removed in the cleanup commit From b6d334f3f849130bc9fc18c9b32b56841a2b73b4 Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Sat, 21 Mar 2026 14:43:05 +0800 Subject: [PATCH 7/9] Fix formatting after merge with main --- problemreductions-cli/src/commands/create.rs | 19 +++++++++++-------- problemreductions-cli/tests/cli_tests.rs | 11 +++++++++-- src/lib.rs | 8 ++++---- ...onsistency_of_database_frequency_tables.rs | 5 ++++- src/models/misc/mod.rs | 6 +++--- ...onsistencyofdatabasefrequencytables_ilp.rs | 8 +++----- src/rules/mod.rs | 4 ++-- ...onsistency_of_database_frequency_tables.rs | 5 ++++- ...onsistencyofdatabasefrequencytables_ilp.rs | 6 ++---- 9 files changed, 42 insertions(+), 30 deletions(-) diff --git 
a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs index e38e3d363..8fb730dfc 100644 --- a/problemreductions-cli/src/commands/create.rs +++ b/problemreductions-cli/src/commands/create.rs @@ -21,11 +21,11 @@ use problemreductions::models::misc::{ ConsistencyOfDatabaseFrequencyTables, FlowShopScheduling, FrequencyTable, KnownValue, LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack, QueryArg, RectilinearPictureCompression, - ResourceConstrainedScheduling, - SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost, - SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness, - SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence, - StringToStringCorrection, SubsetSum, SumOfSquaresPartition, + ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines, + SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime, + SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines, + SequencingWithinIntervals, ShortestCommonSupersequence, StringToStringCorrection, SubsetSum, + SumOfSquaresPartition, }; use problemreductions::models::BiconnectivityAugmentation; use problemreductions::prelude::*; @@ -1822,9 +1822,12 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { let frequency_tables = parse_cdft_frequency_tables(frequency_tables_str, &attribute_domains, num_objects) .map_err(|e| anyhow::anyhow!("{e}\n\n{usage}"))?; - let known_values = - parse_cdft_known_values(args.known_values.as_deref(), num_objects, &attribute_domains) - .map_err(|e| anyhow::anyhow!("{e}\n\n{usage}"))?; + let known_values = parse_cdft_known_values( + args.known_values.as_deref(), + num_objects, + &attribute_domains, + ) + .map_err(|e| anyhow::anyhow!("{e}\n\n{usage}"))?; ( ser(ConsistencyOfDatabaseFrequencyTables::new( diff --git 
a/problemreductions-cli/tests/cli_tests.rs b/problemreductions-cli/tests/cli_tests.rs index 2e08e23e6..6229ac3d1 100644 --- a/problemreductions-cli/tests/cli_tests.rs +++ b/problemreductions-cli/tests/cli_tests.rs @@ -5795,7 +5795,10 @@ fn test_create_consistency_of_database_frequency_tables() { let json: serde_json::Value = serde_json::from_str(&stdout).unwrap(); assert_eq!(json["type"], "ConsistencyOfDatabaseFrequencyTables"); assert_eq!(json["data"]["num_objects"], 6); - assert_eq!(json["data"]["attribute_domains"], serde_json::json!([2, 3, 2])); + assert_eq!( + json["data"]["attribute_domains"], + serde_json::json!([2, 3, 2]) + ); assert_eq!(json["data"]["frequency_tables"][0]["attribute_a"], 0); assert_eq!(json["data"]["frequency_tables"][0]["attribute_b"], 1); assert_eq!( @@ -7347,7 +7350,11 @@ fn test_create_model_example_multiprocessor_scheduling() { #[test] fn test_create_model_example_consistency_of_database_frequency_tables() { let output = pred() - .args(["create", "--example", "ConsistencyOfDatabaseFrequencyTables"]) + .args([ + "create", + "--example", + "ConsistencyOfDatabaseFrequencyTables", + ]) .output() .unwrap(); assert!( diff --git a/src/lib.rs b/src/lib.rs index 27bbf4923..b270e988b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,10 +64,10 @@ pub mod prelude { }; pub use crate::models::misc::{ AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CbqRelation, - ConjunctiveBooleanQuery, ConjunctiveQueryFoldability, - ConsistencyOfDatabaseFrequencyTables, Factoring, FlowShopScheduling, Knapsack, - LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, - Partition, QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling, + ConjunctiveBooleanQuery, ConjunctiveQueryFoldability, ConsistencyOfDatabaseFrequencyTables, + Factoring, FlowShopScheduling, Knapsack, LongestCommonSubsequence, + MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, Partition, QueryArg, + 
RectilinearPictureCompression, ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, diff --git a/src/models/misc/consistency_of_database_frequency_tables.rs b/src/models/misc/consistency_of_database_frequency_tables.rs index 1c5022dc2..f052d42e3 100644 --- a/src/models/misc/consistency_of_database_frequency_tables.rs +++ b/src/models/misc/consistency_of_database_frequency_tables.rs @@ -260,7 +260,10 @@ impl ConsistencyOfDatabaseFrequencyTables { /// Returns the total number of published frequency-table cells. pub fn num_frequency_cells(&self) -> usize { - self.frequency_tables.iter().map(FrequencyTable::num_cells).sum() + self.frequency_tables + .iter() + .map(FrequencyTable::num_cells) + .sum() } /// Returns the number of auxiliary ILP indicators used for frequency-cell counting. diff --git a/src/models/misc/mod.rs b/src/models/misc/mod.rs index 7942193c2..8db92f01f 100644 --- a/src/models/misc/mod.rs +++ b/src/models/misc/mod.rs @@ -33,9 +33,9 @@ pub(crate) mod additional_key; mod bin_packing; mod boyce_codd_normal_form_violation; -mod consistency_of_database_frequency_tables; pub(crate) mod conjunctive_boolean_query; pub(crate) mod conjunctive_query_foldability; +mod consistency_of_database_frequency_tables; pub(crate) mod factoring; mod flow_shop_scheduling; mod knapsack; @@ -63,11 +63,11 @@ pub(crate) mod sum_of_squares_partition; pub use additional_key::AdditionalKey; pub use bin_packing::BinPacking; pub use boyce_codd_normal_form_violation::BoyceCoddNormalFormViolation; +pub use conjunctive_boolean_query::{ConjunctiveBooleanQuery, QueryArg, Relation as CbqRelation}; +pub use conjunctive_query_foldability::{ConjunctiveQueryFoldability, Term}; pub use consistency_of_database_frequency_tables::{ ConsistencyOfDatabaseFrequencyTables, FrequencyTable, 
KnownValue, }; -pub use conjunctive_boolean_query::{ConjunctiveBooleanQuery, QueryArg, Relation as CbqRelation}; -pub use conjunctive_query_foldability::{ConjunctiveQueryFoldability, Term}; pub use factoring::Factoring; pub use flow_shop_scheduling::FlowShopScheduling; pub use knapsack::Knapsack; diff --git a/src/rules/consistencyofdatabasefrequencytables_ilp.rs b/src/rules/consistencyofdatabasefrequencytables_ilp.rs index 0bea7e8c3..9461ac9f5 100644 --- a/src/rules/consistencyofdatabasefrequencytables_ilp.rs +++ b/src/rules/consistencyofdatabasefrequencytables_ilp.rs @@ -6,7 +6,7 @@ //! //! The pair-count equalities are linearized with standard McCormick constraints. -use crate::models::algebraic::{ILP, LinearConstraint, ObjectiveSense}; +use crate::models::algebraic::{LinearConstraint, ObjectiveSense, ILP}; use crate::models::misc::ConsistencyOfDatabaseFrequencyTables; use crate::reduction; use crate::rules::traits::{ReduceTo, ReductionResult}; @@ -177,10 +177,8 @@ impl ReduceTo> for ConsistencyOfDatabaseFrequencyTables { for object in 0..source.num_objects() { let z = helper.auxiliary_var_index(table_index, object, value_a, value_b); - let y_a = - helper.assignment_var_index(object, table.attribute_a(), value_a); - let y_b = - helper.assignment_var_index(object, table.attribute_b(), value_b); + let y_a = helper.assignment_var_index(object, table.attribute_a(), value_a); + let y_b = helper.assignment_var_index(object, table.attribute_b(), value_b); constraints.push(LinearConstraint::le(vec![(z, 1.0), (y_a, -1.0)], 0.0)); constraints.push(LinearConstraint::le(vec![(z, 1.0), (y_b, -1.0)], 0.0)); diff --git a/src/rules/mod.rs b/src/rules/mod.rs index 6e7a8eb96..d7929687b 100644 --- a/src/rules/mod.rs +++ b/src/rules/mod.rs @@ -52,10 +52,10 @@ pub(crate) mod binpacking_ilp; #[cfg(feature = "ilp-solver")] pub(crate) mod circuit_ilp; #[cfg(feature = "ilp-solver")] -pub(crate) mod consistencyofdatabasefrequencytables_ilp; -#[cfg(feature = "ilp-solver")] pub(crate) mod 
coloring_ilp; #[cfg(feature = "ilp-solver")] +pub(crate) mod consistencyofdatabasefrequencytables_ilp; +#[cfg(feature = "ilp-solver")] pub(crate) mod factoring_ilp; #[cfg(feature = "ilp-solver")] pub(crate) mod graphpartitioning_ilp; diff --git a/src/unit_tests/models/misc/consistency_of_database_frequency_tables.rs b/src/unit_tests/models/misc/consistency_of_database_frequency_tables.rs index 6c5d99d36..7826720ef 100644 --- a/src/unit_tests/models/misc/consistency_of_database_frequency_tables.rs +++ b/src/unit_tests/models/misc/consistency_of_database_frequency_tables.rs @@ -63,7 +63,10 @@ fn test_cdft_creation_and_getters() { #[test] fn test_cdft_dims_repeat_attribute_domains_for_each_object() { let problem = issue_yes_instance(); - assert_eq!(problem.dims(), vec![2, 3, 2, 2, 3, 2, 2, 3, 2, 2, 3, 2, 2, 3, 2, 2, 3, 2]); + assert_eq!( + problem.dims(), + vec![2, 3, 2, 2, 3, 2, 2, 3, 2, 2, 3, 2, 2, 3, 2, 2, 3, 2] + ); } #[test] diff --git a/src/unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs b/src/unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs index ff137c18e..2c5984c49 100644 --- a/src/unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs +++ b/src/unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs @@ -1,8 +1,6 @@ use super::*; -use crate::models::algebraic::{ILP, ObjectiveSense}; -use crate::models::misc::{ - ConsistencyOfDatabaseFrequencyTables, FrequencyTable, KnownValue, -}; +use crate::models::algebraic::{ObjectiveSense, ILP}; +use crate::models::misc::{ConsistencyOfDatabaseFrequencyTables, FrequencyTable, KnownValue}; use crate::rules::test_helpers::assert_satisfaction_round_trip_from_optimization_target; use crate::rules::{ReduceTo, ReductionResult}; use crate::solvers::ILPSolver; From 9945c5a33ac84fbb0a0d4ac0330cdeedb0b358cc Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Sat, 21 Mar 2026 15:04:16 +0800 Subject: [PATCH 8/9] Add reduction-rule paper entry for ConsistencyOfDatabaseFrequencyTables -> ILP --- 
docs/paper/reductions.typ | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index 55b9b688b..d1f5b43fe 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -3229,6 +3229,22 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76], ] } +#reduction-rule("ConsistencyOfDatabaseFrequencyTables", "ILP")[ + Each object-attribute pair is encoded by a one-hot binary vector over its domain, and each pairwise frequency count becomes a linear equality over McCormick auxiliary variables that linearize the product of two one-hot indicators. Known values are fixed by pinning the corresponding indicator to 1. The resulting ILP is a pure feasibility problem (trivial objective). +][ + _Construction._ Let $V$ be the set of objects, $A$ the set of attributes with domains $D_a$, $cal(T)$ the set of published frequency tables, and $K$ the set of known triples $(v, a, x)$. + + _Variables:_ (1) Binary one-hot indicators $y_(v,a,x) in {0, 1}$ for each object $v in V$, attribute $a in A$, and value $x in D_a$: $y_(v,a,x) = 1$ iff object $v$ takes value $x$ for attribute $a$. (2) Binary auxiliary variables $z_(t,v,x,x') in {0, 1}$ for each table $t in cal(T)$ (with attribute pair $(a, b)$), object $v in V$, and cell $(x, x') in D_a times D_b$: $z_(t,v,x,x') = 1$ iff object $v$ realizes cell $(x, x')$ in table $t$. + + _Constraints:_ (1) One-hot: $sum_(x in D_a) y_(v,a,x) = 1$ for all $v in V$, $a in A$. (2) Known values: $y_(v,a,x) = 1$ for each $(v, a, x) in K$. (3) McCormick linearization for $z_(t,v,x,x') = y_(v,a,x) dot y_(v,b,x')$: $z_(t,v,x,x') lt.eq y_(v,a,x)$, $z_(t,v,x,x') lt.eq y_(v,b,x')$, $z_(t,v,x,x') gt.eq y_(v,a,x) + y_(v,b,x') - 1$. (4) Frequency counts: $sum_(v in V) z_(t,v,x,x') = f_t (x, x')$ for each table $t$ and cell $(x, x')$. + + _Objective:_ Minimize $0$ (feasibility problem). 
+ + _Correctness._ ($arrow.r.double$) A consistent assignment defines one-hot indicators and their products; all constraints hold by construction, and the frequency equalities match the published counts. ($arrow.l.double$) Any feasible binary solution assigns exactly one value per object-attribute (one-hot), respects known values, and the McCormick constraints force $z_(t,v,x,x') = y_(v,a,x) dot y_(v,b,x')$ for binary variables, so the frequency equalities certify consistency. + + _Solution extraction._ For each object $v$ and attribute $a$, find $x$ with $y_(v,a,x) = 1$; assign value $x$ to $(v, a)$. +] + #problem-def("SumOfSquaresPartition")[ Given a finite set $A = {a_0, dots, a_(n-1)}$ with sizes $s(a_i) in ZZ^+$, a positive integer $K lt.eq |A|$ (number of groups), and a positive integer $J$ (bound), determine whether $A$ can be partitioned into $K$ disjoint sets $A_1, dots, A_K$ such that $sum_(i=1)^K (sum_(a in A_i) s(a))^2 lt.eq J$. ][ From 33e9b2b3eceebae4d29969b72ec13251b511e642 Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Sat, 21 Mar 2026 15:12:56 +0800 Subject: [PATCH 9/9] Add reduction tests using full issue instance (6 objects, 3 attrs, 2 tables) --- ...onsistencyofdatabasefrequencytables_ilp.rs | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs b/src/unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs index 2c5984c49..966febe3b 100644 --- a/src/unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs +++ b/src/unit_tests/rules/consistencyofdatabasefrequencytables_ilp.rs @@ -77,3 +77,47 @@ fn test_cdft_to_ilp_solve_reduced() { .expect("solve_reduced should find a satisfying assignment"); assert!(problem.evaluate(&solution)); } + +fn issue_instance() -> ConsistencyOfDatabaseFrequencyTables { + ConsistencyOfDatabaseFrequencyTables::new( + 6, + vec![2, 3, 2], + vec![ + FrequencyTable::new(0, 1, vec![vec![1, 1, 1], vec![1, 1, 1]]), + FrequencyTable::new(1, 
2, vec![vec![1, 1], vec![0, 2], vec![1, 1]]), + ], + vec![ + KnownValue::new(0, 0, 0), + KnownValue::new(3, 0, 1), + KnownValue::new(1, 2, 1), + ], + ) +} + +fn issue_witness() -> Vec { + vec![0, 0, 0, 0, 1, 1, 0, 2, 1, 1, 0, 1, 1, 1, 1, 1, 2, 0] +} + +#[test] +fn test_cdft_to_ilp_issue_instance_closed_loop() { + let problem = issue_instance(); + let reduction: ReductionCDFTToILP = ReduceTo::>::reduce_to(&problem); + let solver = ILPSolver::new(); + let target_solution = solver + .solve(reduction.target_problem()) + .expect("ILP solver should find a feasible solution for the issue instance"); + let source_solution = reduction.extract_solution(&target_solution); + assert!( + problem.evaluate(&source_solution), + "extracted source solution must satisfy the original CDFT instance" + ); +} + +#[test] +fn test_cdft_to_ilp_issue_instance_encoding_round_trip() { + let problem = issue_instance(); + let reduction: ReductionCDFTToILP = ReduceTo::>::reduce_to(&problem); + let ilp_solution = reduction.encode_source_solution(&issue_witness()); + let extracted = reduction.extract_solution(&ilp_solution); + assert_eq!(extracted, issue_witness()); +}