From f4f1a9a5a90a3921285bef57f8ab369e5203c53a Mon Sep 17 00:00:00 2001 From: Sam Roberts Date: Sun, 29 Sep 2024 13:40:35 +1000 Subject: [PATCH 01/12] Simplify: remove MapSolution --- fuzzy/src/lib.rs | 1 - fuzzy/src/map_solution.rs | 166 -------------------------------------- 2 files changed, 167 deletions(-) delete mode 100644 fuzzy/src/map_solution.rs diff --git a/fuzzy/src/lib.rs b/fuzzy/src/lib.rs index 93fc358..47c484e 100644 --- a/fuzzy/src/lib.rs +++ b/fuzzy/src/lib.rs @@ -52,7 +52,6 @@ use regex_syntax::hir; pub mod regex_question; pub mod lattice_solution; -pub mod map_solution; pub mod table_solution; pub mod debug_output; pub mod diff_output; diff --git a/fuzzy/src/map_solution.rs b/fuzzy/src/map_solution.rs deleted file mode 100644 index 2346fa4..0000000 --- a/fuzzy/src/map_solution.rs +++ /dev/null @@ -1,166 +0,0 @@ -//! An implementation of [`Solution`](crate::Solution) that should be relatively easy to develop new features for. -//! -//! This implementation uses a [map](State) to store state for each [node](Ix), so it should be -//! easy to change node representation and expand the state space over time. 
- -use crate::{ElementCore, Match, Problem, Step}; -use crate::flat_pattern::{Flat, FlatPattern}; -use crate::lattice_solution::{LatticeConfig, LatticeIx, LatticeSolution, LatticeState, Node, StepType}; -use std::collections::hash_map::HashMap; - -#[derive(Eq, PartialEq, Debug)] -pub struct MapSolution { - score: usize, - trace: Vec>, -} - -impl LatticeSolution for MapSolution { - type Conf = Config; - type Ix = Ix; - type State = State; - - fn new(score: usize, trace: Vec>) -> Self { - MapSolution { score, trace } - } - - fn score_lattice(&self) -> &usize { - &self.score - } - - fn trace_lattice(&self) -> &Vec> { - &self.trace - } -} - -pub struct Config { - pattern: FlatPattern, - text: Vec, -} - -impl LatticeConfig for Config { - fn new(problem: &Problem) -> Self { - let pattern = FlatPattern::new(&problem.pattern); - let text = problem.text.atoms.clone(); - Config { pattern, text } - } - - fn get(&self, ix: Ix) -> (Option<&Flat>, Option<&char>) { - (self.pattern.get(ix.pattern), self.text.get(ix.text)) - } - - fn start(&self) -> Ix { - Ix { pattern: 0, text: 0, rep_off: 0 } - } - - fn end(&self) -> Ix { - Ix { pattern: self.pattern.len(), text: self.text.len(), rep_off: 0 } - } - - fn step(&self, ix: Ix, step_type: StepType) -> Ix { - match step_type { - StepType::Hit => - Ix { pattern: ix.pattern + 1, text: ix.text + 1, rep_off: 0, ..ix }, - StepType::SkipText => - Ix { text: ix.text + 1, rep_off: 0, ..ix }, - StepType::SkipPattern | StepType::StartGroup | StepType::EndGroup | StepType::StartLeft => - Ix { pattern: ix.pattern + 1, ..ix }, - StepType::StartRight(off) => - Ix { pattern: ix.pattern + off + 1, ..ix }, - StepType::PassRight(off) => - Ix { pattern: ix.pattern + off, ..ix }, - StepType::StartRepetition => - Ix { pattern: ix.pattern + 1, rep_off: ix.rep_off + 1, ..ix }, - StepType::EndRepetition => - Ix { pattern: ix.pattern + 1, rep_off: ix.rep_off - 1, ..ix }, - StepType::PassRepetition(off) => - Ix { pattern: ix.pattern + off + 1, ..ix}, - 
StepType::RestartRepetition(off) => - Ix { pattern: ix.pattern - off, ..ix }, - } - } -} - -pub struct State { - nodes: HashMap>, - default: Node, -} - -impl LatticeState for State { - fn new(_conf: &Config) -> Self { - State { nodes: HashMap::new(), default: Node::new(), } - } - - fn get(&self, ix: Ix) -> &Node { - match self.nodes.get(&ix) { - Some(node) => node, - None => &self.default, - } - } - - fn get_mut(&mut self, ix: Ix) -> &mut Node { - self.nodes.entry(ix).or_insert(self.default.clone()) - } - - fn set(&mut self, ix: Ix, node: Node) { - let _ = self.nodes.insert(ix, node); - } -} - -#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, Default)] -pub struct Ix { - /// The index into the [flattened `Problem::pattern`](crate::flat_pattern::FlatPattern). - pub pattern: usize, - /// The index into [`Problem::text`](crate::Problem::text). - pub text: usize, - /// This field represents our "repetition depth since we last changed text index". - /// - /// To avoid infinite loops, we have to avoid repeating a repetition group if that would take us - /// back to the same index we started at. We keep track of how many repetition groups we entered - /// since we last matched or skipped a text character, and avoid looping back unless this is 0. - /// This ix the "repetition depth". Because the "repetition depth" affects future jumps, it also - /// affects the future score, and so we have a separate score and a separate index for each - /// repetition depth value. 
- pub rep_off: usize, -} - -impl LatticeIx for Ix { - fn can_restart(&self) -> bool { - self.rep_off == 0 - } -} - -#[cfg(test)] -mod tests { - use super::MapSolution; - use crate::test_cases::TestCase; - use crate::lattice_solution::test_logic; - use test_case::test_case; - - #[test_case(TestCase::match_empty())] - #[test_case(TestCase::fail_empty_1())] - #[test_case(TestCase::fail_empty_2())] - #[test_case(TestCase::match_lit_1())] - #[test_case(TestCase::match_lit_2())] - #[test_case(TestCase::fail_lit_1())] - #[test_case(TestCase::fail_lit_2())] - #[test_case(TestCase::fail_lit_3())] - #[test_case(TestCase::match_class_1())] - #[test_case(TestCase::match_class_2())] - #[test_case(TestCase::match_class_3())] - #[test_case(TestCase::fail_class_1())] - #[test_case(TestCase::match_alternative_1())] - #[test_case(TestCase::match_alternative_2())] - #[test_case(TestCase::match_alternative_3())] - #[test_case(TestCase::fail_alternative_1())] - #[test_case(TestCase::match_repetition_1())] - #[test_case(TestCase::match_repetition_2())] - #[test_case(TestCase::match_repetition_3())] - #[test_case(TestCase::match_repetition_4())] - #[test_case(TestCase::match_repetition_5())] - #[test_case(TestCase::fail_repetition_1())] - #[test_case(TestCase::fail_repetition_2())] - #[test_case(TestCase::fail_repetition_3())] - fn test_solve(test: TestCase) { - test_logic::test_solve::(test); - } -} From b0212694e896fbab7349fd92badd35f3c569845b Mon Sep 17 00:00:00 2001 From: Sam Roberts Date: Sat, 12 Apr 2025 15:07:24 +1000 Subject: [PATCH 02/12] Simplify: remove Question trait --- fuzzy/src/lib.rs | 31 ++----------------------------- fuzzy/src/regex_question.rs | 11 ++++------- fuzzy_cli/src/lib.rs | 6 +++--- fuzzy_lambda/src/main.rs | 2 +- 4 files changed, 10 insertions(+), 40 deletions(-) diff --git a/fuzzy/src/lib.rs b/fuzzy/src/lib.rs index 47c484e..f132695 100644 --- a/fuzzy/src/lib.rs +++ b/fuzzy/src/lib.rs @@ -7,18 +7,12 @@ //! //! 
This crate is very early in it's development, it's API is akward, and will likely be changed in //! breaking ways several times before it matures. We don't currently implement any convenience -//! functions which match a pattern against a text in one call. Instead, the crate provides -//! implementations of the following three traits, which can be combined to do the match: -//! -//! - a [`Question`] produces a [`Problem`] to be solved. -//! - a [`Solution`] calculates the optimal match and provides the corresponding -//! [`score`](Solution::score) and [`trace`](Solution::trace). -//! - an [`Output`] displays [`Problem`] and [`Solution`] info to the user. +//! functions which match a pattern against a text in one call. //! //! Implementations can be combined as follows: //! //! ```rust -//! use fuzzy::{Question, Solution, Output}; +//! use fuzzy::{Solution, Output}; //! use fuzzy::regex_question::RegexQuestion; //! use fuzzy::table_solution::TableSolution; //! use fuzzy::diff_output::DiffOutput; @@ -34,18 +28,6 @@ //! Ok(()) //! } //! ``` -//! -//! # Overview -//! -//! The main three traits in our API are [`Question`], [`Solution`], and [`Output`]. See -//! submodules for the various implementations. -//! -//! In addition to these traits: -//! -//! - The [`Problem`] contains the parsed [`pattern`](Problem::pattern) and [`text`](Problem::text). -//! - From the [`Solution`]: -//! - The [`score`](Solution::score) is a simple `usize`. -//! - A [`Step`] is a single item from the optimal [`trace`](Solution::trace). use std::fmt::Display; use regex_syntax::hir; @@ -58,15 +40,6 @@ pub mod diff_output; pub mod flat_pattern; pub mod error; -/// A builder of [`Problem`] values. -/// -/// Questions are built from some specification of a pattern and text, but the details are not part -/// of this API: different Question implementations can do this differently. -pub trait Question { - /// Try to build a [`Problem`]. 
- fn ask(&self) -> Result, Error>; -} - /// Calculates the optimal solution for a [`Problem`]. /// /// In practice, our solution implementations to date are simply structs directly storing the final diff --git a/fuzzy/src/regex_question.rs b/fuzzy/src/regex_question.rs index 2d04f6b..a7cc126 100644 --- a/fuzzy/src/regex_question.rs +++ b/fuzzy/src/regex_question.rs @@ -1,5 +1,4 @@ -//! An implementation of [`Question`](crate::Question) that parses the pattern using -//! [`regex_syntax`](https://docs.rs/regex-syntax). +//! Parses pattern using [`regex_syntax`](https://docs.rs/regex-syntax). //! //! [`regex_syntax`](https://docs.rs/regex-syntax) sometimes uses bytes in their API, while this //! crate currently operates on unicode characters. For now, we are getting around this by naively @@ -7,7 +6,7 @@ use regex_syntax; use regex_syntax::hir; -use crate::{Atoms, Class, Element, Match, Pattern, Problem, Question, Repetition}; +use crate::{Atoms, Class, Element, Match, Pattern, Problem, Repetition}; use crate::error::Error; pub struct RegexQuestion { @@ -15,15 +14,13 @@ pub struct RegexQuestion { pub text: String, } -impl Question for RegexQuestion { - fn ask(&self) -> Result, Error> { +impl RegexQuestion { + pub fn ask(&self) -> Result, Error> { let pattern = Self::parse_pattern(&self.pattern_regex)?; let text = Atoms { atoms: self.text.chars().collect() }; Ok(Problem { pattern, text }) } -} -impl RegexQuestion { fn parse_pattern(pattern: &str) -> Result, Error> { let hir = regex_syntax::parse(pattern)?; Self::pattern(Self::parse_impl(&hir)) diff --git a/fuzzy_cli/src/lib.rs b/fuzzy_cli/src/lib.rs index 8f4ddda..82b1f3b 100644 --- a/fuzzy_cli/src/lib.rs +++ b/fuzzy_cli/src/lib.rs @@ -1,5 +1,5 @@ use clap::Parser; -use fuzzy::{Output, Question, Solution}; +use fuzzy::{Output, Solution}; use fuzzy::diff_output::DiffOutput; use fuzzy::table_solution::TableSolution; use fuzzy::regex_question::RegexQuestion; @@ -33,10 +33,10 @@ pub fn run(args: Args) -> Result { }; let 
question = RegexQuestion { pattern_regex, text }; - run_impl::(question) + run_impl::(question) } -fn run_impl, S: Solution, O: Output>(question: Q) -> Result { +fn run_impl, O: Output>(question: RegexQuestion) -> Result { let problem = question.ask()?; let problem_core = problem.desugar(); let solution = S::solve(&problem_core)?; diff --git a/fuzzy_lambda/src/main.rs b/fuzzy_lambda/src/main.rs index 41dd209..27bd843 100644 --- a/fuzzy_lambda/src/main.rs +++ b/fuzzy_lambda/src/main.rs @@ -1,4 +1,4 @@ -use fuzzy::{Output, Question, Solution}; +use fuzzy::{Output, Solution}; use fuzzy::diff_output::{Chunk, DiffOutput}; use fuzzy::table_solution::TableSolution; use fuzzy::regex_question::RegexQuestion; From 49fc6c36642311168be5ea7e5cc09ecc27eec7d0 Mon Sep 17 00:00:00 2001 From: Sam Roberts Date: Sat, 12 Apr 2025 15:20:30 +1000 Subject: [PATCH 03/12] Simplify: remove Solution trait --- fuzzy/src/lattice_solution.rs | 41 ++++++++++++++++------------------- fuzzy/src/lib.rs | 28 +++++------------------- fuzzy_cli/src/lib.rs | 9 ++++---- fuzzy_lambda/src/main.rs | 5 +++-- 4 files changed, 33 insertions(+), 50 deletions(-) diff --git a/fuzzy/src/lattice_solution.rs b/fuzzy/src/lattice_solution.rs index 842012e..3b86075 100644 --- a/fuzzy/src/lattice_solution.rs +++ b/fuzzy/src/lattice_solution.rs @@ -1,19 +1,19 @@ -//! Provides a sub-trait of [`Solution`] with a generic [`Solution::solve`] implementation. +//! Provides a generic solve implementation. -use crate::{ElementCore, Match, Problem, Solution, Step}; +use crate::{ElementCore, Match, Problem, Step}; use crate::flat_pattern::Flat; use crate::error::Error; use nonempty::{NonEmpty, nonempty}; use std::fmt::Debug; -/// A family of [`Solution`] implementations which record state in a lattice of nodes. +/// A family of solutions hich record state in a lattice of nodes. 
/// /// For now, this trait hardcodes the use of a flattened pattern which is easier to index, hardcodes /// the node structure, and also the algorithm used to traverse nodes and update their state. /// The way in which individual implementations store and index nodes is configurable. In the /// future, as we add more features, we may make other parts of the implementation configurable. /// -/// [`LatticeSolution`] implementations get [`Solution::solve`] defined automatically. Instead, +/// [`LatticeSolution`] implementations get solve defined automatically. Instead, /// implementations are required to specify a mutable [`State`](LatticeSolution::State) space /// and an [`Ix`](LatticeSolution::Ix) type which addresses it. /// @@ -24,7 +24,7 @@ use std::fmt::Debug; /// Implementation must ensure that [`can_restart`](LatticeIx::can_restart) is implemented /// correctly, so that these links never form a loop. These links form a /// [lattice](https://en.wikipedia.org/wiki/Lattice_(order)). -pub trait LatticeSolution : Sized + Solution { +pub trait LatticeSolution : Sized { /// Carries immutable information derived from the [`Problem`](crate::Problem) being solved. type Conf: LatticeConfig; /// Mutable state being updated while solving. @@ -35,10 +35,23 @@ pub trait LatticeSolution : Sized + Solution { fn new(score: usize, trace: Vec>) -> Self; + fn score(&self) -> &usize { + LatticeSolution::score_lattice(self) + } + + fn trace(&self) -> &Vec> { + LatticeSolution::trace_lattice(self) + } + + fn solve(problem: &Problem) -> Result { + LatticeSolution::solve_lattice(&problem) + } + + fn score_lattice(&self) -> &usize; fn trace_lattice(&self) -> &Vec>; - /// [`Solution::solve`] implementation. + /// solve implementation. 
fn solve_lattice(problem: &Problem) -> Result { let conf = Self::Conf::new(problem); let mut state = Self::State::new(&conf); @@ -174,22 +187,6 @@ struct Back { child: Ix, } -impl Solution for Sln where - Sln: LatticeSolution, -{ - fn score(&self) -> &usize { - LatticeSolution::score_lattice(self) - } - - fn trace(&self) -> &Vec> { - LatticeSolution::trace_lattice(self) - } - - fn solve(problem: &Problem) -> Result { - LatticeSolution::solve_lattice(&problem) - } -} - pub trait LatticeConfig { fn new(problem: &Problem) -> Self; fn get(&self, ix: Ix) -> (Option<&Flat>, Option<&char>); diff --git a/fuzzy/src/lib.rs b/fuzzy/src/lib.rs index f132695..aaebff1 100644 --- a/fuzzy/src/lib.rs +++ b/fuzzy/src/lib.rs @@ -12,8 +12,9 @@ //! Implementations can be combined as follows: //! //! ```rust -//! use fuzzy::{Solution, Output}; +//! use fuzzy::Output; //! use fuzzy::regex_question::RegexQuestion; +//! use fuzzy::lattice_solution::LatticeSolution; //! use fuzzy::table_solution::TableSolution; //! use fuzzy::diff_output::DiffOutput; //! use fuzzy::error::Error; @@ -22,7 +23,7 @@ //! let question = RegexQuestion { pattern_regex, text }; //! let problem = question.ask()?; //! let problem_core = problem.desugar(); -//! let solution = TableSolution::solve(&problem_core)?; +//! let solution: TableSolution = LatticeSolution::solve(&problem_core)?; //! let output = DiffOutput::new(&solution.score(), &solution.trace()); //! println!("{}", output); //! Ok(()) @@ -40,30 +41,13 @@ pub mod diff_output; pub mod flat_pattern; pub mod error; -/// Calculates the optimal solution for a [`Problem`]. -/// -/// In practice, our solution implementations to date are simply structs directly storing the final -/// calculated `score` and `trace`. We will probably change this API in the future. -pub trait Solution : Sized { - /// Try to figure out the solution for a [`Problem`]. - fn solve(problem: &Problem) -> Result; - - /// Return the final score for the solution. 
- /// - /// This score represents the cost of mismatches: `0` is best, higher worse. - fn score(&self) -> &usize; - - /// Return the [`Step`]s followed by the optimal match between pattern and text. - fn trace(&self) -> &Vec>; -} - /// Displays the final solution. /// /// Output implementations are just types that implement /// [`Display`](https://doc.rust-lang.org/std/fmt/trait.Display.html) and can be constructed out of -/// the [`score`](Solution::score) and [`trace`](Solution::trace). +/// the [`score`](TableSolution::score) and [`trace`](TableSolution::trace). /// -/// If the [`Solution`] API changes, we will probably change this API as well. +/// If the [`TableSolution`] API changes, we will probably change this API as well. pub trait Output : Display { /// Build the display. This value will have a user-friendly string representation. fn new(score: &usize, trace: &Vec>) -> Self; @@ -210,7 +194,7 @@ impl Class { } } -/// An individual element in [`Solution::trace`]. +/// An individual element in [`TableSolution::trace`]. 
#[derive(Eq, PartialEq, Clone, Copy, Debug)] pub enum Step { Hit(P, T), diff --git a/fuzzy_cli/src/lib.rs b/fuzzy_cli/src/lib.rs index 82b1f3b..7e77993 100644 --- a/fuzzy_cli/src/lib.rs +++ b/fuzzy_cli/src/lib.rs @@ -1,6 +1,7 @@ use clap::Parser; -use fuzzy::{Output, Solution}; +use fuzzy::Output; use fuzzy::diff_output::DiffOutput; +use fuzzy::lattice_solution::LatticeSolution; use fuzzy::table_solution::TableSolution; use fuzzy::regex_question::RegexQuestion; use fuzzy::error::Error; @@ -33,13 +34,13 @@ pub fn run(args: Args) -> Result { }; let question = RegexQuestion { pattern_regex, text }; - run_impl::(question) + run_impl::(question) } -fn run_impl, O: Output>(question: RegexQuestion) -> Result { +fn run_impl(question: RegexQuestion) -> Result { let problem = question.ask()?; let problem_core = problem.desugar(); - let solution = S::solve(&problem_core)?; + let solution: TableSolution = LatticeSolution::solve(&problem_core)?; let output = O::new(&solution.score(), &solution.trace()); Ok(format!("{}", output)) } diff --git a/fuzzy_lambda/src/main.rs b/fuzzy_lambda/src/main.rs index 27bd843..ca10dad 100644 --- a/fuzzy_lambda/src/main.rs +++ b/fuzzy_lambda/src/main.rs @@ -1,5 +1,6 @@ -use fuzzy::{Output, Solution}; +use fuzzy::Output; use fuzzy::diff_output::{Chunk, DiffOutput}; +use fuzzy::lattice_solution::LatticeSolution; use fuzzy::table_solution::TableSolution; use fuzzy::regex_question::RegexQuestion; use lambda_http::{run, service_fn, Body, Error, Request, Response}; @@ -55,7 +56,7 @@ async fn function_handler(event: Request) -> Result, Error> { let problem = RegexQuestion { pattern_regex: args.pattern, text: args.text }.ask()?; let problem_core = problem.desugar(); - let solution = TableSolution::solve(&problem_core)?; + let solution: TableSolution = LatticeSolution::solve(&problem_core)?; let output = DiffOutput::new(&solution.score(), &solution.trace()); let body = Out { score: *solution.score(), trace: OutChunk::from(&output.chunks) }; let body_json 
= serde_json::to_string(&body)?; From a65c637d359110225511b76cf4071410312362f1 Mon Sep 17 00:00:00 2001 From: Sam Roberts Date: Sat, 12 Apr 2025 17:13:30 +1000 Subject: [PATCH 04/12] Simplify: remove LatticeSolution --- fuzzy/src/lattice_solution.rs | 448 ---------------------------------- fuzzy/src/lib.rs | 4 +- fuzzy/src/table_solution.rs | 406 ++++++++++++++++++++++++++++-- fuzzy_cli/src/lib.rs | 3 +- fuzzy_lambda/src/main.rs | 3 +- 5 files changed, 385 insertions(+), 479 deletions(-) delete mode 100644 fuzzy/src/lattice_solution.rs diff --git a/fuzzy/src/lattice_solution.rs b/fuzzy/src/lattice_solution.rs deleted file mode 100644 index 3b86075..0000000 --- a/fuzzy/src/lattice_solution.rs +++ /dev/null @@ -1,448 +0,0 @@ -//! Provides a generic solve implementation. - -use crate::{ElementCore, Match, Problem, Step}; -use crate::flat_pattern::Flat; -use crate::error::Error; -use nonempty::{NonEmpty, nonempty}; -use std::fmt::Debug; - -/// A family of solutions hich record state in a lattice of nodes. -/// -/// For now, this trait hardcodes the use of a flattened pattern which is easier to index, hardcodes -/// the node structure, and also the algorithm used to traverse nodes and update their state. -/// The way in which individual implementations store and index nodes is configurable. In the -/// future, as we add more features, we may make other parts of the implementation configurable. -/// -/// [`LatticeSolution`] implementations get solve defined automatically. Instead, -/// implementations are required to specify a mutable [`State`](LatticeSolution::State) space -/// and an [`Ix`](LatticeSolution::Ix) type which addresses it. -/// -/// Each index links to child indices which represent the next possible steps we can take to match -/// the pattern to the text (e.g. match a character, skip a character from the text or pattern, -/// etc.). 
There is a defined [`start`](LatticeConfig::start) index, when no progress has been made, -/// and an [`end`](LatticeConfig::end) index, when both the entire pattern and text have been matched. -/// Implementation must ensure that [`can_restart`](LatticeIx::can_restart) is implemented -/// correctly, so that these links never form a loop. These links form a -/// [lattice](https://en.wikipedia.org/wiki/Lattice_(order)). -pub trait LatticeSolution : Sized { - /// Carries immutable information derived from the [`Problem`](crate::Problem) being solved. - type Conf: LatticeConfig; - /// Mutable state being updated while solving. - type State: LatticeState; - /// The type used to index into [`State`](LatticeSolution::State) and - /// [`Conf`](LatticeSolution::Conf). - type Ix: LatticeIx; - - fn new(score: usize, trace: Vec>) -> Self; - - fn score(&self) -> &usize { - LatticeSolution::score_lattice(self) - } - - fn trace(&self) -> &Vec> { - LatticeSolution::trace_lattice(self) - } - - fn solve(problem: &Problem) -> Result { - LatticeSolution::solve_lattice(&problem) - } - - - fn score_lattice(&self) -> &usize; - fn trace_lattice(&self) -> &Vec>; - - /// solve implementation. 
- fn solve_lattice(problem: &Problem) -> Result { - let conf = Self::Conf::new(problem); - let mut state = Self::State::new(&conf); - - let start_ix = conf.start(); - let end_ix = conf.end(); - - let _ = Self::calculate_optimal_path(&conf, &mut state)?; - - let start_node = state.get(start_ix); - let score = start_node.done_info() - .map(|i| i.0) - .map_err(|_| Error::IncompleteFinalState)?; - - let mut trace = vec![]; - let mut from = start_ix; - loop { - let node = state.get(from); - if !node.is_done() || from == end_ix { break; } - let (patt, text) = conf.get(from); - let (_, step_type, next) = node.done_info()?; - if let Some(step) = step_type.step() { - let final_step = step.map( - |_| match patt { - Some(Flat::Lit(c)) => Match::Lit(*c), - Some(Flat::Class(c)) => Match::Class(c.clone()), - unexpected => panic!("Unexpected trace pattern {:?}", unexpected), - }, - |_| match text { - Some(c) => *c, - unexpected => panic!("Unexpected trace text {:?}", unexpected), - } - ); - trace.push(final_step); - } - from = next; - } - if from != end_ix { - return Err(Error::IncompleteFinalState); - } - - Ok(LatticeSolution::new(score, trace)) - } - - /// Update [`State`](LatticeSolution::State) with the optimal steps from the start - /// [`Ix`](LatticeSolution::Ix) onwards. 
- fn calculate_optimal_path( - conf: &Self::Conf, - state: &mut Self::State, - ) -> Result<(), Error> { - let start_ix = conf.start(); - let end_ix = conf.end(); - - let mut loop_state = LoopState::Down(Down { - parent: Default::default(), - current: start_ix, - }); - - let mut loop_counter = 0; - - loop { - loop_counter += 1; - if loop_counter >= 1000000000 { // TODO make this max configurable - return Err(Error::ExceededMaxSteps(loop_counter)); - } - let new_parent = match &loop_state { - LoopState::Down(down) if state.get(down.current).is_ready() => { - let (flat, text) = conf.get(down.current); - let opt_node_type = NodeType::get(flat, text, &down.current); - let node_state = state.get_mut(down.current); - node_state.initialise(end_ix, down.parent, down.current, opt_node_type)?; - down.parent - } - LoopState::Down(down) => down.parent, - LoopState::Back(back) => { - let new_child = back.child; - let (new_score, _, _) = state.get(new_child).done_info()?; - let node_state = state.get_mut(back.current); - let new_parent = node_state.update(new_child, back.current, new_score)?; - new_parent - } - }; - - let current_ix = loop_state.current(); - let final_state = state.get(current_ix); - if current_ix == start_ix && final_state.is_done() { - break; - } else if final_state.is_done() { - loop_state = LoopState::Back(Back { - current: new_parent, - child: current_ix, - }); - } else if final_state.is_working() { - let current_step_type = final_state.current_step_type()?; - let child = conf.step(current_ix, current_step_type); - loop_state = LoopState::Down(Down { - parent: current_ix, - current: child, - }); - } else { - return Err(Error::NoNodeProgress(format!("{:?}", current_ix))); - } - } - - Ok(()) - } -} - -#[derive(Debug)] -enum LoopState { - Down(Down), - Back(Back), -} - -impl LoopState { - fn current(&self) -> Ix { - match self { - LoopState::Down(down) => down.current, - LoopState::Back(back) => back.current, - } - } -} - -#[derive(Debug)] -struct Down { - 
parent: Ix, - current: Ix, -} - -#[derive(Debug)] -struct Back { - current: Ix, - child: Ix, -} - -pub trait LatticeConfig { - fn new(problem: &Problem) -> Self; - fn get(&self, ix: Ix) -> (Option<&Flat>, Option<&char>); - - fn start(&self) -> Ix; - fn end(&self) -> Ix; - - fn step(&self, ix: Ix, step_type: StepType) -> Ix; -} - -pub trait LatticeState { - fn new(conf: &Conf) -> Self; - fn get(&self, ix: Ix) -> &Node; - fn get_mut(&mut self, ix: Ix) -> &mut Node; - fn set(&mut self, ix: Ix, node: Node); -} - -// TODO Ix turns out to be a sizable struct, remove Copy and pass by reference where possible -pub trait LatticeIx : Eq + PartialEq + Copy + Clone + Debug + Sized + Default { - fn can_restart(&self) -> bool; -} - -// TODO make a better Node type -// -// Calculate_optimal_path (originally called solve_ix) used to store a lot of state on the stack: -// the parent node, our progress through the possible step types, the optimal score, etc. The -// node was a simple enum which was either Ready, Working, or Done. Only the Done value had any -// fields, and it was never mutated. -// -// Once we began to run out of stack space for mid-sized use-cases, we transferred all of that -// state into the heap by adding it to this Node struct. Much of this information is mutated as we -// try out each possible step type. -// -// I had a lot of trouble implementing this expanded node. Solve loops over my table of node -// values, taking a mutable reference to a single node in each iteration. My code originally -// pattern matched on the Node enum, and called methods on inner types which could only be accessed -// when node had the right case. But I struggled to do this and satisfy rust's borrow checker. -// -// For now, I've abandonded pattern matching and type safety, and implemented rust as an abstract -// data type. The node still has three states: Ready, Working, and Done, but they aren't reflected -// in rust's type system. 
Instead, Node methods return errors if they are called when the node is -// in the wrong state. -// -// The three states are a bit implicit in the Node structure. They are driven by current. Current -// changes from 0..=step_types.len()+1 over the life of the Node: -// -// 1. A node is Ready if current == 0 -// 2. A node is Working if 1 >= current >= step_types.len() -// 3. A node is Done if current == step_types.len() + 1 -// -// When a node is working, the current step type being attempted is step_types[current-1]. -// -// When a node has processed at least one node (current >= 2), score/step_type/next record the -// optimal choice among step_types[0..current-1]. This means those fields are optimal when a Node -// is Done. -// -// I'd like to return to this Node when I'm more comfortable working with rust, and do a better job -// implementing it. - -#[derive(Clone, Eq, PartialEq, Debug)] -pub struct Node { - current: usize, - parent: Ix, - score: usize, - step_type: StepType, - next: Ix, - step_types: Vec, -} - -impl Node { - pub fn new() -> Self { - Self { - current: 0, - parent: Default::default(), - score: 0, - step_type: StepType::Hit, - next: Default::default(), - step_types: vec![], - } - } - - fn is_ready(&self) -> bool { - self.current == 0 - } - - fn is_working(&self) -> bool { - self.current > 0 && self.current <= self.step_types.len() - } - - fn is_done(&self) -> bool { - self.current > self.step_types.len() - } - - fn current_step_type(&self) -> Result { - if self.is_working() { - Ok(self.step_types[self.current - 1]) - } else { - Err(Error::CannotGetNodeField("current_step_type", "working")) - } - } - - fn done_info(&self) -> Result<(usize, StepType, Ix), Error> { - if self.is_done() { - Ok((self.score, self.step_type, self.next)) - } else { - Err(Error::CannotGetNodeField("score/step_type/next", "done")) - } - } - - fn initialise(&mut self, end_ix: Ix, parent_ix: Ix, ix: Ix, opt_node_type: Option) -> Result<(), Error>{ - if self.is_ready() { - match 
opt_node_type { - Some(node_type) => { - let step_types = Vec::from(node_type.step_types()); - self.parent = parent_ix; - self.current += 1; - self.step_types = step_types; - Ok(()) - } - None if ix == end_ix => { // end_ix: insert dummy done value - self.parent = parent_ix; - self.current += 1; - Ok(()) - } - None => { - Err(Error::NoNodeType(format!("{:?}", ix))) - } - } - } else { - Err(Error::CannotInitialiseNode(format!("{:?}", ix))) - } - } - - fn update(&mut self, new_child: Ix, ix: Ix, new_score: usize) -> Result { - if self.is_working() { - let parent_ix = self.parent; - let current_step_type = self.current_step_type()?; - let new_score = new_score + current_step_type.cost(); - if self.current <= 1 || new_score < self.score { - self.step_type = current_step_type; - self.score = new_score; - self.next = new_child; - self.current += 1; - } else { - self.current += 1; - } - Ok(parent_ix) - } else { - Err(Error::CannotUpdateNode(format!("{:?}", ix))) - } - } -} - -#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)] -pub enum NodeType { - FinishedPattern, - FinishedText, - Hit, - NoHit, - StartGroup, - EndGroup, - AlternativeLeft(usize), - AlternativeRight(usize), - RepetitionStart(usize), - RepetitionRestart(usize), - RepetitionEnd, -} - -impl NodeType { - fn get>(opt_flat: Option<&Flat>, opt_text: Option<&char>, ix: &Ix) -> Option { - // TODO this is surprisingly hard to follow for something conceptually simple. Can I make it nicer? 
- match opt_flat { - None if opt_text == None => None, - None => Some(NodeType::FinishedPattern), - Some(flat) => Some(match flat { - Flat::Lit(c) if opt_text == Some(c) => NodeType::Hit, - Flat::Lit(_) if opt_text == None => NodeType::FinishedText, - Flat::Lit(_) => NodeType::NoHit, - Flat::Class(class) if opt_text.map_or(false, |t| class.matches(*t)) => NodeType::Hit, - Flat::Class(_) if opt_text == None => NodeType::FinishedText, - Flat::Class(_) => NodeType::NoHit, - Flat::GroupStart => NodeType::StartGroup, - Flat::GroupEnd => NodeType::EndGroup, - Flat::AlternativeLeft(off) => NodeType::AlternativeLeft(*off), - Flat::AlternativeRight(off) => NodeType::AlternativeRight(*off), - Flat::RepetitionStart(off) => NodeType::RepetitionStart(*off), - Flat::RepetitionEnd(off) if ix.can_restart() => NodeType::RepetitionRestart(*off), - Flat::RepetitionEnd(_) => NodeType::RepetitionEnd, - }) - } - } - - fn step_types(&self) -> NonEmpty { - use StepType::*; - match self { - Self::FinishedPattern => nonempty![SkipText], - Self::FinishedText => nonempty![SkipPattern], - Self::Hit => nonempty![Hit, SkipPattern, SkipText], - Self::NoHit => nonempty![SkipPattern, SkipText], - Self::StartGroup => nonempty![StartGroup], - Self::EndGroup => nonempty![EndGroup], - Self::AlternativeLeft(off) => nonempty![StartLeft, StartRight(*off)], - Self::AlternativeRight(off) => nonempty![PassRight(*off)], - Self::RepetitionStart(off) => nonempty![StartRepetition, PassRepetition(*off)], - Self::RepetitionRestart(off) => nonempty![RestartRepetition(*off)], - Self::RepetitionEnd => nonempty![EndRepetition], - } - } -} - -#[derive(Copy, Clone, Eq, PartialEq, Debug)] -pub enum StepType { - SkipText, - SkipPattern, - Hit, - StartGroup, - EndGroup, - StartLeft, - StartRight(usize), - PassRight(usize), - StartRepetition, - PassRepetition(usize), - EndRepetition, - RestartRepetition(usize), -} - -impl StepType { - fn cost(&self) -> usize { - match self { - Self::SkipPattern => 1, - Self::SkipText => 1, 
- _ => 0, - } - } - - fn step(&self) -> Option> { - match self { - Self::Hit => Some(Step::Hit((), ())), - Self::SkipPattern => Some(Step::SkipPattern(())), - Self::SkipText => Some(Step::SkipText(())), - Self::StartGroup => Some(Step::StartCapture), - Self::EndGroup => Some(Step::StopCapture), - _ => None, - } - } -} - -#[cfg(test)] -pub mod test_logic { - use super::*; - use crate::test_cases::TestCase; - - pub fn test_solve(test_case: TestCase) { - let desugared = test_case.problem.desugar(); - let actual = Sln::solve(&desugared).unwrap(); - assert_eq!(test_case.score, *actual.score()); - assert_eq!(test_case.trace, *actual.trace()); - } -} diff --git a/fuzzy/src/lib.rs b/fuzzy/src/lib.rs index aaebff1..25842db 100644 --- a/fuzzy/src/lib.rs +++ b/fuzzy/src/lib.rs @@ -14,7 +14,6 @@ //! ```rust //! use fuzzy::Output; //! use fuzzy::regex_question::RegexQuestion; -//! use fuzzy::lattice_solution::LatticeSolution; //! use fuzzy::table_solution::TableSolution; //! use fuzzy::diff_output::DiffOutput; //! use fuzzy::error::Error; @@ -23,7 +22,7 @@ //! let question = RegexQuestion { pattern_regex, text }; //! let problem = question.ask()?; //! let problem_core = problem.desugar(); -//! let solution: TableSolution = LatticeSolution::solve(&problem_core)?; +//! let solution = TableSolution::solve(&problem_core)?; //! let output = DiffOutput::new(&solution.score(), &solution.trace()); //! println!("{}", output); //! Ok(()) @@ -34,7 +33,6 @@ use std::fmt::Display; use regex_syntax::hir; pub mod regex_question; -pub mod lattice_solution; pub mod table_solution; pub mod debug_output; pub mod diff_output; diff --git a/fuzzy/src/table_solution.rs b/fuzzy/src/table_solution.rs index 8b80b08..a4059a1 100644 --- a/fuzzy/src/table_solution.rs +++ b/fuzzy/src/table_solution.rs @@ -5,8 +5,9 @@ //! do these in the future. 
use crate::{ElementCore, Match, Problem, Step}; +use crate::error::Error; use crate::flat_pattern::{Flat, FlatPattern}; -use crate::lattice_solution::{LatticeConfig, LatticeIx, LatticeSolution, LatticeState, Node, StepType}; +use nonempty::{NonEmpty, nonempty}; #[derive(Eq, PartialEq, Debug)] pub struct TableSolution { @@ -14,22 +15,123 @@ pub struct TableSolution { trace: Vec>, } -impl LatticeSolution for TableSolution { - type Conf = Config; - type Ix = Ix; - type State = State; - - fn new(score: usize, trace: Vec>) -> Self { +impl TableSolution { + pub fn new(score: usize, trace: Vec>) -> Self { TableSolution { score, trace } } - fn score_lattice(&self) -> &usize { + pub fn score(&self) -> &usize { &self.score } - fn trace_lattice(&self) -> &Vec> { + pub fn trace(&self) -> &Vec> { &self.trace } + + pub fn solve(problem: &Problem) -> Result { + let conf = Config::new(problem); + let mut state = State::new(&conf); + + let start_ix = conf.start(); + let end_ix = conf.end(); + + let _ = Self::calculate_optimal_path(&conf, &mut state)?; + + let start_node = state.get(start_ix); + let score = start_node.done_info() + .map(|i| i.0) + .map_err(|_| Error::IncompleteFinalState)?; + + let mut trace = vec![]; + let mut from = start_ix; + loop { + let node = state.get(from); + if !node.is_done() || from == end_ix { break; } + let (patt, text) = conf.get(from); + let (_, step_type, next) = node.done_info()?; + if let Some(step) = step_type.step() { + let final_step = step.map( + |_| match patt { + Some(Flat::Lit(c)) => Match::Lit(*c), + Some(Flat::Class(c)) => Match::Class(c.clone()), + unexpected => panic!("Unexpected trace pattern {:?}", unexpected), + }, + |_| match text { + Some(c) => *c, + unexpected => panic!("Unexpected trace text {:?}", unexpected), + } + ); + trace.push(final_step); + } + from = next; + } + if from != end_ix { + return Err(Error::IncompleteFinalState); + } + + Ok(Self::new(score, trace)) + } + + fn calculate_optimal_path( + conf: &Config, + state: 
&mut State, + ) -> Result<(), Error> { + let start_ix = conf.start(); + let end_ix = conf.end(); + + let mut loop_state = LoopState::Down(Down { + parent: Default::default(), + current: start_ix, + }); + + let mut loop_counter = 0; + + loop { + loop_counter += 1; + if loop_counter >= 1000000000 { // TODO make this max configurable + return Err(Error::ExceededMaxSteps(loop_counter)); + } + let new_parent = match &loop_state { + LoopState::Down(down) if state.get(down.current).is_ready() => { + let (flat, text) = conf.get(down.current); + let opt_node_type = NodeType::get(flat, text, &down.current); + let node_state = state.get_mut(down.current); + node_state.initialise(end_ix, down.parent, down.current, opt_node_type)?; + down.parent + } + LoopState::Down(down) => down.parent, + LoopState::Back(back) => { + let new_child = back.child; + let (new_score, _, _) = state.get(new_child).done_info()?; + let node_state = state.get_mut(back.current); + let new_parent = node_state.update(new_child, back.current, new_score)?; + new_parent + } + }; + + let current_ix = loop_state.current(); + let final_state = state.get(current_ix); + if current_ix == start_ix && final_state.is_done() { + break; + } else if final_state.is_done() { + loop_state = LoopState::Back(Back { + current: new_parent, + child: current_ix, + }); + } else if final_state.is_working() { + let current_step_type = final_state.current_step_type()?; + let child = conf.step(current_ix, current_step_type); + loop_state = LoopState::Down(Down { + parent: current_ix, + current: child, + }); + } else { + return Err(Error::NoNodeProgress(format!("{:?}", current_ix))); + } + } + + Ok(()) + } } /// Stores the text and pattern from the original [`Problem`](crate::Problem). 
@@ -41,7 +143,7 @@ pub struct Config { pattern: FlatPattern, } -impl LatticeConfig for Config { +impl Config { fn new(problem: &Problem) -> Self { let pattern = FlatPattern::custom(&problem.pattern, 1); let text = problem.text.atoms.clone(); @@ -125,7 +227,7 @@ impl LatticeConfig for Config { } pub struct State { - nodes: Vec>, + nodes: Vec, pattern_len: usize, } @@ -133,9 +235,7 @@ impl State { fn node(&self, ix: Ix) -> usize { ix.text * self.pattern_len + ix.pattern + ix.rep_off } -} -impl LatticeState for State { fn new(conf: &Config) -> Self { // we need an extra row/col for indices at the end of pattern and text let pattern_len = conf.pattern.len() + 1; @@ -148,20 +248,15 @@ impl LatticeState for State { } } - fn get(&self, ix: Ix) -> &Node { + fn get(&self, ix: Ix) -> &Node { let node_ix = self.node(ix); &self.nodes[node_ix] } - fn get_mut(&mut self, ix: Ix) -> &mut Node { + fn get_mut(&mut self, ix: Ix) -> &mut Node { let node_ix = self.node(ix); &mut self.nodes[node_ix] } - - fn set(&mut self, ix: Ix, node: Node) { - let node_ix = self.node(ix); - self.nodes[node_ix] = node; - } } /// Indexes into [`State`]. @@ -184,17 +279,280 @@ pub struct Ix { pub rep_off: usize, } -impl LatticeIx for Ix { +impl Ix { fn can_restart(&self) -> bool { self.rep_off == 0 } } +#[derive(Debug)] +enum LoopState { + Down(Down), + Back(Back), +} + +impl LoopState { + fn current(&self) -> Ix { + match self { + LoopState::Down(down) => down.current, + LoopState::Back(back) => back.current, + } + } +} + +#[derive(Debug)] +struct Down { + parent: Ix, + current: Ix, +} + +#[derive(Debug)] +struct Back { + current: Ix, + child: Ix, +} + +// TODO make a better Node type +// +// Calculate_optimal_path (originally called solve_ix) used to store a lot of state on the stack: +// the parent node, our progress through the possible step types, the optimal score, etc. The +// node was a simple enum which was either Ready, Working, or Done. 
Only the Done value had any +// fields, and it was never mutated. +// +// Once we began to run out of stack space for mid-sized use-cases, we transferred all of that +// state into the heap by adding it to this Node struct. Much of this information is mutated as we +// try out each possible step type. +// +// I had a lot of trouble implementing this expanded node. Solve loops over my table of node +// values, taking a mutable reference to a single node in each iteration. My code originally +// pattern matched on the Node enum, and called methods on inner types which could only be accessed +// when node had the right case. But I struggled to do this and satisfy rust's borrow checker. +// +// For now, I've abandoned pattern matching and type safety, and implemented Node as an abstract +// data type. The node still has three states: Ready, Working, and Done, but they aren't reflected +// in rust's type system. Instead, Node methods return errors if they are called when the node is +// in the wrong state. +// +// The three states are a bit implicit in the Node structure. They are driven by current. Current +// changes from 0..=step_types.len()+1 over the life of the Node: +// +// 1. A node is Ready if current == 0 +// 2. A node is Working if 1 <= current <= step_types.len() +// 3. A node is Done if current == step_types.len() + 1 +// +// When a node is working, the current step type being attempted is step_types[current-1]. +// +// When a node has processed at least one child (current >= 2), score/step_type/next record the +// optimal choice among step_types[0..current-1]. This means those fields are optimal when a Node +// is Done. +// +// I'd like to return to this Node when I'm more comfortable working with rust, and do a better job +// implementing it. 
+ +#[derive(Clone, Eq, PartialEq, Debug)] +pub struct Node { + current: usize, + parent: Ix, + score: usize, + step_type: StepType, + next: Ix, + step_types: Vec, +} + +impl Node { + pub fn new() -> Self { + Self { + current: 0, + parent: Default::default(), + score: 0, + step_type: StepType::Hit, + next: Default::default(), + step_types: vec![], + } + } + + fn is_ready(&self) -> bool { + self.current == 0 + } + + fn is_working(&self) -> bool { + self.current > 0 && self.current <= self.step_types.len() + } + + fn is_done(&self) -> bool { + self.current > self.step_types.len() + } + + fn current_step_type(&self) -> Result { + if self.is_working() { + Ok(self.step_types[self.current - 1]) + } else { + Err(Error::CannotGetNodeField("current_step_type", "working")) + } + } + + fn done_info(&self) -> Result<(usize, StepType, Ix), Error> { + if self.is_done() { + Ok((self.score, self.step_type, self.next)) + } else { + Err(Error::CannotGetNodeField("score/step_type/next", "done")) + } + } + + fn initialise(&mut self, end_ix: Ix, parent_ix: Ix, ix: Ix, opt_node_type: Option) -> Result<(), Error>{ + if self.is_ready() { + match opt_node_type { + Some(node_type) => { + let step_types = Vec::from(node_type.step_types()); + self.parent = parent_ix; + self.current += 1; + self.step_types = step_types; + Ok(()) + } + None if ix == end_ix => { // end_ix: insert dummy done value + self.parent = parent_ix; + self.current += 1; + Ok(()) + } + None => { + Err(Error::NoNodeType(format!("{:?}", ix))) + } + } + } else { + Err(Error::CannotInitialiseNode(format!("{:?}", ix))) + } + } + + fn update(&mut self, new_child: Ix, ix: Ix, new_score: usize) -> Result { + if self.is_working() { + let parent_ix = self.parent; + let current_step_type = self.current_step_type()?; + let new_score = new_score + current_step_type.cost(); + if self.current <= 1 || new_score < self.score { + self.step_type = current_step_type; + self.score = new_score; + self.next = new_child; + self.current += 1; + } 
else { + self.current += 1; + } + Ok(parent_ix) + } else { + Err(Error::CannotUpdateNode(format!("{:?}", ix))) + } + } +} + +#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)] +pub enum NodeType { + FinishedPattern, + FinishedText, + Hit, + NoHit, + StartGroup, + EndGroup, + AlternativeLeft(usize), + AlternativeRight(usize), + RepetitionStart(usize), + RepetitionRestart(usize), + RepetitionEnd, +} + +impl NodeType { + fn get(opt_flat: Option<&Flat>, opt_text: Option<&char>, ix: &Ix) -> Option { + // TODO this is surprisingly hard to follow for something conceptually simple. Can I make it nicer? + match opt_flat { + None if opt_text == None => None, + None => Some(NodeType::FinishedPattern), + Some(flat) => Some(match flat { + Flat::Lit(c) if opt_text == Some(c) => NodeType::Hit, + Flat::Lit(_) if opt_text == None => NodeType::FinishedText, + Flat::Lit(_) => NodeType::NoHit, + Flat::Class(class) if opt_text.map_or(false, |t| class.matches(*t)) => NodeType::Hit, + Flat::Class(_) if opt_text == None => NodeType::FinishedText, + Flat::Class(_) => NodeType::NoHit, + Flat::GroupStart => NodeType::StartGroup, + Flat::GroupEnd => NodeType::EndGroup, + Flat::AlternativeLeft(off) => NodeType::AlternativeLeft(*off), + Flat::AlternativeRight(off) => NodeType::AlternativeRight(*off), + Flat::RepetitionStart(off) => NodeType::RepetitionStart(*off), + Flat::RepetitionEnd(off) if ix.can_restart() => NodeType::RepetitionRestart(*off), + Flat::RepetitionEnd(_) => NodeType::RepetitionEnd, + }) + } + } + + fn step_types(&self) -> NonEmpty { + use StepType::*; + match self { + Self::FinishedPattern => nonempty![SkipText], + Self::FinishedText => nonempty![SkipPattern], + Self::Hit => nonempty![Hit, SkipPattern, SkipText], + Self::NoHit => nonempty![SkipPattern, SkipText], + Self::StartGroup => nonempty![StartGroup], + Self::EndGroup => nonempty![EndGroup], + Self::AlternativeLeft(off) => nonempty![StartLeft, StartRight(*off)], + Self::AlternativeRight(off) => 
nonempty![PassRight(*off)], + Self::RepetitionStart(off) => nonempty![StartRepetition, PassRepetition(*off)], + Self::RepetitionRestart(off) => nonempty![RestartRepetition(*off)], + Self::RepetitionEnd => nonempty![EndRepetition], + } + } +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum StepType { + SkipText, + SkipPattern, + Hit, + StartGroup, + EndGroup, + StartLeft, + StartRight(usize), + PassRight(usize), + StartRepetition, + PassRepetition(usize), + EndRepetition, + RestartRepetition(usize), +} + +impl StepType { + fn cost(&self) -> usize { + match self { + Self::SkipPattern => 1, + Self::SkipText => 1, + _ => 0, + } + } + + fn step(&self) -> Option> { + match self { + Self::Hit => Some(Step::Hit((), ())), + Self::SkipPattern => Some(Step::SkipPattern(())), + Self::SkipText => Some(Step::SkipText(())), + Self::StartGroup => Some(Step::StartCapture), + Self::EndGroup => Some(Step::StopCapture), + _ => None, + } + } +} + +#[cfg(test)] +pub mod test_logic { + use super::*; + use crate::test_cases::TestCase; + + pub fn test_solve(test_case: TestCase) { + let desugared = test_case.problem.desugar(); + let actual = TableSolution::solve(&desugared).unwrap(); + assert_eq!(test_case.score, *actual.score()); + assert_eq!(test_case.trace, *actual.trace()); + } +} #[cfg(test)] mod tests { - use super::TableSolution; + use super::test_logic; use crate::test_cases::TestCase; - use crate::lattice_solution::test_logic; use test_case::test_case; #[test_case(TestCase::match_empty())] @@ -222,6 +580,6 @@ mod tests { #[test_case(TestCase::fail_repetition_2())] #[test_case(TestCase::fail_repetition_3())] fn test_solve(test: TestCase) { - test_logic::test_solve::(test); + test_logic::test_solve(test); } } diff --git a/fuzzy_cli/src/lib.rs b/fuzzy_cli/src/lib.rs index 7e77993..baaee7e 100644 --- a/fuzzy_cli/src/lib.rs +++ b/fuzzy_cli/src/lib.rs @@ -1,7 +1,6 @@ use clap::Parser; use fuzzy::Output; use fuzzy::diff_output::DiffOutput; -use 
fuzzy::lattice_solution::LatticeSolution; use fuzzy::table_solution::TableSolution; use fuzzy::regex_question::RegexQuestion; use fuzzy::error::Error; @@ -40,7 +39,7 @@ pub fn run(args: Args) -> Result { fn run_impl(question: RegexQuestion) -> Result { let problem = question.ask()?; let problem_core = problem.desugar(); - let solution: TableSolution = LatticeSolution::solve(&problem_core)?; + let solution = TableSolution::solve(&problem_core)?; let output = O::new(&solution.score(), &solution.trace()); Ok(format!("{}", output)) } diff --git a/fuzzy_lambda/src/main.rs b/fuzzy_lambda/src/main.rs index ca10dad..d2ff7cf 100644 --- a/fuzzy_lambda/src/main.rs +++ b/fuzzy_lambda/src/main.rs @@ -1,6 +1,5 @@ use fuzzy::Output; use fuzzy::diff_output::{Chunk, DiffOutput}; -use fuzzy::lattice_solution::LatticeSolution; use fuzzy::table_solution::TableSolution; use fuzzy::regex_question::RegexQuestion; use lambda_http::{run, service_fn, Body, Error, Request, Response}; @@ -56,7 +55,7 @@ async fn function_handler(event: Request) -> Result, Error> { let problem = RegexQuestion { pattern_regex: args.pattern, text: args.text }.ask()?; let problem_core = problem.desugar(); - let solution: TableSolution = LatticeSolution::solve(&problem_core)?; + let solution = TableSolution::solve(&problem_core)?; let output = DiffOutput::new(&solution.score(), &solution.trace()); let body = Out { score: *solution.score(), trace: OutChunk::from(&output.chunks) }; let body_json = serde_json::to_string(&body)?; From ca78e4b11010db0c15f0140833a21e4340657b8f Mon Sep 17 00:00:00 2001 From: Sam Roberts Date: Sat, 12 Apr 2025 21:25:19 +1000 Subject: [PATCH 05/12] Simplify: remove a few unnecesary getters/setters --- fuzzy/src/lib.rs | 2 +- fuzzy/src/table_solution.rs | 22 +++++----------------- fuzzy_cli/src/lib.rs | 2 +- fuzzy_lambda/src/main.rs | 4 ++-- 4 files changed, 9 insertions(+), 21 deletions(-) diff --git a/fuzzy/src/lib.rs b/fuzzy/src/lib.rs index 25842db..31cedf5 100644 --- a/fuzzy/src/lib.rs 
+++ b/fuzzy/src/lib.rs @@ -23,7 +23,7 @@ //! let problem = question.ask()?; //! let problem_core = problem.desugar(); //! let solution = TableSolution::solve(&problem_core)?; -//! let output = DiffOutput::new(&solution.score(), &solution.trace()); +//! let output = DiffOutput::new(&solution.score, &solution.trace); //! println!("{}", output); //! Ok(()) //! } diff --git a/fuzzy/src/table_solution.rs b/fuzzy/src/table_solution.rs index a4059a1..c64bf8e 100644 --- a/fuzzy/src/table_solution.rs +++ b/fuzzy/src/table_solution.rs @@ -11,23 +11,11 @@ use nonempty::{NonEmpty, nonempty}; #[derive(Eq, PartialEq, Debug)] pub struct TableSolution { - score: usize, - trace: Vec>, + pub score: usize, + pub trace: Vec>, } impl TableSolution { - pub fn new(score: usize, trace: Vec>) -> Self { - TableSolution { score, trace } - } - - pub fn score(&self) -> &usize { - &self.score - } - - pub fn trace(&self) -> &Vec> { - &self.trace - } - pub fn solve(problem: &Problem) -> Result { let conf = Config::new(problem); let mut state = State::new(&conf); @@ -69,7 +57,7 @@ impl TableSolution { return Err(Error::IncompleteFinalState); } - Ok(Self::new(score, trace)) + Ok(Self { score, trace }) } fn calculate_optimal_path( @@ -545,8 +533,8 @@ pub mod test_logic { pub fn test_solve(test_case: TestCase) { let desugared = test_case.problem.desugar(); let actual = TableSolution::solve(&desugared).unwrap(); - assert_eq!(test_case.score, *actual.score()); - assert_eq!(test_case.trace, *actual.trace()); + assert_eq!(test_case.score, actual.score); + assert_eq!(test_case.trace, actual.trace); } } #[cfg(test)] diff --git a/fuzzy_cli/src/lib.rs b/fuzzy_cli/src/lib.rs index baaee7e..178615c 100644 --- a/fuzzy_cli/src/lib.rs +++ b/fuzzy_cli/src/lib.rs @@ -40,6 +40,6 @@ fn run_impl(question: RegexQuestion) -> Result { let problem = question.ask()?; let problem_core = problem.desugar(); let solution = TableSolution::solve(&problem_core)?; - let output = O::new(&solution.score(), &solution.trace()); + let 
output = O::new(&solution.score, &solution.trace); Ok(format!("{}", output)) } diff --git a/fuzzy_lambda/src/main.rs b/fuzzy_lambda/src/main.rs index d2ff7cf..b1b7ac7 100644 --- a/fuzzy_lambda/src/main.rs +++ b/fuzzy_lambda/src/main.rs @@ -56,8 +56,8 @@ async fn function_handler(event: Request) -> Result, Error> { let problem = RegexQuestion { pattern_regex: args.pattern, text: args.text }.ask()?; let problem_core = problem.desugar(); let solution = TableSolution::solve(&problem_core)?; - let output = DiffOutput::new(&solution.score(), &solution.trace()); - let body = Out { score: *solution.score(), trace: OutChunk::from(&output.chunks) }; + let output = DiffOutput::new(&solution.score, &solution.trace); + let body = Out { score: solution.score, trace: OutChunk::from(&output.chunks) }; let body_json = serde_json::to_string(&body)?; let resp = Response::builder() From 3fc7822876c91dd3536d775855f2f95b655ee52c Mon Sep 17 00:00:00 2001 From: Sam Roberts Date: Sat, 12 Apr 2025 21:41:44 +1000 Subject: [PATCH 06/12] Simplify: remove Output trait --- fuzzy/src/debug_output.rs | 20 -------------------- fuzzy/src/diff_output.rs | 8 +++----- fuzzy/src/lib.rs | 15 --------------- fuzzy_cli/src/lib.rs | 7 +++---- fuzzy_lambda/src/main.rs | 1 - 5 files changed, 6 insertions(+), 45 deletions(-) delete mode 100644 fuzzy/src/debug_output.rs diff --git a/fuzzy/src/debug_output.rs b/fuzzy/src/debug_output.rs deleted file mode 100644 index c7f6047..0000000 --- a/fuzzy/src/debug_output.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! Provides an implementation of [`Output`] suitable for development. 
- -use crate::{Match, Output, Step}; -use std::fmt; - -pub struct DebugOutput { - output: String, -} - -impl Output for DebugOutput { - fn new(score: &usize, trace: &Vec>) -> Self { - Self { output: format!("score: {}\ntrace: {:#?}", *score, *trace) } - } -} - -impl fmt::Display for DebugOutput { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.output) - } -} diff --git a/fuzzy/src/diff_output.rs b/fuzzy/src/diff_output.rs index 37cf07c..8d89046 100644 --- a/fuzzy/src/diff_output.rs +++ b/fuzzy/src/diff_output.rs @@ -1,6 +1,4 @@ -//! Provides an implementation of [`Output`] that mimics git's character-level diff. - -use crate::{Output, Match, Step}; +use crate::{Match, Step}; use std::fmt; // NOTE: because we do character by character diffs, this won't be the real diff format @@ -48,8 +46,8 @@ pub struct Same { pub text: Vec } #[derive(Eq, PartialEq, Debug)] pub struct Diff { pub taken: Vec, pub added: Vec } -impl Output for DiffOutput { - fn new(_score: &usize, trace: &Vec>) -> Self { +impl DiffOutput { + pub fn new(_score: &usize, trace: &Vec>) -> Self { let mut chunks = vec![]; for step in trace.iter() { let current_chunk = chunks.last_mut(); diff --git a/fuzzy/src/lib.rs b/fuzzy/src/lib.rs index 31cedf5..ada642c 100644 --- a/fuzzy/src/lib.rs +++ b/fuzzy/src/lib.rs @@ -12,7 +12,6 @@ //! Implementations can be combined as follows: //! //! ```rust -//! use fuzzy::Output; //! use fuzzy::regex_question::RegexQuestion; //! use fuzzy::table_solution::TableSolution; //! use fuzzy::diff_output::DiffOutput; @@ -29,28 +28,14 @@ //! } //! ``` -use std::fmt::Display; use regex_syntax::hir; pub mod regex_question; pub mod table_solution; -pub mod debug_output; pub mod diff_output; pub mod flat_pattern; pub mod error; -/// Displays the final solution. 
-/// -/// Output implementations are just types that implement -/// [`Display`](https://doc.rust-lang.org/std/fmt/trait.Display.html) and can be constructed out of -/// the [`score`](TableSolution::score) and [`trace`](TableSolution::trace). -/// -/// If the [`TableSolution`] API changes, we will probably change this API as well. -pub trait Output : Display { - /// Build the display. This value will have a user-friendly string representation. - fn new(score: &usize, trace: &Vec>) -> Self; -} - /// A problem to be solved: contains the pattern we are matching text against, as well as the text /// which may or may not match it. #[derive(Eq, PartialEq, Clone, Debug)] diff --git a/fuzzy_cli/src/lib.rs b/fuzzy_cli/src/lib.rs index 178615c..8656407 100644 --- a/fuzzy_cli/src/lib.rs +++ b/fuzzy_cli/src/lib.rs @@ -1,5 +1,4 @@ use clap::Parser; -use fuzzy::Output; use fuzzy::diff_output::DiffOutput; use fuzzy::table_solution::TableSolution; use fuzzy::regex_question::RegexQuestion; @@ -33,13 +32,13 @@ pub fn run(args: Args) -> Result { }; let question = RegexQuestion { pattern_regex, text }; - run_impl::(question) + run_impl(question) } -fn run_impl(question: RegexQuestion) -> Result { +fn run_impl(question: RegexQuestion) -> Result { let problem = question.ask()?; let problem_core = problem.desugar(); let solution = TableSolution::solve(&problem_core)?; - let output = O::new(&solution.score, &solution.trace); + let output = DiffOutput::new(&solution.score, &solution.trace); Ok(format!("{}", output)) } diff --git a/fuzzy_lambda/src/main.rs b/fuzzy_lambda/src/main.rs index b1b7ac7..477efb5 100644 --- a/fuzzy_lambda/src/main.rs +++ b/fuzzy_lambda/src/main.rs @@ -1,4 +1,3 @@ -use fuzzy::Output; use fuzzy::diff_output::{Chunk, DiffOutput}; use fuzzy::table_solution::TableSolution; use fuzzy::regex_question::RegexQuestion; From 4c26447e92e1feee6e2d9c04ed861dc312aabbb6 Mon Sep 17 00:00:00 2001 From: Sam Roberts Date: Sat, 12 Apr 2025 22:05:04 +1000 Subject: [PATCH 07/12] Simplify: 
add simple API for users --- fuzzy/src/lib.rs | 37 ++++++++++++++----------------------- fuzzy_cli/src/lib.rs | 14 ++------------ fuzzy_lambda/src/main.rs | 14 +++++--------- 3 files changed, 21 insertions(+), 44 deletions(-) diff --git a/fuzzy/src/lib.rs b/fuzzy/src/lib.rs index ada642c..f5a0a4f 100644 --- a/fuzzy/src/lib.rs +++ b/fuzzy/src/lib.rs @@ -4,29 +4,6 @@ //! //! In lieu of better documentation, see the project README for more discussion about the regex //! features we support and how well the "closest match" works in practice. -//! -//! This crate is very early in it's development, it's API is akward, and will likely be changed in -//! breaking ways several times before it matures. We don't currently implement any convenience -//! functions which match a pattern against a text in one call. -//! -//! Implementations can be combined as follows: -//! -//! ```rust -//! use fuzzy::regex_question::RegexQuestion; -//! use fuzzy::table_solution::TableSolution; -//! use fuzzy::diff_output::DiffOutput; -//! use fuzzy::error::Error; -//! -//! fn fuzzy_match(pattern_regex: String, text: String) -> Result<(), Error> { -//! let question = RegexQuestion { pattern_regex, text }; -//! let problem = question.ask()?; -//! let problem_core = problem.desugar(); -//! let solution = TableSolution::solve(&problem_core)?; -//! let output = DiffOutput::new(&solution.score, &solution.trace); -//! println!("{}", output); -//! Ok(()) -//! } -//! 
``` use regex_syntax::hir; @@ -36,6 +13,20 @@ pub mod diff_output; pub mod flat_pattern; pub mod error; +use regex_question::RegexQuestion; +use table_solution::TableSolution; +use diff_output::DiffOutput; +use error::Error; + +pub fn fuzzy_match(pattern_regex: String, text: String) -> Result { + let question = RegexQuestion { pattern_regex, text }; + let problem = question.ask()?; + let problem_core = problem.desugar(); + let solution = TableSolution::solve(&problem_core)?; + let output = DiffOutput::new(&solution.score, &solution.trace); + return Ok(output); +} + /// A problem to be solved: contains the pattern we are matching text against, as well as the text /// which may or may not match it. #[derive(Eq, PartialEq, Clone, Debug)] diff --git a/fuzzy_cli/src/lib.rs b/fuzzy_cli/src/lib.rs index 8656407..5f4686d 100644 --- a/fuzzy_cli/src/lib.rs +++ b/fuzzy_cli/src/lib.rs @@ -1,7 +1,5 @@ use clap::Parser; -use fuzzy::diff_output::DiffOutput; -use fuzzy::table_solution::TableSolution; -use fuzzy::regex_question::RegexQuestion; +use fuzzy; use fuzzy::error::Error; use std::fs; @@ -31,14 +29,6 @@ pub fn run(args: Args) -> Result { fs::read_to_string(args.text)? 
}; - let question = RegexQuestion { pattern_regex, text }; - run_impl(question) -} - -fn run_impl(question: RegexQuestion) -> Result { - let problem = question.ask()?; - let problem_core = problem.desugar(); - let solution = TableSolution::solve(&problem_core)?; - let output = DiffOutput::new(&solution.score, &solution.trace); + let output = fuzzy::fuzzy_match(pattern_regex, text)?; Ok(format!("{}", output)) } diff --git a/fuzzy_lambda/src/main.rs b/fuzzy_lambda/src/main.rs index 477efb5..c86a1ee 100644 --- a/fuzzy_lambda/src/main.rs +++ b/fuzzy_lambda/src/main.rs @@ -1,6 +1,6 @@ -use fuzzy::diff_output::{Chunk, DiffOutput}; -use fuzzy::table_solution::TableSolution; -use fuzzy::regex_question::RegexQuestion; +use fuzzy; +use fuzzy::diff_output::Chunk; + use lambda_http::{run, service_fn, Body, Error, Request, Response}; use serde::{Serialize, Deserialize}; @@ -15,7 +15,6 @@ struct Args { #[derive(Serialize)] struct Out { - score: usize, trace: Vec, } @@ -51,12 +50,9 @@ impl OutChunk { async fn function_handler(event: Request) -> Result, Error> { let body_str = std::str::from_utf8(event.body())?; let args = serde_json::from_str::(body_str)?; + let output = fuzzy::fuzzy_match(args.pattern, args.text)?; - let problem = RegexQuestion { pattern_regex: args.pattern, text: args.text }.ask()?; - let problem_core = problem.desugar(); - let solution = TableSolution::solve(&problem_core)?; - let output = DiffOutput::new(&solution.score, &solution.trace); - let body = Out { score: solution.score, trace: OutChunk::from(&output.chunks) }; + let body = Out { trace: OutChunk::from(&output.chunks) }; let body_json = serde_json::to_string(&body)?; let resp = Response::builder() From ce84fdc89937d5e4cb1e7eaf543c9c4292517e61 Mon Sep 17 00:00:00 2001 From: Sam Roberts Date: Sun, 27 Apr 2025 15:22:27 +1000 Subject: [PATCH 08/12] Simplify: remove RegexQuestion struct --- fuzzy/src/lib.rs | 9 ++- fuzzy/src/regex_question.rs | 156 +++++++++++++++++------------------- 2 files changed, 77 
insertions(+), 88 deletions(-) diff --git a/fuzzy/src/lib.rs b/fuzzy/src/lib.rs index f5a0a4f..78469e3 100644 --- a/fuzzy/src/lib.rs +++ b/fuzzy/src/lib.rs @@ -13,14 +13,15 @@ pub mod diff_output; pub mod flat_pattern; pub mod error; -use regex_question::RegexQuestion; +use regex_question::parse_pattern; use table_solution::TableSolution; use diff_output::DiffOutput; use error::Error; -pub fn fuzzy_match(pattern_regex: String, text: String) -> Result { - let question = RegexQuestion { pattern_regex, text }; - let problem = question.ask()?; +pub fn fuzzy_match(pattern_regex: String, text_str: String) -> Result { + let pattern = parse_pattern(&pattern_regex)?; + let text = Atoms { atoms: text_str.chars().collect() }; + let problem = Problem { pattern, text }; let problem_core = problem.desugar(); let solution = TableSolution::solve(&problem_core)?; let output = DiffOutput::new(&solution.score, &solution.trace); diff --git a/fuzzy/src/regex_question.rs b/fuzzy/src/regex_question.rs index a7cc126..a0df9d0 100644 --- a/fuzzy/src/regex_question.rs +++ b/fuzzy/src/regex_question.rs @@ -4,85 +4,73 @@ //! crate currently operates on unicode characters. For now, we are getting around this by naively //! assuming all characters are ASCII. We will change this in the future. 
-use regex_syntax; use regex_syntax::hir; -use crate::{Atoms, Class, Element, Match, Pattern, Problem, Repetition}; +use crate::{Class, Element, Match, Pattern, Repetition}; use crate::error::Error; -pub struct RegexQuestion { - pub pattern_regex: String, - pub text: String, +pub fn parse_pattern(pattern: &str) -> Result, Error> { + let hir = regex_syntax::parse(pattern)?; + return wrap(parse_impl(&hir)); } -impl RegexQuestion { - pub fn ask(&self) -> Result, Error> { - let pattern = Self::parse_pattern(&self.pattern_regex)?; - let text = Atoms { atoms: self.text.chars().collect() }; - Ok(Problem { pattern, text }) - } - - fn parse_pattern(pattern: &str) -> Result, Error> { - let hir = regex_syntax::parse(pattern)?; - Self::pattern(Self::parse_impl(&hir)) - } +fn wrap(try_elems: Result, Error>) -> Result, Error> { + try_elems.map(|elems| Pattern { elems }) +} - fn pattern(try_elems: Result, Error>) -> Result, Error> { - try_elems.map(|elems| Pattern { elems }) - } - fn parse_impl(hir: &hir::Hir) -> Result, Error> - { - match hir.kind() { - hir::HirKind::Literal(hir::Literal(ref bytes)) => { - // TODO modify Patt::Lit to use bytes rather then chars. For now, assuming ascii - Ok(bytes.iter().map(|b| Element::Match(Match::Lit(*b as char))).collect()) - } - hir::HirKind::Class(class) => { - Ok(vec![Element::Match(Match::Class(Class::from(class.clone())))]) - } - hir::HirKind::Capture(hir::Capture { sub, .. }) => { - Self::pattern(Self::parse_impl(sub)).map(|p| vec![Element::Capture(p)]) - } - hir::HirKind::Alternation(children) => { - match &children[..] { - [] => Ok(vec![]), - [sub] => Self::parse_impl(sub), - [sub1, sub2, subs @ ..] 
=> { - let try_p1 = Self::pattern(Self::parse_impl(sub1)); - let try_p2 = Self::pattern(Self::parse_impl(sub2)); - let mut try_ps = subs.iter().map(|sub| Self::pattern(Self::parse_impl(sub))); - - let try_init = try_p1.and_then(|p1| try_p2.map(|p2| Element::Alternative(p1, p2))); - - let try_alternative = try_init.and_then(|init| - try_ps.try_fold(init, |elem, try_p| - try_p.map(|p| Element::Alternative(Pattern { elems: vec![elem] }, p)) - ) - ); - - try_alternative.map(|alt| vec![alt]) - } +fn parse_impl(hir: &hir::Hir) -> Result, Error> +{ + match hir.kind() { + hir::HirKind::Literal(hir::Literal(ref bytes)) => { + // TODO modify Patt::Lit to use bytes rather then chars. For now, assuming ascii + Ok(bytes.iter().map(|b| Element::Match(Match::Lit(*b as char))).collect()) + } + hir::HirKind::Class(class) => { + Ok(vec![Element::Match(Match::Class(Class::from(class.clone())))]) + } + hir::HirKind::Capture(hir::Capture { sub, .. }) => { + let pattern = wrap(parse_impl(sub))?; + Ok(vec![Element::Capture(pattern)]) + } + hir::HirKind::Alternation(children) => { + match &children[..] { + [] => Ok(vec![]), + [sub] => parse_impl(sub), + [sub1, sub2, subs @ ..] => { + let try_p1 = wrap(parse_impl(sub1)); + let try_p2 = wrap(parse_impl(sub2)); + let mut try_ps = subs.iter().map(|sub| wrap(parse_impl(sub))); + + let try_init = try_p1.and_then(|p1| try_p2.map(|p2| Element::Alternative(p1, p2))); + + let try_alternative = try_init.and_then(|init| + try_ps.try_fold(init, |elem, try_p| + try_p.map(|p| Element::Alternative(Pattern { elems: vec![elem] }, p)) + ) + ); + + try_alternative.map(|alt| vec![alt]) } } - hir::HirKind::Repetition(hir::Repetition { min, max, sub, .. 
}) => { - Result::from_iter( - Self::pattern(Self::parse_impl(sub)).map(|inner| { - let minimum = (*min).try_into().map_err(|_| Error::RegexBoundTooLarge)?; - let maximum = max.map_or(Ok(None), |max| - max.try_into().map(|m| Some(m)).map_err(|_| Error::RegexBoundTooLarge) - )?; - Ok(Element::Repetition(Repetition { minimum, maximum, inner })) - }) - ) - } - hir::HirKind::Concat(subs) => { - let try_nested: Result>, Error> = - Result::from_iter(subs.iter().map(|sub| Self::parse_impl(sub))); - try_nested.map(|nested| nested.into_iter().flatten().collect()) - } - unsupported => { - Err(Error::PatternUnsupported(format!("{:?}", unsupported))) - } + } + hir::HirKind::Repetition(hir::Repetition { min, max, sub, .. }) => { + Result::from_iter( + wrap(parse_impl(sub)).map(|inner| { + let minimum = (*min).try_into().map_err(|_| Error::RegexBoundTooLarge)?; + let maximum = max.map_or(Ok(None), |max| + max.try_into().map(|m| Some(m)).map_err(|_| Error::RegexBoundTooLarge) + )?; + Ok(Element::Repetition(Repetition { minimum, maximum, inner })) + }) + ) + } + hir::HirKind::Concat(subs) => { + let try_nested: Result>, Error> = + Result::from_iter(subs.iter().map(|sub| parse_impl(sub))); + try_nested.map(|nested| nested.into_iter().flatten().collect()) + } + unsupported => { + Err(Error::PatternUnsupported(format!("{:?}", unsupported))) } } } @@ -118,23 +106,23 @@ mod tests { parse_test("a*", vec![rep(lits("a"))]); } - #[test] - fn parse_repetition_2() { + #[test] + fn parse_repetition_2() { parse_test("a+", vec![rep_min(1, lits("a"))]); } - #[test] - fn parse_repetition_3() { + #[test] + fn parse_repetition_3() { parse_test("a{2,}", vec![rep_min(2, lits("a"))]); } - #[test] - fn parse_repetition_4() { + #[test] + fn parse_repetition_4() { parse_test("a{0,3}", vec![rep_bound(0, 3, lits("a"))]); } - #[test] - fn parse_repetition_5() { + #[test] + fn parse_repetition_5() { parse_test("a{4}", vec![rep_bound(4, 4, lits("a"))]); } @@ -155,7 +143,7 @@ mod tests { fn parse_test(pattern: 
&str, expected_elems: Vec) { let expected_pattern = Pattern { elems: expected_elems }; - let actual_pattern = RegexQuestion::parse_pattern(&pattern).expect("Cannot parse pattern"); + let actual_pattern = parse_pattern(&pattern).expect("Cannot parse pattern"); assert_eq!(expected_pattern, actual_pattern); } @@ -166,21 +154,21 @@ mod tests { proptest! { #[test] fn smoketest(pattern in "\\PC*") { - let _ = RegexQuestion::parse_pattern(&pattern); + let _ = parse_pattern(&pattern); } #[test] fn literals(pattern in LITERAL_PATTERN_REGEX) { let expected_pattern = Pattern { elems: lits(&pattern) }; - let actual_pattern = RegexQuestion::parse_pattern(&pattern).expect("Cannot parse pattern"); + let actual_pattern = parse_pattern(&pattern).expect("Cannot parse pattern"); prop_assert_eq!(expected_pattern, actual_pattern); } #[test] fn captures(inner in LITERAL_PATTERN_REGEX) { let wrapped = format!("({})", inner); - let Pattern { elems: actual_inner } = RegexQuestion::parse_pattern(&inner).expect("Cannot parse inner"); - let Pattern { elems: actual_wrapped } = RegexQuestion::parse_pattern(&wrapped).expect("Cannot parse wrapped"); + let Pattern { elems: actual_inner } = parse_pattern(&inner).expect("Cannot parse inner"); + let Pattern { elems: actual_wrapped } = parse_pattern(&wrapped).expect("Cannot parse wrapped"); prop_assert_eq!( actual_wrapped, vec![capture(actual_inner)]); } @@ -196,7 +184,7 @@ mod tests { .reduce(|acc, right| vec![alt(acc, right)]).expect("Cannot be empty"); let expected_pattern = Pattern { elems: expected_alt }; - let actual_pattern = RegexQuestion::parse_pattern(&alt_pattern).expect("Cannot parse pattern"); + let actual_pattern = parse_pattern(&alt_pattern).expect("Cannot parse pattern"); prop_assert_eq!(expected_pattern, actual_pattern); } } From 86d6608c6c8c5ed776e75a5d2d04de2f1082afa4 Mon Sep 17 00:00:00 2001 From: Sam Roberts Date: Sun, 27 Apr 2025 15:44:50 +1000 Subject: [PATCH 09/12] Simplify: rename regex_question module now we don't have that 
named struct --- fuzzy/src/lib.rs | 4 ++-- fuzzy/src/{regex_question.rs => regex_pattern.rs} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename fuzzy/src/{regex_question.rs => regex_pattern.rs} (100%) diff --git a/fuzzy/src/lib.rs b/fuzzy/src/lib.rs index 78469e3..a60eebd 100644 --- a/fuzzy/src/lib.rs +++ b/fuzzy/src/lib.rs @@ -7,13 +7,13 @@ use regex_syntax::hir; -pub mod regex_question; +pub mod regex_pattern; pub mod table_solution; pub mod diff_output; pub mod flat_pattern; pub mod error; -use regex_question::parse_pattern; +use regex_pattern::parse_pattern; use table_solution::TableSolution; use diff_output::DiffOutput; use error::Error; diff --git a/fuzzy/src/regex_question.rs b/fuzzy/src/regex_pattern.rs similarity index 100% rename from fuzzy/src/regex_question.rs rename to fuzzy/src/regex_pattern.rs From bfd709930c6684f08770703d14ee5f79bcdc5534 Mon Sep 17 00:00:00 2001 From: Sam Roberts Date: Sun, 27 Apr 2025 15:46:19 +1000 Subject: [PATCH 10/12] Simplify: remove Problem struct --- fuzzy/src/lib.rs | 113 +++++++++++++++++++----------------- fuzzy/src/table_solution.rs | 16 ++--- 2 files changed, 68 insertions(+), 61 deletions(-) diff --git a/fuzzy/src/lib.rs b/fuzzy/src/lib.rs index a60eebd..328ed0d 100644 --- a/fuzzy/src/lib.rs +++ b/fuzzy/src/lib.rs @@ -21,29 +21,12 @@ use error::Error; pub fn fuzzy_match(pattern_regex: String, text_str: String) -> Result { let pattern = parse_pattern(&pattern_regex)?; let text = Atoms { atoms: text_str.chars().collect() }; - let problem = Problem { pattern, text }; - let problem_core = problem.desugar(); - let solution = TableSolution::solve(&problem_core)?; + let pattern_core = pattern.desugar(); + let solution = TableSolution::solve(&pattern_core, &text)?; let output = DiffOutput::new(&solution.score, &solution.trace); return Ok(output); } -/// A problem to be solved: contains the pattern we are matching text against, as well as the text -/// which may or may not match it. 
-#[derive(Eq, PartialEq, Clone, Debug)] -pub struct Problem { - pub pattern: Pattern, - pub text: Atoms, -} - -impl Problem { - pub fn desugar(&self) -> Problem { - let pattern = self.pattern.desugar(); - let text = self.text.clone(); - Problem { pattern, text } - } -} - #[derive(Eq, PartialEq, Clone, Debug)] pub struct Pattern { elems: Vec, @@ -198,7 +181,8 @@ pub mod test_cases { use regex_syntax::hir::HirKind; pub struct TestCase { - pub problem: Problem, + pub pattern: Pattern, + pub text: Atoms, pub score: usize, pub trace: Vec>, } @@ -206,7 +190,8 @@ pub mod test_cases { impl TestCase { pub fn match_empty() -> Self { Self { - problem: problem(vec![], ""), + pattern: pattern(vec![]), + text: text(""), score: 0, trace: vec![], } @@ -214,7 +199,8 @@ pub mod test_cases { pub fn fail_empty_1() -> Self { Self { - problem: problem(vec![], "a"), + pattern: pattern(vec![]), + text: text("a"), score: 1, trace: vec![ Step::SkipText('a'), @@ -224,7 +210,8 @@ pub mod test_cases { pub fn fail_empty_2() -> Self { Self { - problem: problem(lits("a"), ""), + pattern: pattern(lits("a")), + text: text(""), score: 1, trace: vec![ Step::SkipPattern(Match::Lit('a')), @@ -234,7 +221,8 @@ pub mod test_cases { pub fn match_lit_1() -> Self { Self { - problem: problem(lits("a"), "a"), + pattern: pattern(lits("a")), + text: text("a"), score: 0, trace: vec![ Step::Hit(Match::Lit('a'), 'a'), @@ -244,7 +232,8 @@ pub mod test_cases { pub fn match_lit_2() -> Self { Self { - problem: problem(lits("ab"), "ab"), + pattern: pattern(lits("ab")), + text: text("ab"), score: 0, trace: vec![ Step::Hit(Match::Lit('a'), 'a'), @@ -255,7 +244,8 @@ pub mod test_cases { pub fn fail_lit_1() -> Self { Self { - problem: problem(lits("a"), "aa"), + pattern: pattern(lits("a")), + text: text("aa"), score: 1, trace: vec![ Step::Hit(Match::Lit('a'), 'a'), @@ -266,7 +256,8 @@ pub mod test_cases { pub fn fail_lit_2() -> Self { Self { - problem: problem(lits("aba"), "aa"), + pattern: pattern(lits("aba")), + text: 
text("aa"), score: 1, trace: vec![ Step::Hit(Match::Lit('a'), 'a'), @@ -278,7 +269,8 @@ pub mod test_cases { pub fn fail_lit_3() -> Self { Self { - problem: problem(lits("abcde"), "zabke"), + pattern: pattern(lits("abcde")), + text: text("zabke"), score: 4, trace: vec![ Step::SkipText('z'), @@ -295,7 +287,8 @@ pub mod test_cases { pub fn match_class_1() -> Self { Self { - problem: problem(vec![class(".")], "a"), + pattern: pattern(vec![class(".")]), + text: text("a"), score: 0, trace: vec![ Step::Hit(patt_class("."), 'a'), @@ -305,7 +298,8 @@ pub mod test_cases { pub fn match_class_2() -> Self { Self { - problem: problem(vec![class("[a-zA-Z]")], "a"), + pattern: pattern(vec![class("[a-zA-Z]")]), + text: text("a"), score: 0, trace: vec![ Step::Hit(patt_class("[a-zA-Z]"), 'a'), @@ -315,7 +309,8 @@ pub mod test_cases { pub fn match_class_3() -> Self { Self { - problem: problem(vec![class("[a-zA-Z]")], "X"), + pattern: pattern(vec![class("[a-zA-Z]")]), + text: text("X"), score: 0, trace: vec![ Step::Hit(patt_class("[a-zA-Z]"), 'X'), @@ -325,7 +320,8 @@ pub mod test_cases { pub fn fail_class_1() -> Self { Self { - problem: problem(vec![class("[^a]")], "a"), + pattern: pattern(vec![class("[^a]")]), + text: text("a"), score: 2, trace: vec![ // TODO handle valid possibility that the order of next two steps is reversed @@ -337,7 +333,8 @@ pub mod test_cases { pub fn match_alternative_1() -> Self { Self { - problem: problem(vec![alt(lits("ab"), lits("cd"))], "ab"), + pattern: pattern(vec![alt(lits("ab"), lits("cd"))]), + text: text("ab"), score: 0, trace: vec![ Step::Hit(Match::Lit('a'), 'a'), @@ -348,7 +345,8 @@ pub mod test_cases { pub fn match_alternative_2() -> Self { Self { - problem: problem(vec![alt(lits("ab"), lits("cd"))], "cd"), + pattern: pattern(vec![alt(lits("ab"), lits("cd"))]), + text: text("cd"), score: 0, trace: vec![ Step::Hit(Match::Lit('c'), 'c'), @@ -359,13 +357,12 @@ pub mod test_cases { pub fn match_alternative_3() -> Self { Self { - problem: problem( 
+ pattern: pattern( vec![ - alt(lits("a"), vec![alt(lits("b"), vec![alt(lits("c"), lits("d"))])]), - lit('z') - ], - "cz" + alt(lits("a"), vec![alt(lits("b"), vec![alt(lits("c"), lits("d"))])]), lit('z') + ] ), + text: text("cz"), score: 0, trace: vec![ Step::Hit(Match::Lit('c'), 'c'), @@ -376,7 +373,8 @@ pub mod test_cases { pub fn fail_alternative_1() -> Self { Self { - problem: problem(vec![alt(lits("ab"), lits("cd"))], "acd"), + pattern: pattern(vec![alt(lits("ab"), lits("cd"))]), + text: text("acd"), score: 1, trace: vec![ Step::SkipText('a'), @@ -388,7 +386,8 @@ pub mod test_cases { pub fn match_repetition_1() -> Self { Self { - problem: problem(vec![rep(lits("a"))], "aa"), + pattern: pattern(vec![rep(lits("a"))]), + text: text("aa"), score: 0, trace: vec![ Step::Hit(Match::Lit('a'), 'a'), @@ -399,7 +398,8 @@ pub mod test_cases { pub fn match_repetition_2() -> Self { Self { - problem: problem(vec![rep(vec![lit('a'), rep(lits("b"))])], "aababb"), + pattern: pattern(vec![rep(vec![lit('a'), rep(lits("b"))])]), + text: text("aababb"), score: 0, trace: vec![ Step::Hit(Match::Lit('a'), 'a'), @@ -414,7 +414,8 @@ pub mod test_cases { pub fn match_repetition_3() -> Self { Self { - problem: problem(vec![rep(vec![class("[0-9]")])], "0451"), + pattern: pattern(vec![rep(vec![class("[0-9]")])]), + text: text("0451"), score: 0, trace: vec![ Step::Hit(patt_class("[0-9]"), '0'), @@ -427,7 +428,8 @@ pub mod test_cases { pub fn match_repetition_4() -> Self { Self { - problem: problem(vec![rep_min(1, lits("a"))], "a"), + pattern: pattern(vec![rep_min(1, lits("a"))]), + text: text("a"), score: 0, trace: vec![ Step::Hit(Match::Lit('a'), 'a'), @@ -437,7 +439,8 @@ pub mod test_cases { pub fn match_repetition_5() -> Self { Self { - problem: problem(vec![rep_bound(0, 5, lits("a"))], "aaaa"), + pattern: pattern(vec![rep_bound(0, 5, lits("a"))]), + text: text("aaaa"), score: 0, trace: vec![ Step::Hit(Match::Lit('a'), 'a'), @@ -450,7 +453,8 @@ pub mod test_cases { pub fn 
fail_repetition_1() -> Self { Self { - problem: problem(vec![rep(lits("a"))], "aba"), + pattern: pattern(vec![rep(lits("a"))]), + text: text("aba"), score: 1, trace: vec![ Step::Hit(Match::Lit('a'), 'a'), @@ -462,7 +466,8 @@ pub mod test_cases { pub fn fail_repetition_2() -> Self { Self { - problem: problem(vec![rep_min(1, lits("a"))], ""), + pattern: pattern(vec![rep_min(1, lits("a"))]), + text: text(""), score: 1, trace: vec![ Step::SkipPattern(Match::Lit('a')), @@ -472,7 +477,8 @@ pub mod test_cases { pub fn fail_repetition_3() -> Self { Self { - problem: problem(vec![rep_bound(0, 1, lits("a"))], "aa"), + pattern: pattern(vec![rep_bound(0, 1, lits("a"))]), + text: text("aa"), score: 1, trace: vec![ Step::Hit(Match::Lit('a'), 'a'), @@ -491,12 +497,13 @@ pub mod test_cases { Match::Class(Class::from(wildcard_class)) } - pub fn problem(elems: Vec, text: &str) -> Problem { + pub fn pattern(elems: Vec) -> Pattern { + Pattern { elems } + } + + pub fn text(text: &str) -> Atoms { let atoms = text.chars().collect(); - Problem { - pattern: Pattern { elems }, - text: Atoms { atoms }, - } + Atoms { atoms } } pub fn lits(cs: &str) -> Vec { diff --git a/fuzzy/src/table_solution.rs b/fuzzy/src/table_solution.rs index c64bf8e..a6ef7df 100644 --- a/fuzzy/src/table_solution.rs +++ b/fuzzy/src/table_solution.rs @@ -4,7 +4,7 @@ //! theory it should be relatively efficient, although we haven't done any benchmarks yet. We will //! do these in the future. 
-use crate::{ElementCore, Match, Problem, Step}; +use crate::{Atoms, ElementCore, Match, Pattern, Step}; use crate::error::Error; use crate::flat_pattern::{Flat, FlatPattern}; use nonempty::{NonEmpty, nonempty}; @@ -16,8 +16,8 @@ pub struct TableSolution { } impl TableSolution { - pub fn solve(problem: &Problem) -> Result { - let conf = Config::new(problem); + pub fn solve(pattern: &Pattern, text: &Atoms) -> Result { + let conf = Config::new(pattern, text); let mut state = State::new(&conf); let start_ix = conf.start(); @@ -132,9 +132,9 @@ pub struct Config { } impl Config { - fn new(problem: &Problem) -> Self { - let pattern = FlatPattern::custom(&problem.pattern, 1); - let text = problem.text.atoms.clone(); + fn new(pattern: &Pattern, text: &Atoms) -> Self { + let pattern = FlatPattern::custom(pattern, 1); + let text = text.atoms.clone(); Config { text, pattern } } @@ -531,8 +531,8 @@ pub mod test_logic { use crate::test_cases::TestCase; pub fn test_solve(test_case: TestCase) { - let desugared = test_case.problem.desugar(); - let actual = TableSolution::solve(&desugared).unwrap(); + let desugared = test_case.pattern.desugar(); + let actual = TableSolution::solve(&desugared, &test_case.text).unwrap(); assert_eq!(test_case.score, actual.score); assert_eq!(test_case.trace, actual.trace); } From dcbe294006af3ea07855790d0bba3261553f73de Mon Sep 17 00:00:00 2001 From: Sam Roberts Date: Sun, 27 Apr 2025 15:54:00 +1000 Subject: [PATCH 11/12] Simplify: remove TableSolution struct --- fuzzy/src/lib.rs | 10 +- fuzzy/src/table_solution.rs | 198 +++++++++++++++++------------------- 2 files changed, 103 insertions(+), 105 deletions(-) diff --git a/fuzzy/src/lib.rs b/fuzzy/src/lib.rs index 328ed0d..0e5a2f3 100644 --- a/fuzzy/src/lib.rs +++ b/fuzzy/src/lib.rs @@ -14,7 +14,7 @@ pub mod flat_pattern; pub mod error; use regex_pattern::parse_pattern; -use table_solution::TableSolution; +use table_solution::solve; use diff_output::DiffOutput; use error::Error; @@ -22,11 +22,17 @@ pub 
fn fuzzy_match(pattern_regex: String, text_str: String) -> Result>, +} + #[derive(Eq, PartialEq, Clone, Debug)] pub struct Pattern { elems: Vec, diff --git a/fuzzy/src/table_solution.rs b/fuzzy/src/table_solution.rs index a6ef7df..040b039 100644 --- a/fuzzy/src/table_solution.rs +++ b/fuzzy/src/table_solution.rs @@ -4,122 +4,114 @@ //! theory it should be relatively efficient, although we haven't done any benchmarks yet. We will //! do these in the future. -use crate::{Atoms, ElementCore, Match, Pattern, Step}; +use crate::{Atoms, ElementCore, Match, Pattern, Solution, Step}; use crate::error::Error; use crate::flat_pattern::{Flat, FlatPattern}; use nonempty::{NonEmpty, nonempty}; -#[derive(Eq, PartialEq, Debug)] -pub struct TableSolution { - pub score: usize, - pub trace: Vec>, -} - -impl TableSolution { - pub fn solve(pattern: &Pattern, text: &Atoms) -> Result { - let conf = Config::new(pattern, text); - let mut state = State::new(&conf); - - let start_ix = conf.start(); - let end_ix = conf.end(); - - let _ = Self::calculate_optimal_path(&conf, &mut state)?; - - let start_node = state.get(start_ix); - let score = start_node.done_info() - .map(|i| i.0) - .map_err(|_| Error::IncompleteFinalState)?; - - let mut trace = vec![]; - let mut from = start_ix; - loop { - let node = state.get(from); - if !node.is_done() || from == end_ix { break; } - let (patt, text) = conf.get(from); - let (_, step_type, next) = node.done_info()?; - if let Some(step) = step_type.step() { - let final_step = step.map( - |_| match patt { - Some(Flat::Lit(c)) => Match::Lit(*c), - Some(Flat::Class(c)) => Match::Class(c.clone()), - unexpected => panic!("Unexpected trace pattern {:?}", unexpected), - }, - |_| match text { - Some(c) => *c, - unexpected => panic!("Unexpected trace text {:?}", unexpected), - } - ); - trace.push(final_step); - } - from = next; - } - if from != end_ix { - return Err(Error::IncompleteFinalState); +pub fn solve(pattern: &Pattern, text: &Atoms) -> Result { + let conf = 
Config::new(pattern, text); + let mut state = State::new(&conf); + + let start_ix = conf.start(); + let end_ix = conf.end(); + + let _ = calculate_optimal_path(&conf, &mut state)?; + + let start_node = state.get(start_ix); + let score = start_node.done_info() + .map(|i| i.0) + .map_err(|_| Error::IncompleteFinalState)?; + + let mut trace = vec![]; + let mut from = start_ix; + loop { + let node = state.get(from); + if !node.is_done() || from == end_ix { break; } + let (patt, text) = conf.get(from); + let (_, step_type, next) = node.done_info()?; + if let Some(step) = step_type.step() { + let final_step = step.map( + |_| match patt { + Some(Flat::Lit(c)) => Match::Lit(*c), + Some(Flat::Class(c)) => Match::Class(c.clone()), + unexpected => panic!("Unexpected trace pattern {:?}", unexpected), + }, + |_| match text { + Some(c) => *c, + unexpected => panic!("Unexpected trace text {:?}", unexpected), + } + ); + trace.push(final_step); } - - Ok(Self { score, trace }) + from = next; } + if from != end_ix { + return Err(Error::IncompleteFinalState); + } + + Ok(Solution { score, trace }) +} - fn calculate_optimal_path( - conf: &Config, - state: &mut State, - ) -> Result<(), Error> { - let start_ix = conf.start(); - let end_ix = conf.end(); +fn calculate_optimal_path( + conf: &Config, + state: &mut State, + ) -> Result<(), Error> { + let start_ix = conf.start(); + let end_ix = conf.end(); - let mut loop_state = LoopState::Down(Down { - parent: Default::default(), - current: start_ix, - }); + let mut loop_state = LoopState::Down(Down { + parent: Default::default(), + current: start_ix, + }); - let mut loop_counter = 0; + let mut loop_counter = 0; - loop { - loop_counter += 1; - if loop_counter >= 1000000000 { // TODO make this max configurable - return Err(Error::ExceededMaxSteps(loop_counter)); + loop { + loop_counter += 1; + if loop_counter >= 1000000000 { // TODO make this max configurable + return Err(Error::ExceededMaxSteps(loop_counter)); + } + let new_parent = match 
&loop_state { + LoopState::Down(down) if state.get(down.current).is_ready() => { + let (flat, text) = conf.get(down.current); + let opt_node_type = NodeType::get(flat, text, &down.current); + let node_state = state.get_mut(down.current); + node_state.initialise(end_ix, down.parent, down.current, opt_node_type)?; + down.parent } - let new_parent = match &loop_state { - LoopState::Down(down) if state.get(down.current).is_ready() => { - let (flat, text) = conf.get(down.current); - let opt_node_type = NodeType::get(flat, text, &down.current); - let node_state = state.get_mut(down.current); - node_state.initialise(end_ix, down.parent, down.current, opt_node_type)?; - down.parent - } - LoopState::Down(down) => down.parent, - LoopState::Back(back) => { - let new_child = back.child; - let (new_score, _, _) = state.get(new_child).done_info()?; - let node_state = state.get_mut(back.current); - let new_parent = node_state.update(new_child, back.current, new_score)?; - new_parent - } - }; - - let current_ix = loop_state.current(); - let final_state = state.get(current_ix); - if current_ix == start_ix && final_state.is_done() { - break; - } else if final_state.is_done() { - loop_state = LoopState::Back(Back { - current: new_parent, - child: current_ix, - }); - } else if final_state.is_working() { - let current_step_type = final_state.current_step_type()?; - let child = conf.step(current_ix, current_step_type); - loop_state = LoopState::Down(Down { - parent: current_ix, - current: child, - }); - } else { - return Err(Error::NoNodeProgress(format!("{:?}", current_ix))); + LoopState::Down(down) => down.parent, + LoopState::Back(back) => { + let new_child = back.child; + let (new_score, _, _) = state.get(new_child).done_info()?; + let node_state = state.get_mut(back.current); + let new_parent = node_state.update(new_child, back.current, new_score)?; + new_parent } + }; + + let current_ix = loop_state.current(); + let final_state = state.get(current_ix); + if current_ix == start_ix 
&& final_state.is_done() { + break; + } else if final_state.is_done() { + loop_state = LoopState::Back(Back { + current: new_parent, + child: current_ix, + }); + } else if final_state.is_working() { + let current_step_type = final_state.current_step_type()?; + let child = conf.step(current_ix, current_step_type); + loop_state = LoopState::Down(Down { + parent: current_ix, + current: child, + }); + } else { + return Err(Error::NoNodeProgress(format!("{:?}", current_ix))); } - - Ok(()) } + + Ok(()) } /// Stores the text and pattern from the original [`Problem`](crate::Problem). @@ -532,7 +524,7 @@ pub mod test_logic { pub fn test_solve(test_case: TestCase) { let desugared = test_case.pattern.desugar(); - let actual = TableSolution::solve(&desugared, &test_case.text).unwrap(); + let actual = solve(&desugared, &test_case.text).unwrap(); assert_eq!(test_case.score, actual.score); assert_eq!(test_case.trace, actual.trace); } From 5317553bddde304b14bcb48649b9dfbc9bc95df5 Mon Sep 17 00:00:00 2001 From: Sam Roberts Date: Sun, 27 Apr 2025 16:09:03 +1000 Subject: [PATCH 12/12] Simplify: minor changes to docs after previous simplifications --- fuzzy/src/diff_output.rs | 4 +++- fuzzy/src/lib.rs | 2 +- fuzzy/src/table_solution.rs | 9 +++++---- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fuzzy/src/diff_output.rs b/fuzzy/src/diff_output.rs index 8d89046..ff69513 100644 --- a/fuzzy/src/diff_output.rs +++ b/fuzzy/src/diff_output.rs @@ -2,7 +2,7 @@ use crate::{Match, Step}; use std::fmt; // NOTE: because we do character by character diffs, this won't be the real diff format -// for now. Instead, we will mimic the git diff format, expect we print out all matching +// for now. Instead, we will mimic the git diff format, except we print out all matching // lines and don't print any line numbers. // // The wording in these structs treat the patttern as the original, and text as new. 
So @@ -12,6 +12,8 @@ use std::fmt; // TODO make this configurable const ANY: char = '?'; +/// A quick display of the final trace, similar to the git character-level diff format. +/// Convenient for small texts. pub struct DiffOutput { pub chunks: Vec, } diff --git a/fuzzy/src/lib.rs b/fuzzy/src/lib.rs index 0e5a2f3..db5b680 100644 --- a/fuzzy/src/lib.rs +++ b/fuzzy/src/lib.rs @@ -158,7 +158,7 @@ impl Class { } } -/// An individual element in [`TableSolution::trace`]. +/// An individual element in [`Solution::trace`]. #[derive(Eq, PartialEq, Clone, Copy, Debug)] pub enum Step { Hit(P, T), diff --git a/fuzzy/src/table_solution.rs b/fuzzy/src/table_solution.rs index 040b039..ff58372 100644 --- a/fuzzy/src/table_solution.rs +++ b/fuzzy/src/table_solution.rs @@ -1,4 +1,4 @@ -//! A theoretically faster implementation of [`Solution`](crate::Solution). +//! A theoretically faster solver than my initial cached recursive implementation. //! //! This implementation pre-allocates a [vector](State) storing state for all [nodes](Ix), so in //! theory it should be relatively efficient, although we haven't done any benchmarks yet. We will @@ -114,7 +114,7 @@ fn calculate_optimal_path( Ok(()) } -/// Stores the text and pattern from the original [`Problem`](crate::Problem). +/// Flattens the text and pattern so we can easily index each one. /// /// Our state stores an array of nodes. This array forms a table, with one dimension representing /// the text, while the other dimension represents an expanded pattern, per [`FlatPattern::custom`]. @@ -240,11 +240,12 @@ impl State { } /// Indexes into [`State`]. +/// This struct is a LOT bigger than I initially expected, attempt to reduce size in the future. #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, Default)] pub struct Ix { - /// The index into the [flattened `Problem::pattern`](crate::flat_pattern::FlatPattern). + /// The index into the flat pattern. pub pattern: usize, - /// The index into [`Problem::text`](crate::Problem::text). 
+ /// The index into the text. pub text: usize, /// This field tracks how many times we are repeating each pattern element. pub reps: usize,