From 030c399fae0b23f341862757b1f7dc0832bf0f40 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 28 Jul 2025 01:15:26 +0100 Subject: [PATCH 1/5] Add cycle detection and tests --- docs/netsuke-design.md | 6 +++++ docs/roadmap.md | 4 +-- src/ir.rs | 46 +++++++++++++++++++++++++++++++++ tests/data/circular.yml | 12 +++++++++ tests/features/ir.feature | 4 +++ tests/ir_from_manifest_tests.rs | 9 +++++++ 6 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 tests/data/circular.yml diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index ea15e8a9..7f206c5c 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -989,6 +989,12 @@ This transformation involves several steps: search maintaining a visitation state) on the dependency graph to fail compilation if a circular dependency is found. + The implemented algorithm performs a depth-first traversal of the target + graph. Each output path is visited once, and visitation state is tracked to + detect back edges. Encountering an already visiting node indicates a cycle, + and compilation fails with `IrGenError::CircularDependency` containing the + offending path. + ### 5.4 Ninja File Synthesis (`ninja_gen.rs`) The final step is to synthesize the `build.ninja` file from the `BuildGraph` diff --git a/docs/roadmap.md b/docs/roadmap.md index 46bfcbb5..8e1dc7b2 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -50,8 +50,8 @@ compilation pipeline from parsing to execution. referenced by a target is valid and that they are mutually exclusive. *(done)* - - [ ] Implement a cycle detection algorithm (e.g., depth-first search) to fail - compilation if a circular dependency is found in the target graph. + - [x] Implement a cycle detection algorithm (e.g., depth-first search) to fail + compilation if a circular dependency is found in the target graph. *(done)* - [ ] **Code Generation and Execution:** diff --git a/src/ir.rs b/src/ir.rs index 36b3deef..92f9ba0a 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -93,6 +93,9 @@ pub enum IrGenError { #[error("duplicate target outputs: {outputs:?}")] DuplicateOutput { outputs: Vec }, + + #[error("A circular dependency was detected involving target '{path}'")] + CircularDependency { path: PathBuf }, } impl BuildGraph { @@ -110,6 +113,8 @@ impl BuildGraph { Self::process_targets(manifest, &mut graph.actions, &mut graph.targets, &rule_map)?; Self::process_defaults(manifest, &mut graph.default_targets); + graph.detect_cycles()?; + Ok(graph) } @@ -165,6 +170,47 @@ impl BuildGraph { defaults.push(PathBuf::from(name)); } } + + fn detect_cycles(&self) -> Result<(), IrGenError> { + #[derive(Clone, Copy)] + enum State { + Visiting, + Visited, + } + + fn visit<'a>( + graph: &'a BuildGraph, + node: &'a PathBuf, + states: &mut HashMap<&'a PathBuf, State>, + ) -> Result<(), &'a PathBuf> { + match states.get(node) { + Some(State::Visited) => return Ok(()), + Some(State::Visiting) => return Err(node), + None => {} + } + + states.insert(node, State::Visiting); + if let Some(edge) = graph.targets.get(node) { + for dep in edge.inputs.iter().chain(&edge.order_only_deps) { + if graph.targets.contains_key(dep) { + visit(graph, dep, states)?; + } + } + } + states.insert(node, State::Visited); + Ok(()) + } + + let mut states = HashMap::new(); + for output in self.targets.keys() { + if !states.contains_key(output) + && let Err(path) = visit(self, output, &mut states) + { + return Err(IrGenError::CircularDependency { path: path.clone() }); + } + } + Ok(()) + } } fn register_action( diff --git a/tests/data/circular.yml b/tests/data/circular.yml new file mode 100644 index 00000000..024fb55a --- /dev/null +++ b/tests/data/circular.yml @@ -0,0 +1,12 @@ +netsuke_version: "1.0.0" +targets: + - name: a + sources: b + recipe: + kind: command + command: "touch a" + - name: b + sources: a + recipe: + kind: command + command: "touch b" diff --git a/tests/features/ir.feature b/tests/features/ir.feature index 9ff8d7e3..c4bd70b0 100644 --- a/tests/features/ir.feature +++ b/tests/features/ir.feature @@ -28,3 +28,7 @@ Feature: BuildGraph Scenario: Duplicate target outputs When the manifest file "tests/data/duplicate_outputs.yml" is compiled to IR Then IR generation fails + + Scenario: Circular dependency detection + When the manifest file "tests/data/circular.yml" is compiled to IR + Then IR generation fails diff --git a/tests/ir_from_manifest_tests.rs b/tests/ir_from_manifest_tests.rs index 597ba75e..5689a56d 100644 --- a/tests/ir_from_manifest_tests.rs +++ b/tests/ir_from_manifest_tests.rs @@ -5,6 +5,7 @@ use netsuke::{ manifest, }; use rstest::rstest; +use std::path::PathBuf; #[rstest] fn minimal_manifest_to_ir() { @@ -38,6 +39,7 @@ enum ExpectedError { }, EmptyRule(String), RuleNotFound(String), + CircularDependency(String), } #[rstest] @@ -64,6 +66,10 @@ enum ExpectedError { "tests/data/rule_not_found.yml", ExpectedError::RuleNotFound("missing_rule".into()) )] +#[case( + "tests/data/circular.yml", + ExpectedError::CircularDependency("a".into()) +)] fn manifest_error_cases(#[case] manifest_path: &str, #[case] expected: ExpectedError) { let manifest = manifest::from_path(manifest_path).expect("load"); let err = BuildGraph::from_manifest(&manifest).expect_err("error"); @@ -87,6 +93,9 @@ fn manifest_error_cases(#[case] manifest_path: &str, #[case] expected: ExpectedE (IrGenError::RuleNotFound { rule_name, .. }, ExpectedError::RuleNotFound(exp_rule)) => { assert_eq!(rule_name, exp_rule); } + (IrGenError::CircularDependency { path }, ExpectedError::CircularDependency(exp_path)) => { + assert_eq!(path, PathBuf::from(exp_path)); + } (other, exp) => panic!("expected {exp:?} but got {other:?}"), } } From 54ca782d079e0ed4f86838ea4335522305bcbc6a Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 28 Jul 2025 22:48:29 +0100 Subject: [PATCH 2/5] Refactor cycle detection --- src/ir.rs | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/src/ir.rs b/src/ir.rs index 92f9ba0a..ade0acea 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -178,24 +178,42 @@ impl BuildGraph { Visited, } + fn should_visit_node<'a>( + states: &mut HashMap<&'a PathBuf, State>, + node: &'a PathBuf, + ) -> Result { + match states.get(node) { + Some(State::Visited) => Ok(false), + Some(State::Visiting) => Err(node), + None => Ok(true), + } + } + + fn visit_dependencies<'a>( + graph: &'a BuildGraph, + edge: &'a BuildEdge, + states: &mut HashMap<&'a PathBuf, State>, + ) -> Result<(), &'a PathBuf> { + for dep in edge.inputs.iter().chain(&edge.order_only_deps) { + if graph.targets.contains_key(dep) { + visit(graph, dep, states)?; + } + } + Ok(()) + } + fn visit<'a>( graph: &'a BuildGraph, node: &'a PathBuf, states: &mut HashMap<&'a PathBuf, State>, ) -> Result<(), &'a PathBuf> { - match states.get(node) { - Some(State::Visited) => return Ok(()), - Some(State::Visiting) => return Err(node), - None => {} + if !should_visit_node(states, node)? { + return Ok(()); } states.insert(node, State::Visiting); if let Some(edge) = graph.targets.get(node) { - for dep in edge.inputs.iter().chain(&edge.order_only_deps) { - if graph.targets.contains_key(dep) { - visit(graph, dep, states)?; - } - } + visit_dependencies(graph, edge, states)?; } states.insert(node, State::Visited); Ok(()) @@ -203,9 +221,10 @@ impl BuildGraph { let mut states = HashMap::new(); for output in self.targets.keys() { - if !states.contains_key(output) - && let Err(path) = visit(self, output, &mut states) - { + if states.contains_key(output) { + continue; + } + if let Err(path) = visit(self, output, &mut states) { return Err(IrGenError::CircularDependency { path: path.clone() }); } } From 9aba499235a3e107bedc540f38233aa710b49e6f Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 28 Jul 2025 23:20:52 +0100 Subject: [PATCH 3/5] Improve cycle detection --- docs/netsuke-design.md | 12 +-- src/ir.rs | 154 ++++++++++++++++++++------------ tests/ir_from_manifest_tests.rs | 12 ++- 3 files changed, 111 insertions(+), 67 deletions(-) diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 7f206c5c..7c9194c5 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -990,10 +990,10 @@ This transformation involves several steps: compilation if a circular dependency is found. The implemented algorithm performs a depth-first traversal of the target - graph. Each output path is visited once, and visitation state is tracked to - detect back edges. Encountering an already visiting node indicates a cycle, - and compilation fails with `IrGenError::CircularDependency` containing the - offending path. + graph and maintains a recursion stack. Order-only dependencies are ignored + during this search. Encountering an already visiting node indicates a cycle. + The stack slice from the first occurrence of that node forms the cycle and + is returned in `IrGenError::CircularDependency` for easier debugging. ### 5.4 Ninja File Synthesis (`ninja_gen.rs`) @@ -1205,8 +1205,8 @@ libraries.[^27] # RuleNotFound { target_name: String, rule_name: String, }, - #[error("A circular dependency was detected involving target '{path}'.")] - CircularDependency { path: PathBuf, }, + #[error("circular dependency detected: {cycle:?}")] + CircularDependency { cycle: Vec, }, # DependencyNotFound { target_name: String, dependency_name: String, }, } diff --git a/src/ir.rs b/src/ir.rs index ade0acea..9bef95e1 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -94,8 +94,8 @@ pub enum IrGenError { #[error("duplicate target outputs: {outputs:?}")] DuplicateOutput { outputs: Vec }, - #[error("A circular dependency was detected involving target '{path}'")] - CircularDependency { path: PathBuf }, + #[error("circular dependency detected: {cycle:?}")] + CircularDependency { cycle: Vec }, } impl BuildGraph { @@ -172,61 +172,8 @@ impl BuildGraph { } fn detect_cycles(&self) -> Result<(), IrGenError> { - #[derive(Clone, Copy)] - enum State { - Visiting, - Visited, - } - - fn should_visit_node<'a>( - states: &mut HashMap<&'a PathBuf, State>, - node: &'a PathBuf, - ) -> Result { - match states.get(node) { - Some(State::Visited) => Ok(false), - Some(State::Visiting) => Err(node), - None => Ok(true), - } - } - - fn visit_dependencies<'a>( - graph: &'a BuildGraph, - edge: &'a BuildEdge, - states: &mut HashMap<&'a PathBuf, State>, - ) -> Result<(), &'a PathBuf> { - for dep in edge.inputs.iter().chain(&edge.order_only_deps) { - if graph.targets.contains_key(dep) { - visit(graph, dep, states)?; - } - } - Ok(()) - } - - fn visit<'a>( - graph: &'a BuildGraph, - node: &'a PathBuf, - states: &mut HashMap<&'a PathBuf, State>, - ) -> Result<(), &'a PathBuf> { - if !should_visit_node(states, node)? { - return Ok(()); - } - - states.insert(node, State::Visiting); - if let Some(edge) = graph.targets.get(node) { - visit_dependencies(graph, edge, states)?; - } - states.insert(node, State::Visited); - Ok(()) - } - - let mut states = HashMap::new(); - for output in self.targets.keys() { - if states.contains_key(output) { - continue; - } - if let Err(path) = visit(self, output, &mut states) { - return Err(IrGenError::CircularDependency { path: path.clone() }); - } + if let Some(cycle) = find_cycle(&self.targets) { + return Err(IrGenError::CircularDependency { cycle }); } Ok(()) } @@ -332,3 +279,96 @@ fn get_target_display_name(paths: &[PathBuf]) -> String { .map(|p| p.display().to_string()) .unwrap_or_default() } + +#[derive(Clone, Copy)] +enum VisitState { + Visiting, + Visited, +} + +fn find_cycle(targets: &HashMap) -> Option> { + fn visit( + targets: &HashMap, + node: &PathBuf, + stack: &mut Vec, + states: &mut HashMap, + ) -> Option> { + match states.get(node) { + Some(VisitState::Visited) => return None, + Some(VisitState::Visiting) => { + if let Some(idx) = stack.iter().position(|n| n == node) { + let mut cycle = stack.get(idx..).expect("slice").to_vec(); + cycle.push(node.clone()); + return Some(cycle); + } + return Some(vec![node.clone()]); + } + None => {} + } + + states.insert(node.clone(), VisitState::Visiting); + stack.push(node.clone()); + + if let Some(edge) = targets.get(node) { + for dep in &edge.inputs { + if targets.get(dep).is_some() + && let Some(cycle) = visit(targets, dep, stack, states) + { + return Some(cycle); + } + } + } + + stack.pop(); + states.insert(node.clone(), VisitState::Visited); + None + } + + let mut states = HashMap::new(); + let mut stack = Vec::new(); + + for node in targets.keys() { + if !states.contains_key(node) + && let Some(cycle) = visit(targets, node, &mut stack, &mut states) + { + return Some(cycle); + } + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn find_cycle_identifies_cycle() { + let mut targets = HashMap::new(); + let edge_a = BuildEdge { + action_id: "id".into(), + inputs: vec![PathBuf::from("b")], + explicit_outputs: vec![PathBuf::from("a")], + implicit_outputs: Vec::new(), + order_only_deps: Vec::new(), + phony: false, + always: false, + }; + let edge_b = BuildEdge { + action_id: "id".into(), + inputs: vec![PathBuf::from("a")], + explicit_outputs: vec![PathBuf::from("b")], + implicit_outputs: Vec::new(), + order_only_deps: Vec::new(), + phony: false, + always: false, + }; + targets.insert(PathBuf::from("a"), edge_a); + targets.insert(PathBuf::from("b"), edge_b); + + let cycle = find_cycle(&targets).expect("cycle"); + assert_eq!( + cycle, + vec![PathBuf::from("a"), PathBuf::from("b"), PathBuf::from("a")] + ); + } +} diff --git a/tests/ir_from_manifest_tests.rs b/tests/ir_from_manifest_tests.rs index 5689a56d..e3f52e13 100644 --- a/tests/ir_from_manifest_tests.rs +++ b/tests/ir_from_manifest_tests.rs @@ -39,7 +39,7 @@ enum ExpectedError { }, EmptyRule(String), RuleNotFound(String), - CircularDependency(String), + CircularDependency(Vec), } #[rstest] @@ -68,7 +68,7 @@ enum ExpectedError { )] #[case( "tests/data/circular.yml", - ExpectedError::CircularDependency("a".into()) + ExpectedError::CircularDependency(vec!["a".into(), "b".into(), "a".into()]) )] fn manifest_error_cases(#[case] manifest_path: &str, #[case] expected: ExpectedError) { let manifest = manifest::from_path(manifest_path).expect("load"); @@ -93,8 +93,12 @@ fn manifest_error_cases(#[case] manifest_path: &str, #[case] expected: ExpectedE (IrGenError::RuleNotFound { rule_name, .. }, ExpectedError::RuleNotFound(exp_rule)) => { assert_eq!(rule_name, exp_rule); } - (IrGenError::CircularDependency { path }, ExpectedError::CircularDependency(exp_path)) => { - assert_eq!(path, PathBuf::from(exp_path)); + ( + IrGenError::CircularDependency { cycle }, + ExpectedError::CircularDependency(exp_cycle), + ) => { + let paths: Vec = exp_cycle.iter().map(PathBuf::from).collect(); + assert_eq!(cycle, paths); } (other, exp) => panic!("expected {exp:?} but got {other:?}"), } From 0cbaaf8a73cb5388c56f9b73940c1e9574645c5d Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 28 Jul 2025 23:49:55 +0100 Subject: [PATCH 4/5] Canonicalize cycle detection --- docs/netsuke-design.md | 4 +++- src/ir.rs | 22 ++++++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 7c9194c5..6dff02a6 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -993,7 +993,9 @@ This transformation involves several steps: graph and maintains a recursion stack. Order-only dependencies are ignored during this search. Encountering an already visiting node indicates a cycle. The stack slice from the first occurrence of that node forms the cycle and - is returned in `IrGenError::CircularDependency` for easier debugging. + is returned in `IrGenError::CircularDependency` for easier debugging. The + cycle list is rotated so the lexicographically smallest node appears first, + ensuring deterministic error messages. ### 5.4 Ninja File Synthesis (`ninja_gen.rs`) diff --git a/src/ir.rs b/src/ir.rs index 9bef95e1..6b36b916 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -299,9 +299,9 @@ fn find_cycle(targets: &HashMap) -> Option> { if let Some(idx) = stack.iter().position(|n| n == node) { let mut cycle = stack.get(idx..).expect("slice").to_vec(); cycle.push(node.clone()); - return Some(cycle); + return Some(canonicalize_cycle(cycle)); } - return Some(vec![node.clone()]); + return Some(vec![node.clone(), node.clone()]); } None => {} } @@ -337,6 +337,24 @@ fn find_cycle(targets: &HashMap) -> Option> { None } +fn canonicalize_cycle(mut cycle: Vec) -> Vec { + if cycle.len() < 2 { + return cycle; + } + let len = cycle.len() - 1; + let start = cycle + .iter() + .take(len) + .enumerate() + .min_by(|(_, a), (_, b)| a.cmp(b)) + .map_or(0, |(idx, _)| idx); + cycle.rotate_left(start); + if let (Some(first), Some(slot)) = (cycle.first().cloned(), cycle.get_mut(len)) { + slot.clone_from(&first); + } + cycle +} + #[cfg(test)] mod tests { use super::*; From 81be382edf32ce539239c6b91f00b99dec53e901 Mon Sep 17 00:00:00 2001 From: Leynos Date: Tue, 29 Jul 2025 00:06:22 +0100 Subject: [PATCH 5/5] Fix nondeterministic cycle tests --- docs/netsuke-design.md | 2 +- src/ir.rs | 64 ++++++++++++++++++++++----------- tests/ir_from_manifest_tests.rs | 7 ++-- 3 files changed, 50 insertions(+), 23 deletions(-) diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 6dff02a6..9820e8e0 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -993,7 +993,7 @@ This transformation involves several steps: graph and maintains a recursion stack. Order-only dependencies are ignored during this search. Encountering an already visiting node indicates a cycle. The stack slice from the first occurrence of that node forms the cycle and - is returned in `IrGenError::CircularDependency` for easier debugging. The + is returned in `IrGenError::CircularDependency` for improved debugging. The cycle list is rotated so the lexicographically smallest node appears first, ensuring deterministic error messages. diff --git a/src/ir.rs b/src/ir.rs index 6b36b916..ca6bba19 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -286,6 +286,20 @@ enum VisitState { Visited, } +fn should_visit_node<'a>( + states: &'a mut HashMap, + node: &'a PathBuf, +) -> Result { + match states.get(node) { + Some(VisitState::Visited) => Ok(false), + Some(VisitState::Visiting) => Err(node), + None => { + states.insert(node.clone(), VisitState::Visiting); + Ok(true) + } + } +} + fn find_cycle(targets: &HashMap) -> Option> { fn visit( targets: &HashMap, @@ -293,30 +307,25 @@ fn find_cycle(targets: &HashMap) -> Option> { stack: &mut Vec, states: &mut HashMap, ) -> Option> { - match states.get(node) { - Some(VisitState::Visited) => return None, - Some(VisitState::Visiting) => { - if let Some(idx) = stack.iter().position(|n| n == node) { + match should_visit_node(states, node) { + Ok(false) => return None, + Err(path) => { + if let Some(idx) = stack.iter().position(|n| n == path) { let mut cycle = stack.get(idx..).expect("slice").to_vec(); - cycle.push(node.clone()); + cycle.push(path.clone()); return Some(canonicalize_cycle(cycle)); } - return Some(vec![node.clone(), node.clone()]); + return Some(vec![path.clone(), path.clone()]); } - None => {} + Ok(true) => {} } - states.insert(node.clone(), VisitState::Visiting); stack.push(node.clone()); - if let Some(edge) = targets.get(node) { - for dep in &edge.inputs { - if targets.get(dep).is_some() - && let Some(cycle) = visit(targets, dep, stack, states) - { - return Some(cycle); - } - } + if let Some(edge) = targets.get(node) + && let Some(cycle) = visit_dependencies(targets, &edge.inputs, stack, states) + { + return Some(cycle); } stack.pop(); @@ -324,6 +333,22 @@ fn find_cycle(targets: &HashMap) -> Option> { None } + fn visit_dependencies( + targets: &HashMap, + deps: &[PathBuf], + stack: &mut Vec, + states: &mut HashMap, + ) -> Option> { + for dep in deps { + if targets.contains_key(dep) + && let Some(cycle) = visit(targets, dep, stack, states) + { + return Some(cycle); + } + } + None + } + let mut states = HashMap::new(); let mut stack = Vec::new(); @@ -384,9 +409,8 @@ mod tests { targets.insert(PathBuf::from("b"), edge_b); let cycle = find_cycle(&targets).expect("cycle"); - assert_eq!( - cycle, - vec![PathBuf::from("a"), PathBuf::from("b"), PathBuf::from("a")] - ); + let option_a = vec![PathBuf::from("a"), PathBuf::from("b"), PathBuf::from("a")]; + let option_b = vec![PathBuf::from("b"), PathBuf::from("a"), PathBuf::from("b")]; + assert!(cycle == option_a || cycle == option_b); } } diff --git a/tests/ir_from_manifest_tests.rs b/tests/ir_from_manifest_tests.rs index e3f52e13..3bc08c04 100644 --- a/tests/ir_from_manifest_tests.rs +++ b/tests/ir_from_manifest_tests.rs @@ -97,8 +97,11 @@ fn manifest_error_cases(#[case] manifest_path: &str, #[case] expected: ExpectedE IrGenError::CircularDependency { cycle }, ExpectedError::CircularDependency(exp_cycle), ) => { - let paths: Vec = exp_cycle.iter().map(PathBuf::from).collect(); - assert_eq!(cycle, paths); + let mut expected: Vec = exp_cycle.iter().map(PathBuf::from).collect(); + let mut actual = cycle; + expected.sort(); + actual.sort(); + assert_eq!(actual, expected); } (other, exp) => panic!("expected {exp:?} but got {other:?}"), }