From 140f3185ae2743f1f736f1485c50dbef5d7b1980 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sun, 14 Sep 2025 12:06:15 +0100 Subject: [PATCH 1/5] Refactor cycle detection traversal --- docs/netsuke-design.md | 47 ++++++++++++++----------- src/ir.rs | 79 +++++++++++++++++++++++++++--------------- 2 files changed, 77 insertions(+), 49 deletions(-) diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index a83983c4..d4ce698b 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -1081,6 +1081,10 @@ This transformation involves several steps: cycle list is rotated so the lexicographically smallest node appears first, ensuring deterministic error messages. + Traversal state is managed by a small `CycleDetector` helper struct. This + type holds the recursion stack and visitation map, allowing the traversal + functions to remain focused and easily testable. + ### 5.4 Ninja File Synthesis (`ninja_gen.rs`) The final step is to synthesize the `build.ninja` file from the `BuildGraph` @@ -1181,17 +1185,17 @@ The command construction will follow this pattern: 1. A new `Command` is created via `Command::new("ninja")`. Netsuke will assume `ninja` is available in the system's `PATH`. -1. Arguments passed to Netsuke's own CLI will be translated and forwarded to +2. Arguments passed to Netsuke's own CLI will be translated and forwarded to Ninja. For example, a `Netsuke build my_target` command would result in `Command::new("ninja").arg("my_target")`. Flags like `-j` for parallelism will also be passed through.[^8] -1. The working directory for the Ninja process will be set using +3. The working directory for the Ninja process will be set using `.current_dir()`. When the user supplies a `-C` flag, Netsuke canonicalises the path and applies it via `current_dir` rather than forwarding the flag to Ninja. -1. Standard I/O streams (`stdin`, `stdout`, `stderr`) will be configured using +4. Standard I/O streams (`stdin`, `stdout`, `stderr`) will be configured using `.stdout(Stdio::piped())` and `.stderr(Stdio::piped())`.[^24] This allows Netsuke to capture the real-time output from Ninja, which can then be streamed to the user's console, potentially with additional formatting or @@ -1279,11 +1283,11 @@ three fundamental questions: 1. **What** went wrong? A concise summary of the failure (e.g., "YAML parsing failed," "Build configuration is invalid"). -1. **Where** did it go wrong? Precise location information, including the file, +2. **Where** did it go wrong? Precise location information, including the file, line number, and column where applicable (e.g., "in `Netsukefile` at line 15, column 3"). -1. **Why** did it go wrong, and what can be done about it? The underlying cause +3. **Why** did it go wrong, and what can be done about it? The underlying cause of the error and a concrete suggestion for how to fix it (e.g., "Cause: Found a tab character, which is not allowed. Hint: Use spaces for indentation instead."). @@ -1347,22 +1351,22 @@ enrichment: 1. A specific, low-level error occurs within a module. For instance, the IR generator detects a missing rule and creates an `IrGenError::RuleNotFound`. -1. The function where the error occurred returns +2. The function where the error occurred returns `Err(IrGenError::RuleNotFound {... }.into())`. The `.into()` call converts the specific `thiserror` enum variant into a generic `anyhow::Error` object, preserving the original error as its source. -1. A higher-level function in the call stack, which called the failing function, +3. A higher-level function in the call stack, which called the failing function, receives this `Err` value. It uses the `.with_context()` method to wrap the error with more application-level context. For example: `ir::from_manifest(ast)` `.with_context(|| "Failed to build the internal build graph from the manifest")?` . -1. This process of propagation and contextualization repeats as the error +4. This process of propagation and contextualization repeats as the error bubbles up towards `main`. -1. Finally, the `main` function receives the `Err` result. It prints the entire +5. Finally, the `main` function receives the `Err` result. It prints the entire error chain provided by `anyhow`, which displays the highest-level context first, followed by a list of underlying "Caused by:" messages. This provides the user with a rich, layered explanation of the failure, from the general @@ -1530,15 +1534,15 @@ goal. 1. Implement the initial `clap` CLI structure for the `build` command. - 1. Implement the YAML parser using `serde_yml` and the AST data structures + 2. Implement the YAML parser using `serde_yml` and the AST data structures (`ast.rs`). - 1. Implement the AST-to-IR transformation logic, including basic validation + 3. Implement the AST-to-IR transformation logic, including basic validation like checking for rule existence. - 1. Implement the IR-to-Ninja file generator (`ninja_gen.rs`). + 4. Implement the IR-to-Ninja file generator (`ninja_gen.rs`). - 1. Implement the `std::process::Command` logic to invoke `ninja`. + 5. Implement the `std::process::Command` logic to invoke `ninja`. - **Success Criterion:** Netsuke can successfully take a `Netsukefile` file *without any Jinja syntax* and compile it to a `build.ninja` file, then @@ -1554,13 +1558,13 @@ goal. 1. Integrate the `minijinja` crate into the build pipeline. - 1. Implement the two-pass parsing mechanism: first render the manifest with + 2. Implement the two-pass parsing mechanism: first render the manifest with `minijinja`, then parse the result with `serde_yml`. - 1. Populate the initial Jinja context with the global `vars` from the + 3. Populate the initial Jinja context with the global `vars` from the manifest. - 1. Implement basic Jinja control flow (`{% if... %}`, `{% for... %}`) and + 4. Implement basic Jinja control flow (`{% if... %}`, `{% for... %}`) and variable substitution. - **Success Criterion:** Netsuke can successfully build a manifest that uses @@ -1577,15 +1581,15 @@ goal. 1. Implement the full suite of custom Jinja functions (`glob`, `env`, etc.) and filters (`shell_escape`). - 1. Mandate the use of `shell-quote` for all command variable substitutions. + 2. Mandate the use of `shell-quote` for all command variable substitutions. - 1. Refactor the error handling to fully adopt the `anyhow`/`thiserror` + 3. Refactor the error handling to fully adopt the `anyhow`/`thiserror` strategy, ensuring all user-facing errors are contextual and actionable as specified in Section 7. - 1. Implement the `clean` and `graph` subcommands. + 4. Implement the `clean` and `graph` subcommands. - 1. Refine the CLI output for clarity and readability. + 5. Refine the CLI output for clarity and readability. - **Success Criterion:** Netsuke is a feature-complete, secure, and user-friendly build tool that meets all the initial design goals. @@ -1652,7 +1656,8 @@ projects. ### **Works cited** [^1]: Ninja, a small build system with a focus on speed. Accessed on 12 July - 2025. + + 1. [^2]: "Ninja (build system)." Wikipedia. Accessed on 12 July 2025. diff --git a/src/ir.rs b/src/ir.rs index 17ae6ebb..68e0e267 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -492,18 +492,28 @@ fn should_visit_node<'a>( } } -fn find_cycle(targets: &HashMap) -> Option> { - fn visit( - targets: &HashMap, - node: &PathBuf, - stack: &mut Vec, - states: &mut HashMap, - ) -> Option> { - match should_visit_node(states, node) { +/// Detects cycles in a dependency graph by tracking traversal state. +struct CycleDetector<'a> { + targets: &'a HashMap, + stack: Vec, + states: HashMap, +} + +impl<'a> CycleDetector<'a> { + fn new(targets: &'a HashMap) -> Self { + Self { + targets, + stack: Vec::new(), + states: HashMap::new(), + } + } + + fn visit_node(&mut self, node: &PathBuf) -> Option> { + match should_visit_node(&mut self.states, node) { Ok(false) => return None, Err(path) => { - if let Some(idx) = stack.iter().position(|n| n == path) { - let mut cycle = stack.get(idx..).expect("slice").to_vec(); + if let Some(idx) = self.stack.iter().position(|n| n == path) { + let mut cycle = self.stack.get(idx..).expect("slice").to_vec(); cycle.push(path.clone()); return Some(canonicalize_cycle(cycle)); } @@ -512,47 +522,42 @@ fn find_cycle(targets: &HashMap) -> Option> { Ok(true) => {} } - stack.push(node.clone()); + self.stack.push(node.clone()); - if let Some(cycle) = targets - .get(node) - .and_then(|edge| visit_dependencies(targets, &edge.inputs, stack, states)) + if let Some(edge) = self.targets.get(node) + && let Some(cycle) = self.visit_dependencies(&edge.inputs) { return Some(cycle); } - stack.pop(); - states.insert(node.clone(), VisitState::Visited); + self.stack.pop(); + self.states.insert(node.clone(), VisitState::Visited); None } - fn visit_dependencies( - targets: &HashMap, - deps: &[PathBuf], - stack: &mut Vec, - states: &mut HashMap, - ) -> Option> { + fn visit_dependencies(&mut self, deps: &[PathBuf]) -> Option> { for dep in deps { - if !targets.contains_key(dep) { + if !self.targets.contains_key(dep) { continue; } - if let Some(cycle) = visit(targets, dep, stack, states) { + if let Some(cycle) = self.visit_node(dep) { return Some(cycle); } } None } +} - let mut states = HashMap::new(); - let mut stack = Vec::new(); +fn find_cycle(targets: &HashMap) -> Option> { + let mut detector = CycleDetector::new(targets); for node in targets.keys() { // Skip nodes we've already processed to avoid redundant traversal. - if states.contains_key(node) { + if detector.states.contains_key(node) { continue; } - if let Some(cycle) = visit(targets, node, &mut stack, &mut states) { + if let Some(cycle) = detector.visit_node(node) { return Some(cycle); } } @@ -610,4 +615,22 @@ mod tests { let option_b = vec![PathBuf::from("b"), PathBuf::from("a"), PathBuf::from("b")]; assert!(cycle == option_a || cycle == option_b); } + + #[test] + fn canonicalize_cycle_rotates_smallest_node() { + let cycle = vec![ + PathBuf::from("c"), + PathBuf::from("a"), + PathBuf::from("b"), + PathBuf::from("c"), + ]; + let canonical = canonicalize_cycle(cycle); + let expected = vec![ + PathBuf::from("a"), + PathBuf::from("b"), + PathBuf::from("c"), + PathBuf::from("a"), + ]; + assert_eq!(canonical, expected); + } } From bfcabecbc2c6b18cb49a2fb8ecd63cb58144dca8 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sun, 14 Sep 2025 12:15:09 +0100 Subject: [PATCH 2/5] Escape footnote period --- docs/netsuke-design.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index d4ce698b..a4c87952 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -1656,8 +1656,7 @@ projects. ### **Works cited** [^1]: Ninja, a small build system with a focus on speed. Accessed on 12 July - - 1. + 2025\. [^2]: "Ninja (build system)." Wikipedia. Accessed on 12 July 2025. From d3b6ccd9741cc33d6023195925c69e7ca1363183 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sun, 14 Sep 2025 12:30:18 +0100 Subject: [PATCH 3/5] Encapsulate cycle detector state --- src/ir.rs | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/src/ir.rs b/src/ir.rs index 68e0e267..0df4fcfb 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -25,7 +25,7 @@ //! ``` // use std::collections::HashMap; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; /// The complete, static build graph. #[derive(Debug, Default, Clone)] @@ -479,14 +479,14 @@ enum VisitState { } fn should_visit_node<'a>( - states: &'a mut HashMap, - node: &'a PathBuf, -) -> Result { + states: &mut HashMap<&'a Path, VisitState>, + node: &'a Path, +) -> Result { match states.get(node) { Some(VisitState::Visited) => Ok(false), Some(VisitState::Visiting) => Err(node), None => { - states.insert(node.clone(), VisitState::Visiting); + states.insert(node, VisitState::Visiting); Ok(true) } } @@ -495,8 +495,8 @@ fn should_visit_node<'a>( /// Detects cycles in a dependency graph by tracking traversal state. struct CycleDetector<'a> { targets: &'a HashMap, - stack: Vec, - states: HashMap, + stack: Vec<&'a Path>, + states: HashMap<&'a Path, VisitState>, } impl<'a> CycleDetector<'a> { @@ -508,21 +508,31 @@ impl<'a> CycleDetector<'a> { } } - fn visit_node(&mut self, node: &PathBuf) -> Option> { + fn is_visited(&self, node: &Path) -> bool { + matches!(self.states.get(node), Some(VisitState::Visited)) + } + + fn visit_node(&mut self, node: &'a Path) -> Option> { match should_visit_node(&mut self.states, node) { Ok(false) => return None, Err(path) => { - if let Some(idx) = self.stack.iter().position(|n| n == path) { - let mut cycle = self.stack.get(idx..).expect("slice").to_vec(); - cycle.push(path.clone()); + if let Some(idx) = self.stack.iter().position(|n| *n == path) { + let mut cycle: Vec = self + .stack + .get(idx..) + .expect("slice") + .iter() + .map(|p| (*p).to_path_buf()) + .collect(); + cycle.push(path.to_path_buf()); return Some(canonicalize_cycle(cycle)); } - return Some(vec![path.clone(), path.clone()]); + return Some(vec![path.to_path_buf(), path.to_path_buf()]); } Ok(true) => {} } - self.stack.push(node.clone()); + self.stack.push(node); if let Some(edge) = self.targets.get(node) && let Some(cycle) = self.visit_dependencies(&edge.inputs) @@ -531,11 +541,11 @@ impl<'a> CycleDetector<'a> { } self.stack.pop(); - self.states.insert(node.clone(), VisitState::Visited); + self.states.insert(node, VisitState::Visited); None } - fn visit_dependencies(&mut self, deps: &[PathBuf]) -> Option> { + fn visit_dependencies(&mut self, deps: &'a [PathBuf]) -> Option> { for dep in deps { if !self.targets.contains_key(dep) { continue; @@ -554,7 +564,7 @@ fn find_cycle(targets: &HashMap) -> Option> { for node in targets.keys() { // Skip nodes we've already processed to avoid redundant traversal. - if detector.states.contains_key(node) { + if detector.is_visited(node) { continue; } if let Some(cycle) = detector.visit_node(node) { From dca1185d535baf4c1ca0a4f356c2fa913fa288ba Mon Sep 17 00:00:00 2001 From: Leynos Date: Sun, 14 Sep 2025 17:25:02 +0100 Subject: [PATCH 4/5] Detect self-edges in cycle detection --- docs/netsuke-design.md | 15 +++++++++------ src/ir.rs | 30 +++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index a4c87952..561d9c93 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -1075,15 +1075,18 @@ This transformation involves several steps: The implemented algorithm performs a depth-first traversal of the target graph and maintains a recursion stack. Order-only dependencies are ignored - during this search. Encountering an already visiting node indicates a cycle. - The stack slice from the first occurrence of that node forms the cycle and - is returned in `IrGenError::CircularDependency` for improved debugging. The - cycle list is rotated so the lexicographically smallest node appears first, - ensuring deterministic error messages. + during this search. Self-edges are rejected immediately, and encountering an + already visiting node indicates a cycle. The stack slice from the first + occurrence of that node forms the cycle and is returned in + `IrGenError::CircularDependency` for improved debugging. The cycle list is + rotated so the lexicographically smallest node appears first, ensuring + deterministic error messages. Traversal state is managed by a small `CycleDetector` helper struct. This type holds the recursion stack and visitation map, allowing the traversal - functions to remain focused and easily testable. + functions to remain focused and easily testable. The detector borrows path + references from the `targets` map, so `targets` must remain unchanged during + detection. ### 5.4 Ninja File Synthesis (`ninja_gen.rs`) diff --git a/src/ir.rs b/src/ir.rs index 0df4fcfb..ffb36df5 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -547,6 +547,12 @@ impl<'a> CycleDetector<'a> { fn visit_dependencies(&mut self, deps: &'a [PathBuf]) -> Option> { for dep in deps { + if let Some(&head) = self.stack.last() + && head == dep.as_path() + { + return Some(canonicalize_cycle(vec![dep.clone(), dep.clone()])); + } + if !self.targets.contains_key(dep) { continue; } @@ -560,6 +566,8 @@ impl<'a> CycleDetector<'a> { } fn find_cycle(targets: &HashMap) -> Option> { + // CycleDetector borrows paths from `targets`; keep `targets` + // stable during detection. let mut detector = CycleDetector::new(targets); for node in targets.keys() { @@ -585,7 +593,9 @@ fn canonicalize_cycle(mut cycle: Vec) -> Vec { .enumerate() .min_by(|(_, a), (_, b)| a.cmp(b)) .map_or(0, |(idx, _)| idx); - cycle.rotate_left(start); + if let Some(slice) = cycle.get_mut(..len) { + slice.rotate_left(start); + } if let (Some(first), Some(slot)) = (cycle.first().cloned(), cycle.get_mut(len)) { slot.clone_from(&first); } @@ -643,4 +653,22 @@ mod tests { ]; assert_eq!(canonical, expected); } + + #[test] + fn canonicalize_cycle_handles_reverse_direction() { + let cycle = vec![ + PathBuf::from("c"), + PathBuf::from("b"), + PathBuf::from("a"), + PathBuf::from("c"), + ]; + let canonical = canonicalize_cycle(cycle); + let expected = vec![ + PathBuf::from("a"), + PathBuf::from("c"), + PathBuf::from("b"), + PathBuf::from("a"), + ]; + assert_eq!(canonical, expected); + } } From a5f8e395f84e4602d9eaab437e7ffa59ef9cb7e2 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 15 Sep 2025 17:16:31 +0100 Subject: [PATCH 5/5] Clarify cycle detection immutability --- src/ir.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ir.rs b/src/ir.rs index ffb36df5..9dc91968 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -566,8 +566,8 @@ impl<'a> CycleDetector<'a> { } fn find_cycle(targets: &HashMap) -> Option> { - // CycleDetector borrows paths from `targets`; keep `targets` - // stable during detection. + // CycleDetector borrows paths from `targets`; `targets` must remain + // unmodified during detection to keep borrowed keys valid. let mut detector = CycleDetector::new(targets); for node in targets.keys() {