From 6d45855fc4ef344a215133c85851c27cd43c85bf Mon Sep 17 00:00:00 2001 From: Leynos Date: Wed, 27 Aug 2025 23:24:00 +0100 Subject: [PATCH 01/10] Test command quoting --- Cargo.lock | 17 ++++ Cargo.toml | 2 + docs/netsuke-design.md | 11 +-- docs/roadmap.md | 6 +- src/ast.rs | 2 +- src/ir.rs | 82 +++++++++++++------ src/ninja_gen.rs | 15 ++-- tests/command_escaping_tests.rs | 32 ++++++++ tests/data/quote.yml | 5 ++ tests/features/ir.feature | 2 +- tests/features/ir_generation.feature | 2 +- tests/features/ninja.feature | 5 ++ tests/ir_from_manifest_tests.rs | 2 +- tests/ninja_gen_tests.rs | 4 +- ..._snapshot_tests__touch_manifest_ninja.snap | 7 +- 15 files changed, 148 insertions(+), 46 deletions(-) create mode 100644 tests/command_escaping_tests.rs create mode 100644 tests/data/quote.yml diff --git a/Cargo.lock b/Cargo.lock index 63a37022..2b24c31a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -941,6 +941,8 @@ dependencies = [ "serde_yml", "serial_test", "sha2", + "shell-quote", + "shlex", "strip-ansi-escapes", "tempfile", "test_support", @@ -1459,6 +1461,21 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shell-quote" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb502615975ae2365825521fa1529ca7648fd03ce0b0746604e0683856ecd7e4" +dependencies = [ + "bstr", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "similar" version = "2.7.0" diff --git a/Cargo.toml b/Cargo.toml index b2222718..a51089a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,8 @@ serde_json = "1" serde_json_canonicalizer = "0.3" tempfile = "3.8.0" ninja_env = { path = "ninja_env" } +shell-quote = "0.7" +shlex = "1.3.0" [lints.clippy] pedantic = { level = "warn", priority = -1 } diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index e521172a..e6aa0bde 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -240,11 +240,12 @@ Each entry in the `rules` list is a mapping that defines a reusable action. - `command`: A single command string to be executed. It may include the placeholders `{{ ins }}` and `{{ outs }}` to represent input and output files. Netsuke expands these placeholders to space-separated, shell-escaped - lists of file paths before hashing the action. When generating the Ninja - rule, the lists are replaced with Ninja's `$in` and `$out` macros. After - interpolation, the command must be parsable by - [shlex](https://docs.rs/shlex/latest/shlex/). Any interpolation other than - `ins` or `outs` is automatically shell-escaped. + lists of file paths using the + [`shell-quote`](https://docs.rs/shell-quote/latest/shell_quote/) crate before + hashing the action. When generating the Ninja rule, the lists are replaced + with Ninja's `$in` and `$out` macros. After interpolation, the command must + be parsable by [shlex](https://docs.rs/shlex/latest/shlex/). Any + interpolation other than `ins` or `outs` is automatically shell-escaped. - `script`: A multi-line script declared with the YAML `|` block style. The entire block is passed to an interpreter. If the first line begins with `#!` diff --git a/docs/roadmap.md b/docs/roadmap.md index 421f99dc..bb555e27 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -125,12 +125,12 @@ library, and CLI ergonomics. - [ ] **Security and Shell Escaping:** - - [ ] Integrate the `shell-quote` crate. + - [x] Integrate the `shell-quote` crate. - - [ ] Mandate its use for all variable substitutions within command + - [x] Mandate its use for all variable substitutions within command strings during Ninja file synthesis to prevent command injection. - - [ ] After interpolation, validate the final command string is parsable using + - [x] After interpolation, validate the final command string is parsable using the shlex crate. - [ ] **Actionable Error Reporting:** diff --git a/src/ast.rs b/src/ast.rs index 5fd3fcc9..816dd5d9 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -89,7 +89,7 @@ pub struct NetsukeManifest { /// targets. It may define a command line, a script block, or delegate to another /// named rule. Dependencies may be specified as either a single string or a /// list of strings. -#[derive(Debug, Deserialize, Serialize)] +#[derive(Debug, Clone, Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub struct Rule { /// Unique identifier used by targets to reference this rule. diff --git a/src/ir.rs b/src/ir.rs index 94846f53..962d7849 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -74,7 +74,7 @@ pub struct BuildEdge { pub always: bool, } -use crate::ast::{NetsukeManifest, Recipe, StringOrList}; +use crate::ast::{NetsukeManifest, Recipe, Rule, StringOrList}; use thiserror::Error; use crate::hasher::ActionHasher; @@ -105,6 +105,9 @@ pub enum IrGenError { #[error("failed to serialise action: {0}")] ActionSerialisation(#[from] serde_json::Error), + + #[error("command \"{command}\" is not a valid shell command")] + InvalidCommand { command: String }, } impl BuildGraph { @@ -118,7 +121,7 @@ impl BuildGraph { let mut graph = Self::default(); let mut rule_map = HashMap::new(); - Self::process_rules(manifest, &mut graph.actions, &mut rule_map)?; + Self::process_rules(manifest, &mut rule_map); Self::process_targets(manifest, &mut graph.actions, &mut graph.targets, &rule_map)?; Self::process_defaults(manifest, &mut graph.default_targets); @@ -127,37 +130,41 @@ impl BuildGraph { Ok(graph) } - fn process_rules( - manifest: &NetsukeManifest, - actions: &mut HashMap, - rule_map: &mut HashMap, - ) -> Result<(), IrGenError> { + fn process_rules(manifest: &NetsukeManifest, rule_map: &mut HashMap) { for rule in &manifest.rules { - let hash = register_action(actions, rule.recipe.clone(), rule.description.clone())?; - rule_map.insert(rule.name.clone(), hash); + rule_map.insert(rule.name.clone(), rule.clone()); } - Ok(()) } fn process_targets( manifest: &NetsukeManifest, actions: &mut HashMap, targets: &mut HashMap, - rule_map: &HashMap, + rule_map: &HashMap, ) -> Result<(), IrGenError> { for target in manifest.actions.iter().chain(&manifest.targets) { let outputs = to_paths(&target.name); + let inputs = to_paths(&target.sources); let target_name = get_target_display_name(&outputs); let action_id = match &target.recipe { - Recipe::Rule { rule } => resolve_rule(rule, rule_map, &target_name)?, + Recipe::Rule { rule } => { + let tmpl = resolve_rule(rule, rule_map, &target_name)?; + register_action( + actions, + tmpl.recipe.clone(), + tmpl.description.clone(), + &inputs, + &outputs, + )? + } Recipe::Command { .. } | Recipe::Script { .. } => { - register_action(actions, target.recipe.clone(), None)? + register_action(actions, target.recipe.clone(), None, &inputs, &outputs)? } }; let edge = BuildEdge { action_id, - inputs: to_paths(&target.sources), + inputs: inputs.clone(), explicit_outputs: outputs.clone(), implicit_outputs: Vec::new(), order_only_deps: to_paths(&target.order_only_deps), @@ -193,7 +200,37 @@ fn register_action( actions: &mut HashMap, recipe: Recipe, description: Option, + inputs: &[PathBuf], + outputs: &[PathBuf], ) -> Result { + let recipe = match recipe { + Recipe::Command { command } => { + use shell_quote::Bash; + let quote_paths = |paths: &[PathBuf]| -> String { + paths + .iter() + .map(|p| { + let raw = p.display().to_string(); + String::from_utf8(Bash::quote_vec(&raw)) + .expect("quoted path is valid UTF-8") + }) + .collect::>() + .join(" ") + }; + let ins = quote_paths(inputs); + let outs = quote_paths(outputs); + let interpolated = command.replace("$in", &ins).replace("$out", &outs); + if shlex::split(&interpolated).is_none() { + return Err(IrGenError::InvalidCommand { + command: interpolated, + }); + } + Recipe::Command { + command: interpolated, + } + } + other => other, + }; let action = Action { recipe, description, @@ -234,11 +271,11 @@ fn extract_single(sol: &StringOrList) -> Option<&str> { } } -fn resolve_rule( +fn resolve_rule<'a>( rule: &StringOrList, - rule_map: &HashMap, + rule_map: &'a HashMap, target_name: &str, -) -> Result { +) -> Result<&'a Rule, IrGenError> { extract_single(rule).map_or_else( || { let mut rules = to_string_vec(rule); @@ -255,13 +292,10 @@ fn resolve_rule( } }, |name| { - rule_map - .get(name) - .cloned() - .ok_or_else(|| IrGenError::RuleNotFound { - target_name: target_name.to_string(), - rule_name: name.to_string(), - }) + rule_map.get(name).ok_or_else(|| IrGenError::RuleNotFound { + target_name: target_name.to_string(), + rule_name: name.to_string(), + }) }, ) } diff --git a/src/ninja_gen.rs b/src/ninja_gen.rs index 3163a883..190da464 100644 --- a/src/ninja_gen.rs +++ b/src/ninja_gen.rs @@ -111,16 +111,21 @@ impl Display for NamedAction<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { writeln!(f, "rule {}", self.id)?; match &self.action.recipe { - Recipe::Command { command } => writeln!(f, " command = {command}")?, + Recipe::Command { command } => { + assert!( + shlex::split(command).is_some(), + "invalid command: {command}" + ); + writeln!(f, " command = {command}")?; + } Recipe::Script { script } => { // Ninja commands must be single-line. Encode newlines and // reconstruct the original script with `printf %b` piped into // a fresh shell to preserve expected expansions. let escaped = escape_script(script); - writeln!( - f, - " command = /bin/sh -e -c \"printf %b '{escaped}' | /bin/sh -e\"" - )?; + let cmd = format!("/bin/sh -e -c \"printf %b '{escaped}' | /bin/sh -e\""); + assert!(shlex::split(&cmd).is_some(), "invalid command: {cmd}"); + writeln!(f, " command = {cmd}")?; } Recipe::Rule { .. } => unreachable!("rules do not reference other rules"), } diff --git a/tests/command_escaping_tests.rs b/tests/command_escaping_tests.rs new file mode 100644 index 00000000..7bf75038 --- /dev/null +++ b/tests/command_escaping_tests.rs @@ -0,0 +1,32 @@ +//! Tests for shell quoting of command substitutions. +use netsuke::{ast::Recipe, ir::BuildGraph, manifest}; +use rstest::rstest; + +fn manifest_yaml(body: &str) -> String { + format!("netsuke_version: 1.0.0\n{body}") +} + +#[rstest] +fn inputs_and_outputs_are_quoted() { + let yaml = manifest_yaml( + "targets:\n - name: 'out file'\n sources: 'in file'\n command: \"cat $in > $out\"\n", + ); + let manifest = manifest::from_str(&yaml).expect("parse"); + let graph = BuildGraph::from_manifest(&manifest).expect("graph"); + let action = graph.actions.values().next().expect("action"); + let Recipe::Command { command } = &action.recipe else { + panic!("expected command") + }; + assert!(command.contains("$'in file'")); + assert!(command.contains("$'out file'")); +} + +#[rstest] +fn invalid_command_errors() { + let yaml = manifest_yaml( + "targets:\n - name: out\n sources: in\n command: \"echo 'unterminated\"\n", + ); + let manifest = manifest::from_str(&yaml).expect("parse"); + let err = BuildGraph::from_manifest(&manifest).expect_err("should fail"); + assert!(err.to_string().contains("not a valid shell command")); +} diff --git a/tests/data/quote.yml b/tests/data/quote.yml new file mode 100644 index 00000000..585a344a --- /dev/null +++ b/tests/data/quote.yml @@ -0,0 +1,5 @@ +netsuke_version: 1.0.0 +targets: + - name: "out file" + sources: "in file" + command: "cat $in > $out" diff --git a/tests/features/ir.feature b/tests/features/ir.feature index c4bd70b0..0293ab46 100644 --- a/tests/features/ir.feature +++ b/tests/features/ir.feature @@ -14,7 +14,7 @@ Feature: BuildGraph Scenario: Duplicate rules are deduplicated When the manifest file "tests/data/duplicate_rules.yml" is compiled to IR - Then the graph has 1 actions + Then the graph has 2 actions And the graph has 2 targets Scenario: Rule not found during IR generation diff --git a/tests/features/ir_generation.feature b/tests/features/ir_generation.feature index 7b731453..1af9ea8c 100644 --- a/tests/features/ir_generation.feature +++ b/tests/features/ir_generation.feature @@ -19,7 +19,7 @@ Feature: Intermediate Representation (IR) Generation Scenario: Identical rules are deduplicated during IR generation Given the manifest file "tests/data/duplicate_rules.yml" is compiled to IR When the graph contents are checked - Then the graph has 1 actions + Then the graph has 2 actions And the graph has 2 targets Scenario: IR generation fails if a target references a rule that does not exist diff --git a/tests/features/ninja.feature b/tests/features/ninja.feature index af28c6ae..8496288a 100644 --- a/tests/features/ninja.feature +++ b/tests/features/ninja.feature @@ -11,3 +11,8 @@ Feature: Ninja file generation And the ninja file is generated Then the ninja file contains "build clean:" And the ninja file contains "rm -rf build" + + Scenario: Inputs and outputs are shell-quoted + When the manifest file "tests/data/quote.yml" is compiled to IR + And the ninja file is generated + Then the ninja file contains "command = cat $'in file' > $'out file'" diff --git a/tests/ir_from_manifest_tests.rs b/tests/ir_from_manifest_tests.rs index 3bc08c04..469373fe 100644 --- a/tests/ir_from_manifest_tests.rs +++ b/tests/ir_from_manifest_tests.rs @@ -19,7 +19,7 @@ fn minimal_manifest_to_ir() { fn duplicate_rules_are_deduped() { let manifest = manifest::from_path("tests/data/duplicate_rules.yml").expect("load"); let graph = BuildGraph::from_manifest(&manifest).expect("ir"); - assert_eq!(graph.actions.len(), 1); + assert_eq!(graph.actions.len(), 2); assert_eq!(graph.targets.len(), 2); } diff --git a/tests/ninja_gen_tests.rs b/tests/ninja_gen_tests.rs index cf38c2cb..2ddd5dd8 100644 --- a/tests/ninja_gen_tests.rs +++ b/tests/ninja_gen_tests.rs @@ -73,7 +73,7 @@ fn ninja_integration_setup() -> Option { )] #[case::standard_build( Action { - recipe: Recipe::Command { command: "cc -c $in -o $out".into() }, + recipe: Recipe::Command { command: "cc -c 'a.c' 'b.c' -o 'ab.o'".into() }, description: None, depfile: None, deps_format: None, @@ -92,7 +92,7 @@ fn ninja_integration_setup() -> Option { PathBuf::from("ab.o"), concat!( "rule compile\n", - " command = cc -c $in -o $out\n\n", + " command = cc -c 'a.c' 'b.c' -o 'ab.o'\n\n", "build ab.o: compile a.c b.c\n\n", ), )] diff --git a/tests/snapshots/ninja/ninja_snapshot_tests__touch_manifest_ninja.snap b/tests/snapshots/ninja/ninja_snapshot_tests__touch_manifest_ninja.snap index 96b8b56e..b1d3153d 100644 --- a/tests/snapshots/ninja/ninja_snapshot_tests__touch_manifest_ninja.snap +++ b/tests/snapshots/ninja/ninja_snapshot_tests__touch_manifest_ninja.snap @@ -1,8 +1,9 @@ --- source: tests/ninja_snapshot_tests.rs +assertion_line: 51 expression: ninja_content --- -rule d3cc8be04150cb4e2d9ccbdbe94cf9f2e8ade54bb4701b8faf99cafeb456a75d - command = python3 -c 'import os,sys; open(sys.argv[1],"a").close()' $out +rule bf28092e56401ec4461623efcd7658d0d276965c084951d61ec87f6f5248a8af + command = python3 -c 'import os,sys; open(sys.argv[1],"a").close()' out/a -build out/a: d3cc8be04150cb4e2d9ccbdbe94cf9f2e8ade54bb4701b8faf99cafeb456a75d in/a +build out/a: bf28092e56401ec4461623efcd7658d0d276965c084951d61ec87f6f5248a8af in/a From b3b3074e83e429b743e8ef3c4cec027ca1256cc2 Mon Sep 17 00:00:00 2001 From: Leynos Date: Wed, 27 Aug 2025 23:56:26 +0100 Subject: [PATCH 02/10] Use POSIX quoting for command templates --- docs/netsuke-design.md | 11 +++-- src/ir.rs | 88 +++++++++++++++++++++++++-------- tests/command_escaping_tests.rs | 20 +++++++- tests/features/ninja.feature | 2 +- 4 files changed, 94 insertions(+), 27 deletions(-) diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index e6aa0bde..14caf095 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -1232,11 +1232,14 @@ strings. Instead, parse the Netsuke command template (e.g., `{{ cc }} -c {{ ins }} -o` `{{ outs }}`) and build the final command string step by step. The placeholders `{{ ins }}` and `{{ outs }}` are expanded to space-separated lists of file paths within Netsuke itself, each path being -shell-escaped using the `shell-quote` API. When the command is written to -`build.ninja`, these lists replace Ninja's `$in` and `$out` macros. After -substitution, the command is validated with \[`shlex`\] +shell-escaped using the `shell-quote` API. Netsuke uses the `Sh` quoting mode +to emit POSIX-compliant single-quoted strings and scans the template for +standalone `$in` and `$out` tokens to avoid rewriting unrelated variables. When +the command is written to `build.ninja`, these lists replace Ninja's `$in` and +`$out` macros. After substitution, the command is validated with \[`shlex`\] () to ensure it parses correctly. This -approach guarantees that every dynamic part of the command is securely quoted. +approach guarantees that every dynamic part of the command is securely quoted, +albeit at the cost of deduplicating only actions with identical file sets. ### 6.4 Automatic Security as a "Friendliness" Feature diff --git a/src/ir.rs b/src/ir.rs index 962d7849..b8e2866e 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -75,6 +75,7 @@ pub struct BuildEdge { } use crate::ast::{NetsukeManifest, Recipe, Rule, StringOrList}; +use shell_quote::{QuoteRefExt, Sh}; use thiserror::Error; use crate::hasher::ActionHasher; @@ -205,26 +206,7 @@ fn register_action( ) -> Result { let recipe = match recipe { Recipe::Command { command } => { - use shell_quote::Bash; - let quote_paths = |paths: &[PathBuf]| -> String { - paths - .iter() - .map(|p| { - let raw = p.display().to_string(); - String::from_utf8(Bash::quote_vec(&raw)) - .expect("quoted path is valid UTF-8") - }) - .collect::>() - .join(" ") - }; - let ins = quote_paths(inputs); - let outs = quote_paths(outputs); - let interpolated = command.replace("$in", &ins).replace("$out", &outs); - if shlex::split(&interpolated).is_none() { - return Err(IrGenError::InvalidCommand { - command: interpolated, - }); - } + let interpolated = interpolate_command(&command, inputs, outputs)?; Recipe::Command { command: interpolated, } @@ -244,6 +226,72 @@ fn register_action( Ok(hash) } +fn interpolate_command( + template: &str, + inputs: &[PathBuf], + outputs: &[PathBuf], +) -> Result { + fn quote_paths(paths: &[PathBuf]) -> Vec { + paths + .iter() + .map(|p| { + let raw = p.display().to_string(); + String::from_utf8(raw.quoted(Sh)).expect("quoted path is valid UTF-8") + }) + .collect() + } + + let ins = quote_paths(inputs); + let outs = quote_paths(outputs); + let interpolated = substitute(template, &ins, &outs); + if shlex::split(&interpolated).is_none() { + return Err(IrGenError::InvalidCommand { + command: interpolated, + }); + } + Ok(interpolated) +} + +fn substitute(template: &str, ins: &[String], outs: &[String]) -> String { + fn is_ident(ch: char) -> bool { + ch.is_ascii_alphanumeric() || ch == '_' + } + + let chars: Vec = template.chars().collect(); + let mut out = String::with_capacity(template.len()); + let mut i = 0; + while let Some(&ch) = chars.get(i) { + if ch == '$' { + // Attempt to match $in + if matches!(chars.get(i + 1), Some('i')) && matches!(chars.get(i + 2), Some('n')) { + let prev_ok = chars.get(i.wrapping_sub(1)).is_none_or(|c| !is_ident(*c)); + let next_ok = chars.get(i + 3).is_none_or(|c| !is_ident(*c)); + if prev_ok && next_ok { + out.push_str(&ins.join(" ")); + i += 3; + continue; + } + } + // Attempt to match $out + if matches!(chars.get(i + 1), Some('o')) + && matches!(chars.get(i + 2), Some('u')) + && matches!(chars.get(i + 3), Some('t')) + { + let prev_ok = chars.get(i.wrapping_sub(1)).is_none_or(|c| !is_ident(*c)); + let next_ok = chars.get(i + 4).is_none_or(|c| !is_ident(*c)); + if prev_ok && next_ok { + out.push_str(&outs.join(" ")); + i += 4; + continue; + } + } + } + out.push(ch); + i += 1; + } + out +} + fn map_string_or_list(sol: &StringOrList, f: F) -> Vec where F: Fn(&str) -> T, diff --git a/tests/command_escaping_tests.rs b/tests/command_escaping_tests.rs index 7bf75038..8c619cbd 100644 --- a/tests/command_escaping_tests.rs +++ b/tests/command_escaping_tests.rs @@ -17,8 +17,24 @@ fn inputs_and_outputs_are_quoted() { let Recipe::Command { command } = &action.recipe else { panic!("expected command") }; - assert!(command.contains("$'in file'")); - assert!(command.contains("$'out file'")); + assert_eq!(command, "cat in' file' > out' file'"); +} + +#[rstest] +fn multiple_inputs_outputs_with_special_chars_are_quoted() { + let yaml = manifest_yaml( + "targets:\n - name: ['out file', 'out&2']\n sources: ['in file', 'input$1']\n command: \"echo $in && echo $out\"\n", + ); + let manifest = manifest::from_str(&yaml).expect("parse"); + let graph = BuildGraph::from_manifest(&manifest).expect("graph"); + let action = graph.actions.values().next().expect("action"); + let Recipe::Command { command } = &action.recipe else { + panic!("expected command") + }; + assert_eq!( + command, + "echo in' file' input'$1' && echo out' file' out'&2'", + ); } #[rstest] diff --git a/tests/features/ninja.feature b/tests/features/ninja.feature index 8496288a..f11b5413 100644 --- a/tests/features/ninja.feature +++ b/tests/features/ninja.feature @@ -15,4 +15,4 @@ Feature: Ninja file generation Scenario: Inputs and outputs are shell-quoted When the manifest file "tests/data/quote.yml" is compiled to IR And the ninja file is generated - Then the ninja file contains "command = cat $'in file' > $'out file'" + Then the ninja file contains "command = cat in' file' > out' file'" From 7f5fe559e0fa0eb4a69e32080eb086933de9d2ff Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 28 Aug 2025 00:20:20 +0100 Subject: [PATCH 03/10] Pin shell-quote and refine shell quoting --- Cargo.lock | 3 -- Cargo.toml | 2 +- docs/netsuke-design.md | 15 ++++---- docs/roadmap.md | 9 +++-- src/ir.rs | 9 +++-- src/ninja_gen.rs | 4 +-- tests/command_escaping_tests.rs | 54 +++++++++++++++++++++++++--- tests/data/quote.yml | 3 ++ tests/features/ir.feature | 2 +- tests/features/ir_generation.feature | 2 +- tests/features/ninja.feature | 7 +++- tests/ir_from_manifest_tests.rs | 2 +- tests/steps/ninja_steps.rs | 35 +++++++++++++++++- 13 files changed, 119 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2b24c31a..ad63e3b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1466,9 +1466,6 @@ name = "shell-quote" version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb502615975ae2365825521fa1529ca7648fd03ce0b0746604e0683856ecd7e4" -dependencies = [ - "bstr", -] [[package]] name = "shlex" diff --git a/Cargo.toml b/Cargo.toml index a51089a2..6888d9a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ serde_json = "1" serde_json_canonicalizer = "0.3" tempfile = "3.8.0" ninja_env = { path = "ninja_env" } -shell-quote = "0.7" +shell-quote = { version = "0.7.2", default-features = false, features = ["sh"] } shlex = "1.3.0" [lints.clippy] diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 14caf095..5d85e962 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -239,13 +239,14 @@ Each entry in the `rules` list is a mapping that defines a reusable action. - `command`: A single command string to be executed. It may include the placeholders `{{ ins }}` and `{{ outs }}` to represent input and output - files. Netsuke expands these placeholders to space-separated, shell-escaped - lists of file paths using the - [`shell-quote`](https://docs.rs/shell-quote/latest/shell_quote/) crate before - hashing the action. When generating the Ninja rule, the lists are replaced - with Ninja's `$in` and `$out` macros. After interpolation, the command must - be parsable by [shlex](https://docs.rs/shlex/latest/shlex/). Any - interpolation other than `ins` or `outs` is automatically shell-escaped. + files. Netsuke expands these placeholders to space-separated lists of file + paths quoted for POSIX `/bin/sh` using the + [`shell-quote`](https://docs.rs/shell-quote/latest/shell_quote/) crate (Sh + mode) before hashing the action. When generating the Ninja rule, the lists + are replaced with Ninja's `$in` and `$out` macros. After interpolation, the + command must be parsable by [shlex](https://docs.rs/shlex/latest/shlex/) + (POSIX mode). Any interpolation other than `ins` or `outs` is automatically + shell-escaped. - `script`: A multi-line script declared with the YAML `|` block style. The entire block is passed to an interpreter. If the first line begins with `#!` diff --git a/docs/roadmap.md b/docs/roadmap.md index bb555e27..f85ec0e5 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -127,8 +127,13 @@ library, and CLI ergonomics. - [x] Integrate the `shell-quote` crate. - - [x] Mandate its use for all variable substitutions within command - strings during Ninja file synthesis to prevent command injection. + - [x] Mandate its use for $in/$out substitutions within command strings + during Ninja file synthesis to prevent command injection. Other + interpolations are validated with shlex but are not shell-quoted. + + - [x] Emit POSIX-sh-compatible quoting (portable single-quote style) + rather than Bash-only forms. If Bash-specific quoting is required, document + and enforce execution under bash. - [x] After interpolation, validate the final command string is parsable using the shlex crate. diff --git a/src/ir.rs b/src/ir.rs index b8e2866e..bc88865f 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -107,8 +107,8 @@ pub enum IrGenError { #[error("failed to serialise action: {0}")] ActionSerialisation(#[from] serde_json::Error), - #[error("command \"{command}\" is not a valid shell command")] - InvalidCommand { command: String }, + #[error("command is not a valid shell command: {snippet}")] + InvalidCommand { command: String, snippet: String }, } impl BuildGraph { @@ -244,9 +244,12 @@ fn interpolate_command( let ins = quote_paths(inputs); let outs = quote_paths(outputs); let interpolated = substitute(template, &ins, &outs); - if shlex::split(&interpolated).is_none() { + let unmatched_backticks = interpolated.chars().filter(|&c| c == '`').count() & 1 != 0; + if unmatched_backticks || shlex::split(&interpolated).is_none() { + let snippet = interpolated.chars().take(160).collect(); return Err(IrGenError::InvalidCommand { command: interpolated, + snippet, }); } Ok(interpolated) diff --git a/src/ninja_gen.rs b/src/ninja_gen.rs index 190da464..906824b0 100644 --- a/src/ninja_gen.rs +++ b/src/ninja_gen.rs @@ -112,7 +112,7 @@ impl Display for NamedAction<'_> { writeln!(f, "rule {}", self.id)?; match &self.action.recipe { Recipe::Command { command } => { - assert!( + debug_assert!( shlex::split(command).is_some(), "invalid command: {command}" ); @@ -124,7 +124,7 @@ impl Display for NamedAction<'_> { // a fresh shell to preserve expected expansions. let escaped = escape_script(script); let cmd = format!("/bin/sh -e -c \"printf %b '{escaped}' | /bin/sh -e\""); - assert!(shlex::split(&cmd).is_some(), "invalid command: {cmd}"); + debug_assert!(shlex::split(&cmd).is_some(), "invalid command: {cmd}"); writeln!(f, " command = {cmd}")?; } Recipe::Rule { .. } => unreachable!("rules do not reference other rules"), diff --git a/tests/command_escaping_tests.rs b/tests/command_escaping_tests.rs index 8c619cbd..e07db6ac 100644 --- a/tests/command_escaping_tests.rs +++ b/tests/command_escaping_tests.rs @@ -2,6 +2,14 @@ use netsuke::{ast::Recipe, ir::BuildGraph, manifest}; use rstest::rstest; +/// Prefix the provided YAML body with a required `netsuke_version`. +/// +/// # Examples +/// ``` +/// let y = manifest_yaml("targets: []"); +/// assert!(y.starts_with("netsuke_version")); +/// ``` +#[inline] fn manifest_yaml(body: &str) -> String { format!("netsuke_version: 1.0.0\n{body}") } @@ -17,7 +25,8 @@ fn inputs_and_outputs_are_quoted() { let Recipe::Command { command } = &action.recipe else { panic!("expected command") }; - assert_eq!(command, "cat in' file' > out' file'"); + let words = shlex::split(command).expect("split command into words"); + assert_eq!(words, ["cat", "in file", ">", "out file"]); } #[rstest] @@ -31,18 +40,53 @@ fn multiple_inputs_outputs_with_special_chars_are_quoted() { let Recipe::Command { command } = &action.recipe else { panic!("expected command") }; + let words = shlex::split(command).expect("split command into words"); assert_eq!( - command, - "echo in' file' input'$1' && echo out' file' out'&2'", + words, + [ + "echo", "in file", "input$1", "&&", "echo", "out file", "out&2", + ], ); } #[rstest] -fn invalid_command_errors() { +fn variable_name_overlap_not_rewritten() { let yaml = manifest_yaml( - "targets:\n - name: out\n sources: in\n command: \"echo 'unterminated\"\n", + "targets:\n - name: 'out file'\n sources: in\n command: \"echo $input > $out\"\n", ); let manifest = manifest::from_str(&yaml).expect("parse"); + let graph = BuildGraph::from_manifest(&manifest).expect("graph"); + let action = graph.actions.values().next().expect("action"); + let Recipe::Command { command } = &action.recipe else { + panic!("expected command") + }; + let words = shlex::split(command).expect("split command into words"); + assert_eq!(words, ["echo", "$input", ">", "out file"]); +} + +#[rstest] +fn command_without_placeholders_remains_valid() { + let yaml = + manifest_yaml("targets:\n - name: out\n sources: in\n command: \"echo hi\"\n"); + let manifest = manifest::from_str(&yaml).expect("parse"); + let graph = BuildGraph::from_manifest(&manifest).expect("graph"); + let action = graph.actions.values().next().expect("action"); + let Recipe::Command { command } = &action.recipe else { + panic!("expected command") + }; + assert_eq!(command, "echo hi"); +} + +#[rstest] +#[case("echo \"unterminated")] +#[case("echo 'unterminated")] +#[case("echo `unterminated")] +fn invalid_command_errors(#[case] cmd: &str) { + let escaped = cmd.replace('\\', "\\\\").replace('"', "\\\""); + let yaml = manifest_yaml(&format!( + "targets:\n - name: out\n sources: in\n command: \"{escaped}\"\n" + )); + let manifest = manifest::from_str(&yaml).expect("parse"); let err = BuildGraph::from_manifest(&manifest).expect_err("should fail"); assert!(err.to_string().contains("not a valid shell command")); } diff --git a/tests/data/quote.yml b/tests/data/quote.yml index 585a344a..be7b6c29 100644 --- a/tests/data/quote.yml +++ b/tests/data/quote.yml @@ -3,3 +3,6 @@ targets: - name: "out file" sources: "in file" command: "cat $in > $out" + - name: "o'utfile" + sources: "-in file" + command: "printf %s $in > $out" diff --git a/tests/features/ir.feature b/tests/features/ir.feature index 0293ab46..335d7249 100644 --- a/tests/features/ir.feature +++ b/tests/features/ir.feature @@ -12,7 +12,7 @@ Feature: BuildGraph Then the graph has 1 actions And the graph has 1 targets - Scenario: Duplicate rules are deduplicated + Scenario: Duplicate rules emit distinct actions (see docs/netsuke-design.md#55-design-decisions) When the manifest file "tests/data/duplicate_rules.yml" is compiled to IR Then the graph has 2 actions And the graph has 2 targets diff --git a/tests/features/ir_generation.feature b/tests/features/ir_generation.feature index 1af9ea8c..e5814aac 100644 --- a/tests/features/ir_generation.feature +++ b/tests/features/ir_generation.feature @@ -16,7 +16,7 @@ Feature: Intermediate Representation (IR) Generation Then the graph has 1 actions And the graph has 1 targets - Scenario: Identical rules are deduplicated during IR generation + Scenario: Identical rules emit distinct actions during IR generation (see docs/netsuke-design.md#55-design-decisions) Given the manifest file "tests/data/duplicate_rules.yml" is compiled to IR When the graph contents are checked Then the graph has 2 actions diff --git a/tests/features/ninja.feature b/tests/features/ninja.feature index f11b5413..04923530 100644 --- a/tests/features/ninja.feature +++ b/tests/features/ninja.feature @@ -15,4 +15,9 @@ Feature: Ninja file generation Scenario: Inputs and outputs are shell-quoted When the manifest file "tests/data/quote.yml" is compiled to IR And the ninja file is generated - Then the ninja file contains "command = cat in' file' > out' file'" + Then shlex splitting the command yields "cat, in file, >, out file" + + Scenario: Edge-case paths are shell-quoted + When the manifest file "tests/data/quote.yml" is compiled to IR + And the ninja file is generated + Then shlex splitting command 2 yields "printf, %s, -in file, >, o'utfile" diff --git a/tests/ir_from_manifest_tests.rs b/tests/ir_from_manifest_tests.rs index 469373fe..b3cd0d16 100644 --- a/tests/ir_from_manifest_tests.rs +++ b/tests/ir_from_manifest_tests.rs @@ -16,7 +16,7 @@ fn minimal_manifest_to_ir() { } #[rstest] -fn duplicate_rules_are_deduped() { +fn duplicate_rules_emit_distinct_actions() { let manifest = manifest::from_path("tests/data/duplicate_rules.yml").expect("load"); let graph = BuildGraph::from_manifest(&manifest).expect("ir"); assert_eq!(graph.actions.len(), 2); diff --git a/tests/steps/ninja_steps.rs b/tests/steps/ninja_steps.rs index 71591d79..2a6bb00a 100644 --- a/tests/steps/ninja_steps.rs +++ b/tests/steps/ninja_steps.rs @@ -13,7 +13,7 @@ fn generate_ninja(world: &mut CliWorld) { world.ninja = Some(ninja_gen::generate(graph)); } -#[expect( +#[allow( clippy::needless_pass_by_value, reason = "Cucumber requires owned String arguments" )] @@ -25,3 +25,36 @@ fn ninja_contains(world: &mut CliWorld, text: String) { .expect("ninja content should be available"); assert!(ninja.contains(&text)); } + +#[allow( + clippy::needless_pass_by_value, + reason = "Cucumber requires owned String arguments" +)] +#[then(expr = "shlex splitting command {int} yields {string}")] +fn ninja_command_tokens(world: &mut CliWorld, index: usize, expected: String) { + let ninja = world + .ninja + .as_ref() + .expect("ninja content should be available"); + let commands: Vec<&str> = ninja + .lines() + .filter(|l| l.trim_start().starts_with("command =")) + .collect(); + let line = commands.get(index - 1).expect("command index within range"); + let command = line.trim_start().trim_start_matches("command = "); + let words = shlex::split(command).expect("split command"); + let expected: Vec = expected + .split(',') + .map(|w| w.trim().replace("\\n", "\n")) + .collect(); + assert_eq!(words, expected); +} + +#[allow( + clippy::needless_pass_by_value, + reason = "Cucumber requires owned String arguments" +)] +#[then(expr = "shlex splitting the command yields {string}")] +fn ninja_first_command_tokens(world: &mut CliWorld, expected: String) { + ninja_command_tokens(world, 1, expected); +} From 4b33719107d1bef01f42d410742fa997e5550010 Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 28 Aug 2025 18:31:59 +0100 Subject: [PATCH 04/10] Document action deduplication --- docs/netsuke-design.md | 31 ++++++++++++++++--------------- src/ir.rs | 14 ++++++++++++++ 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 5d85e962..42a3342b 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -1047,28 +1047,28 @@ The core logic of the validation stage is a function, `ir::from_manifest`, that consumes a `NetsukeManifest` (the AST) and produces a `BuildGraph` (the IR). This transformation involves several steps: -1. **Action Consolidation:** Iterate through the `manifest.rules` from the AST. - For each rule, create a corresponding `ir::Action` struct. These actions are - stored in the `BuildGraph`'s `actions` map, keyed by a hash of their fully - resolved command text, interpreter, local variables, and depfile options. - This ensures deduplication only occurs when two actions are truly - interchangeable. +1. **Rule Collection:** Insert each entry in `manifest.rules` into a + `HashMap` keyed by its name. Rules are stored as templates and are not + deduplicated at this stage. 2. **Target Expansion:** Iterate through the `manifest.targets` and the optional `manifest.actions`. Entries in `actions` are treated identically to targets but with `phony` defaulting to `true`. For each item, resolve all strings into `PathBuf`s and resolve all dependency names against other targets. -3. **Edge Creation:** For each AST target, create an `ir::BuildEdge` object. - This involves linking it to the appropriate `ir::Action` (by its ID), - transferring the `phony` and `always` flags, and populating its input and - output vectors. +3. **Action Registration and Edge Creation:** For each expanded target, + resolve the referenced rule template, interpolate its command with the + target's input and output paths, and register the resulting `ir::Action` in + the `actions` map. Actions are hashed on the fully resolved command and file + set, so identical rule templates yield distinct actions when their paths + differ. Create a corresponding `ir::BuildEdge` linking the target to the + action identifier and transfer the `phony` and `always` flags. 4. **Graph Validation:** As the graph is constructed, perform validation checks. This includes ensuring that every rule referenced by a target exists in the - `actions` map and running a cycle detection algorithm (e.g., a depth- first - search maintaining a visitation state) on the dependency graph to fail - compilation if a circular dependency is found. + `actions` map and running a cycle detection algorithm (e.g., a depth-first + search maintaining a visitation state) on the dependency graph to fail early + on circular dependencies. The implemented algorithm performs a depth-first traversal of the target graph and maintains a recursion stack. Order-only dependencies are ignored @@ -1144,8 +1144,9 @@ the `phony` and `always` flags verbatim from the manifest. No Ninja specific placeholders are stored in the IR to keep the representation portable. - Actions are deduplicated using a SHA-256 hash of a canonical JSON - serialisation of their recipe and metadata. Identical commands therefore - share the same identifier which keeps the IR deterministic for snapshot tests. + serialisation of their recipe, inputs, and outputs. Because commands embed + shell-quoted file paths, two targets share an identifier only when both the + command text and file sets match exactly. - Multiple rule references in a single target are not yet supported. The IR generator reports `IrGenError::MultipleRules` when encountered. - Duplicate output files are rejected. Attempting to define the same output diff --git a/src/ir.rs b/src/ir.rs index bc88865f..94389322 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -131,6 +131,12 @@ impl BuildGraph { Ok(graph) } + /// Collect rule templates without deduplicating them. + /// + /// Rules are stored verbatim and expanded later when targets reference + /// them. This allows each target's input and output paths to be embedded in + /// the resulting command, meaning identical rule definitions may yield + /// distinct actions once interpolated. fn process_rules(manifest: &NetsukeManifest, rule_map: &mut HashMap) { for rule in &manifest.rules { rule_map.insert(rule.name.clone(), rule.clone()); @@ -197,6 +203,14 @@ impl BuildGraph { } } +/// Insert an action into the graph, deduplicating on the resolved command and +/// file set. +/// +/// The rule template is interpolated with the target's inputs and outputs, +/// which are shell-escaped and embedded directly in the command. The resulting +/// [`Action`] is hashed so identical commands operating on the same file sets +/// share an identifier, while differing paths produce distinct actions even if +/// they originate from the same rule definition. fn register_action( actions: &mut HashMap, recipe: Recipe, From 93f04eff963ab2cff46a30e08b5dd47202a2d053 Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 28 Aug 2025 19:13:50 +0100 Subject: [PATCH 05/10] Store rule templates by Arc and clarify quoting --- docs/netsuke-design.md | 22 ++++----- docs/roadmap.md | 6 +-- src/ir.rs | 83 ++++++++++++++++++++------------- tests/command_escaping_tests.rs | 17 ++++++- 4 files changed, 81 insertions(+), 47 deletions(-) diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 42a3342b..44fec885 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -242,11 +242,10 @@ Each entry in the `rules` list is a mapping that defines a reusable action. files. Netsuke expands these placeholders to space-separated lists of file paths quoted for POSIX `/bin/sh` using the [`shell-quote`](https://docs.rs/shell-quote/latest/shell_quote/) crate (Sh - mode) before hashing the action. When generating the Ninja rule, the lists - are replaced with Ninja's `$in` and `$out` macros. After interpolation, the - command must be parsable by [shlex](https://docs.rs/shlex/latest/shlex/) - (POSIX mode). Any interpolation other than `ins` or `outs` is automatically - shell-escaped. + mode) before hashing the action. The IR stores the fully expanded command; + Ninja executes this text verbatim. After interpolation, the command must be + parsable by [shlex](https://docs.rs/shlex/latest/shlex/) (POSIX mode). Any + interpolation other than `ins` or `outs` is automatically shell-escaped. - `script`: A multi-line script declared with the YAML `|` block style. The entire block is passed to an interpreter. If the first line begins with `#!` @@ -1236,12 +1235,13 @@ step by step. The placeholders `{{ ins }}` and `{{ outs }}` are expanded to space-separated lists of file paths within Netsuke itself, each path being shell-escaped using the `shell-quote` API. Netsuke uses the `Sh` quoting mode to emit POSIX-compliant single-quoted strings and scans the template for -standalone `$in` and `$out` tokens to avoid rewriting unrelated variables. When -the command is written to `build.ninja`, these lists replace Ninja's `$in` and -`$out` macros. After substitution, the command is validated with \[`shlex`\] -() to ensure it parses correctly. This -approach guarantees that every dynamic part of the command is securely quoted, -albeit at the cost of deduplicating only actions with identical file sets. +standalone `$in` and `$out` tokens to avoid rewriting unrelated variables. +Substitution happens during IR generation and the fully expanded command is +emitted to `build.ninja` unchanged. After substitution, the command is +validated with \[`shlex`\]() to ensure it +parses correctly. This approach guarantees that every dynamic part of the +command is securely quoted, albeit at the cost of deduplicating only actions +with identical file sets. ### 6.4 Automatic Security as a "Friendliness" Feature diff --git a/docs/roadmap.md b/docs/roadmap.md index f85ec0e5..510c240b 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -127,9 +127,9 @@ library, and CLI ergonomics. - [x] Integrate the `shell-quote` crate. - - [x] Mandate its use for $in/$out substitutions within command strings - during Ninja file synthesis to prevent command injection. Other - interpolations are validated with shlex but are not shell-quoted. + - [x] Mandate its use for variable substitutions within command strings + during IR generation to prevent command injection, and validate the final + command string with shlex. - [x] Emit POSIX-sh-compatible quoting (portable single-quote style) rather than Bash-only forms. If Bash-specific quoting is required, document diff --git a/src/ir.rs b/src/ir.rs index 94389322..e5a08c70 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -76,6 +76,7 @@ pub struct BuildEdge { use crate::ast::{NetsukeManifest, Recipe, Rule, StringOrList}; use shell_quote::{QuoteRefExt, Sh}; +use std::sync::Arc; use thiserror::Error; use crate::hasher::ActionHasher; @@ -120,7 +121,7 @@ impl BuildGraph { /// are specified for a single target, or no rule is provided. pub fn from_manifest(manifest: &NetsukeManifest) -> Result { let mut graph = Self::default(); - let mut rule_map = HashMap::new(); + let mut rule_map: HashMap> = HashMap::new(); Self::process_rules(manifest, &mut rule_map); Self::process_targets(manifest, &mut graph.actions, &mut graph.targets, &rule_map)?; @@ -137,9 +138,9 @@ impl BuildGraph { /// them. This allows each target's input and output paths to be embedded in /// the resulting command, meaning identical rule definitions may yield /// distinct actions once interpolated. - fn process_rules(manifest: &NetsukeManifest, rule_map: &mut HashMap) { + fn process_rules(manifest: &NetsukeManifest, rule_map: &mut HashMap>) { for rule in &manifest.rules { - rule_map.insert(rule.name.clone(), rule.clone()); + rule_map.insert(rule.name.clone(), Arc::new(rule.clone())); } } @@ -147,7 +148,7 @@ impl BuildGraph { manifest: &NetsukeManifest, actions: &mut HashMap, targets: &mut HashMap, - rule_map: &HashMap, + rule_map: &HashMap>, ) -> Result<(), IrGenError> { for target in manifest.actions.iter().chain(&manifest.targets) { let outputs = to_paths(&target.name); @@ -156,6 +157,9 @@ impl BuildGraph { let action_id = match &target.recipe { Recipe::Rule { rule } => { let tmpl = resolve_rule(rule, rule_map, &target_name)?; + // Future schema versions may allow targets to override + // recipe or description fields. If so, those values will + // take precedence over the rule template. register_action( actions, tmpl.recipe.clone(), @@ -274,33 +278,45 @@ fn substitute(template: &str, ins: &[String], outs: &[String]) -> String { ch.is_ascii_alphanumeric() || ch == '_' } + fn try_match_in(chars: &[char], i: usize, ins: &[String]) -> Option<(String, usize)> { + if matches!(chars.get(i + 1), Some('i')) && matches!(chars.get(i + 2), Some('n')) { + let prev_ok = chars.get(i.wrapping_sub(1)).is_none_or(|c| !is_ident(*c)); + let next_ok = chars.get(i + 3).is_none_or(|c| !is_ident(*c)); + if prev_ok && next_ok { + return Some((ins.join(" "), 3)); + } + } + None + } + + fn try_match_out(chars: &[char], i: usize, outs: &[String]) -> Option<(String, usize)> { + if matches!(chars.get(i + 1), Some('o')) + && matches!(chars.get(i + 2), Some('u')) + && matches!(chars.get(i + 3), Some('t')) + { + let prev_ok = chars.get(i.wrapping_sub(1)).is_none_or(|c| !is_ident(*c)); + let next_ok = chars.get(i + 4).is_none_or(|c| !is_ident(*c)); + if prev_ok && next_ok { + return Some((outs.join(" "), 4)); + } + } + None + } + let chars: Vec = template.chars().collect(); let mut out = String::with_capacity(template.len()); let mut i = 0; while let Some(&ch) = chars.get(i) { if ch == '$' { - // Attempt to match $in - if matches!(chars.get(i + 1), Some('i')) && matches!(chars.get(i + 2), Some('n')) { - let prev_ok = chars.get(i.wrapping_sub(1)).is_none_or(|c| !is_ident(*c)); - let next_ok = chars.get(i + 3).is_none_or(|c| !is_ident(*c)); - if prev_ok && next_ok { - out.push_str(&ins.join(" ")); - i += 3; - continue; - } + if let Some((replacement, skip)) = try_match_in(&chars, i, ins) { + out.push_str(&replacement); + i += skip; + continue; } - // Attempt to match $out - if matches!(chars.get(i + 1), Some('o')) - && matches!(chars.get(i + 2), Some('u')) - && matches!(chars.get(i + 3), Some('t')) - { - let prev_ok = chars.get(i.wrapping_sub(1)).is_none_or(|c| !is_ident(*c)); - let next_ok = chars.get(i + 4).is_none_or(|c| !is_ident(*c)); - if prev_ok && next_ok { - out.push_str(&outs.join(" ")); - i += 4; - continue; - } + if let Some((replacement, skip)) = try_match_out(&chars, i, outs) { + out.push_str(&replacement); + i += skip; + continue; } } out.push(ch); @@ -336,11 +352,11 @@ fn extract_single(sol: &StringOrList) -> Option<&str> { } } -fn resolve_rule<'a>( +fn resolve_rule( rule: &StringOrList, - rule_map: &'a HashMap, + rule_map: &HashMap>, target_name: &str, -) -> Result<&'a Rule, IrGenError> { +) -> Result, IrGenError> { extract_single(rule).map_or_else( || { let mut rules = to_string_vec(rule); @@ -357,10 +373,13 @@ fn resolve_rule<'a>( } }, |name| { - rule_map.get(name).ok_or_else(|| IrGenError::RuleNotFound { - target_name: target_name.to_string(), - rule_name: name.to_string(), - }) + rule_map + .get(name) + .cloned() + .ok_or_else(|| IrGenError::RuleNotFound { + target_name: target_name.to_string(), + rule_name: name.to_string(), + }) }, ) } diff --git a/tests/command_escaping_tests.rs b/tests/command_escaping_tests.rs index e07db6ac..6b161b1e 100644 --- a/tests/command_escaping_tests.rs +++ b/tests/command_escaping_tests.rs @@ -10,7 +10,7 @@ use rstest::rstest; /// assert!(y.starts_with("netsuke_version")); /// ``` #[inline] -fn manifest_yaml(body: &str) -> String { +pub(crate) fn manifest_yaml(body: &str) -> String { format!("netsuke_version: 1.0.0\n{body}") } @@ -64,6 +64,21 @@ fn variable_name_overlap_not_rewritten() { assert_eq!(words, ["echo", "$input", ">", "out file"]); } +#[rstest] +fn newline_in_paths_is_quoted() { + let yaml = manifest_yaml( + "targets:\n - name: \"o'ut\\nfile\"\n sources: \"-in file\"\n command: \"printf %s $in > $out\"\n", + ); + let manifest = manifest::from_str(&yaml).expect("parse"); + let graph = BuildGraph::from_manifest(&manifest).expect("graph"); + let action = graph.actions.values().next().expect("action"); + let Recipe::Command { command } = &action.recipe else { + panic!("expected command") + }; + let words = shlex::split(command).expect("split command into words"); + assert_eq!(words, ["printf", "%s", "-in file", ">", "o'ut\nfile"]); +} + #[rstest] fn command_without_placeholders_remains_valid() { let yaml = From b914df83beda692d3f1fc38f0152f509b34cd03b Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 28 Aug 2025 22:36:32 +0100 Subject: [PATCH 06/10] Refactor placeholder substitution --- src/ir.rs | 128 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 90 insertions(+), 38 deletions(-) diff --git a/src/ir.rs b/src/ir.rs index e5a08c70..a2bcdf39 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -273,54 +273,106 @@ fn interpolate_command( Ok(interpolated) } -fn substitute(template: &str, ins: &[String], outs: &[String]) -> String { - fn is_ident(ch: char) -> bool { - ch.is_ascii_alphanumeric() || ch == '_' - } +/// Returns whether `ch` is a valid identifier character (ASCII letter, digit, or underscore). +/// +/// # Examples +/// ```rust,ignore +/// assert!(is_identifier_char('a')); +/// assert!(!is_identifier_char('-')); +/// ``` +fn is_identifier_char(ch: char) -> bool { + ch.is_ascii_alphanumeric() || ch == '_' +} - fn try_match_in(chars: &[char], i: usize, ins: &[String]) -> Option<(String, usize)> { - if matches!(chars.get(i + 1), Some('i')) && matches!(chars.get(i + 2), Some('n')) { - let prev_ok = chars.get(i.wrapping_sub(1)).is_none_or(|c| !is_ident(*c)); - let next_ok = chars.get(i + 3).is_none_or(|c| !is_ident(*c)); - if prev_ok && next_ok { - return Some((ins.join(" "), 3)); - } - } - None - } +/// Checks if `pattern` matches `chars` starting at `pos`. +/// +/// # Examples +/// ```rust,ignore +/// let chars: Vec = "in-out".chars().collect(); +/// assert!(matches_pattern_at_position(&chars, 0, &['i', 'n'])); +/// assert!(!matches_pattern_at_position(&chars, 3, &['i', 'n'])); +/// ``` +fn matches_pattern_at_position(chars: &[char], pos: usize, pattern: &[char]) -> bool { + pattern + .iter() + .enumerate() + .all(|(off, ch)| matches!(chars.get(pos + off), Some(c) if c == ch)) +} - fn try_match_out(chars: &[char], i: usize, outs: &[String]) -> Option<(String, usize)> { - if matches!(chars.get(i + 1), Some('o')) - && matches!(chars.get(i + 2), Some('u')) - && matches!(chars.get(i + 3), Some('t')) - { - let prev_ok = chars.get(i.wrapping_sub(1)).is_none_or(|c| !is_ident(*c)); - let next_ok = chars.get(i + 4).is_none_or(|c| !is_ident(*c)); - if prev_ok && next_ok { - return Some((outs.join(" "), 4)); - } - } +/// Ensures characters around the token are not identifier characters. +/// +/// # Examples +/// ```rust,ignore +/// let chars: Vec = "$in".chars().collect(); +/// assert!(has_valid_word_boundaries(&chars, 0, 2)); +/// let chars: Vec = "$input".chars().collect(); +/// assert!(!has_valid_word_boundaries(&chars, 0, 2)); +/// ``` +fn has_valid_word_boundaries(chars: &[char], pos: usize, len: usize) -> bool { + let prev_ok = chars + .get(pos.wrapping_sub(1)) + .is_none_or(|c| !is_identifier_char(*c)); + let next_ok = chars + .get(pos + len + 1) + .is_none_or(|c| !is_identifier_char(*c)); + prev_ok && next_ok +} + +/// Attempts to substitute a placeholder with the provided values. +/// +/// # Examples +/// ```rust,ignore +/// let chars: Vec = "$in".chars().collect(); +/// let res = try_match_placeholder(&chars, 0, &['i', 'n'], &["a".into()]); +/// assert_eq!(res, Some(("a".into(), 3))); +/// ``` +fn try_match_placeholder( + chars: &[char], + pos: usize, + pattern: &[char], + values: &[String], +) -> Option<(String, usize)> { + if matches_pattern_at_position(chars, pos + 1, pattern) + && has_valid_word_boundaries(chars, pos, pattern.len()) + { + Some((values.join(" "), pattern.len() + 1)) + } else { None } +} +/// Finds the appropriate substitution for `$in` or `$out` at `pos`. +/// +/// # Examples +/// ```rust,ignore +/// let chars: Vec = "$in".chars().collect(); +/// let res = find_substitution(&chars, 0, &["a".into()], &[]); +/// assert_eq!(res, Some(("a".into(), 3))); +/// ``` +fn find_substitution( + chars: &[char], + pos: usize, + ins: &[String], + outs: &[String], +) -> Option<(String, usize)> { + try_match_placeholder(chars, pos, &['i', 'n'], ins) + .or_else(|| try_match_placeholder(chars, pos, &['o', 'u', 't'], outs)) +} + +fn substitute(template: &str, ins: &[String], outs: &[String]) -> String { let chars: Vec = template.chars().collect(); let mut out = String::with_capacity(template.len()); let mut i = 0; while let Some(&ch) = chars.get(i) { - if ch == '$' { - if let Some((replacement, skip)) = try_match_in(&chars, i, ins) { - out.push_str(&replacement); - i += skip; - continue; - } - if let Some((replacement, skip)) = try_match_out(&chars, i, outs) { - out.push_str(&replacement); - i += skip; - continue; - } + if ch == '$' + && let Some((replacement, skip)) = find_substitution(&chars, i, ins, outs) + { + out.push_str(&replacement); + i += skip; + } else { + out.push(ch); + i += 1; } - out.push(ch); - i += 1; } out } From 8c60dea36e7df6e8e18695cd4872c5d825a9e6c1 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 29 Aug 2025 00:15:52 +0100 Subject: [PATCH 07/10] Prejoin token expansions and extend quoting tests --- src/ir.rs | 58 ++++++++++++++++++++------------- tests/command_escaping_tests.rs | 15 +++++++++ tests/data/quote.yml | 3 ++ tests/features/ninja.feature | 2 +- tests/steps/ninja_steps.rs | 2 +- 5 files changed, 55 insertions(+), 25 deletions(-) diff --git a/src/ir.rs b/src/ir.rs index a2bcdf39..250c955a 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -137,7 +137,10 @@ impl BuildGraph { /// Rules are stored verbatim and expanded later when targets reference /// them. This allows each target's input and output paths to be embedded in /// the resulting command, meaning identical rule definitions may yield - /// distinct actions once interpolated. + /// distinct actions once interpolated. Should the manifest schema ever + /// permit targets to override recipe fields such as `command` or + /// `description`, those target-level values take precedence over the rule's + /// defaults. fn process_rules(manifest: &NetsukeManifest, rule_map: &mut HashMap>) { for rule in &manifest.rules { rule_map.insert(rule.name.clone(), Arc::new(rule.clone())); @@ -244,6 +247,17 @@ fn register_action( Ok(hash) } +/// Returns `true` when the command contains an odd number of backticks. +/// +/// # Examples +/// ```rust,ignore +/// assert!(has_unmatched_backticks("echo`")); +/// assert!(!has_unmatched_backticks("`echo`")); +/// ``` +fn has_unmatched_backticks(s: &str) -> bool { + s.chars().filter(|&c| c == '`').count() & 1 != 0 +} + fn interpolate_command( template: &str, inputs: &[PathBuf], @@ -262,8 +276,7 @@ fn interpolate_command( let ins = quote_paths(inputs); let outs = quote_paths(outputs); let interpolated = substitute(template, &ins, &outs); - let unmatched_backticks = interpolated.chars().filter(|&c| c == '`').count() & 1 != 0; - if unmatched_backticks || shlex::split(&interpolated).is_none() { + if has_unmatched_backticks(&interpolated) || shlex::split(&interpolated).is_none() { let snippet = interpolated.chars().take(160).collect(); return Err(IrGenError::InvalidCommand { command: interpolated, @@ -318,24 +331,19 @@ fn has_valid_word_boundaries(chars: &[char], pos: usize, len: usize) -> bool { prev_ok && next_ok } -/// Attempts to substitute a placeholder with the provided values. +/// Returns the skip length when `pattern` matches at `pos`. /// /// # Examples /// ```rust,ignore /// let chars: Vec = "$in".chars().collect(); -/// let res = try_match_placeholder(&chars, 0, &['i', 'n'], &["a".into()]); -/// assert_eq!(res, Some(("a".into(), 3))); +/// let res = try_match_placeholder(&chars, 0, &['i', 'n']); +/// assert_eq!(res, Some(3)); /// ``` -fn try_match_placeholder( - chars: &[char], - pos: usize, - pattern: &[char], - values: &[String], -) -> Option<(String, usize)> { +fn try_match_placeholder(chars: &[char], pos: usize, pattern: &[char]) -> Option { if matches_pattern_at_position(chars, pos + 1, pattern) && has_valid_word_boundaries(chars, pos, pattern.len()) { - Some((values.join(" "), pattern.len() + 1)) + Some(pattern.len() + 1) } else { None } @@ -346,28 +354,32 @@ fn try_match_placeholder( /// # Examples /// ```rust,ignore /// let chars: Vec = "$in".chars().collect(); -/// let res = find_substitution(&chars, 0, &["a".into()], &[]); -/// assert_eq!(res, Some(("a".into(), 3))); +/// let res = find_substitution(&chars, 0, "a", ""); +/// assert_eq!(res, Some(("a", 3))); /// ``` -fn find_substitution( +fn find_substitution<'a>( chars: &[char], pos: usize, - ins: &[String], - outs: &[String], -) -> Option<(String, usize)> { - try_match_placeholder(chars, pos, &['i', 'n'], ins) - .or_else(|| try_match_placeholder(chars, pos, &['o', 'u', 't'], outs)) + ins: &'a str, + outs: &'a str, +) -> Option<(&'a str, usize)> { + try_match_placeholder(chars, pos, &['i', 'n']) + .map(|skip| (ins, skip)) + .or_else(|| try_match_placeholder(chars, pos, &['o', 'u', 't']).map(|skip| (outs, skip))) } fn substitute(template: &str, ins: &[String], outs: &[String]) -> String { let chars: Vec = template.chars().collect(); + let ins_joined = ins.join(" "); + let outs_joined = outs.join(" "); let mut out = String::with_capacity(template.len()); let mut i = 0; while let Some(&ch) = chars.get(i) { if ch == '$' - && let Some((replacement, skip)) = find_substitution(&chars, i, ins, outs) + && let Some((replacement, skip)) = + find_substitution(&chars, i, &ins_joined, &outs_joined) { - out.push_str(&replacement); + out.push_str(replacement); i += skip; } else { out.push(ch); diff --git a/tests/command_escaping_tests.rs b/tests/command_escaping_tests.rs index 6b161b1e..7ac801f8 100644 --- a/tests/command_escaping_tests.rs +++ b/tests/command_escaping_tests.rs @@ -64,6 +64,21 @@ fn variable_name_overlap_not_rewritten() { assert_eq!(words, ["echo", "$input", ">", "out file"]); } +#[rstest] +fn output_variable_overlap_not_rewritten() { + let yaml = manifest_yaml( + "targets:\n - name: out\n sources: in\n command: \"echo $output_dir > $out\"\n", + ); + let manifest = manifest::from_str(&yaml).expect("parse"); + let graph = BuildGraph::from_manifest(&manifest).expect("graph"); + let action = graph.actions.values().next().expect("action"); + let Recipe::Command { command } = &action.recipe else { + panic!("expected command") + }; + let words = shlex::split(command).expect("split command into words"); + assert_eq!(words, ["echo", "$output_dir", ">", "out"]); +} + #[rstest] fn newline_in_paths_is_quoted() { let yaml = manifest_yaml( diff --git a/tests/data/quote.yml b/tests/data/quote.yml index be7b6c29..ab4161b7 100644 --- a/tests/data/quote.yml +++ b/tests/data/quote.yml @@ -6,3 +6,6 @@ targets: - name: "o'utfile" sources: "-in file" command: "printf %s $in > $out" + - name: "o'ut\nfile" + sources: "-in file" + command: "printf %s $in > $out" diff --git a/tests/features/ninja.feature b/tests/features/ninja.feature index 04923530..53a8ee0c 100644 --- a/tests/features/ninja.feature +++ b/tests/features/ninja.feature @@ -20,4 +20,4 @@ Feature: Ninja file generation Scenario: Edge-case paths are shell-quoted When the manifest file "tests/data/quote.yml" is compiled to IR And the ninja file is generated - Then shlex splitting command 2 yields "printf, %s, -in file, >, o'utfile" + Then shlex splitting command 3 yields "printf, %s, -in file, >, o'utfile" diff --git a/tests/steps/ninja_steps.rs b/tests/steps/ninja_steps.rs index 2a6bb00a..6800fc4c 100644 --- a/tests/steps/ninja_steps.rs +++ b/tests/steps/ninja_steps.rs @@ -56,5 +56,5 @@ fn ninja_command_tokens(world: &mut CliWorld, index: usize, expected: String) { )] #[then(expr = "shlex splitting the command yields {string}")] fn ninja_first_command_tokens(world: &mut CliWorld, expected: String) { - ninja_command_tokens(world, 1, expected); + ninja_command_tokens(world, 2, expected); } From 9d2af032ce12eccde6a2edaacd932f14868d5234 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 29 Aug 2025 12:21:46 +0100 Subject: [PATCH 08/10] Refactor command quoting tests --- tests/command_escaping_tests.rs | 75 ++++++++++++--------------------- 1 file changed, 27 insertions(+), 48 deletions(-) diff --git a/tests/command_escaping_tests.rs b/tests/command_escaping_tests.rs index 7ac801f8..41bde763 100644 --- a/tests/command_escaping_tests.rs +++ b/tests/command_escaping_tests.rs @@ -14,33 +14,39 @@ pub(crate) fn manifest_yaml(body: &str) -> String { format!("netsuke_version: 1.0.0\n{body}") } -#[rstest] -fn inputs_and_outputs_are_quoted() { - let yaml = manifest_yaml( - "targets:\n - name: 'out file'\n sources: 'in file'\n command: \"cat $in > $out\"\n", - ); +/// Extract shell words from the first target's command. +/// +/// # Examples +/// ``` +/// let words = command_words( +/// "targets:\n - name: out\n sources: in\n command: \"echo hi\"\n", +/// ); +/// assert_eq!(words, ["echo", "hi"]); +/// ``` +fn command_words(body: &str) -> Vec { + let yaml = manifest_yaml(body); let manifest = manifest::from_str(&yaml).expect("parse"); let graph = BuildGraph::from_manifest(&manifest).expect("graph"); let action = graph.actions.values().next().expect("action"); let Recipe::Command { command } = &action.recipe else { - panic!("expected command") + panic!("expected command"); }; - let words = shlex::split(command).expect("split command into words"); + shlex::split(command).expect("split command into words") +} + +#[rstest] +fn inputs_and_outputs_are_quoted() { + let words = command_words( + "targets:\n - name: 'out file'\n sources: 'in file'\n command: \"cat $in > $out\"\n", + ); assert_eq!(words, ["cat", "in file", ">", "out file"]); } #[rstest] fn multiple_inputs_outputs_with_special_chars_are_quoted() { - let yaml = manifest_yaml( + let words = command_words( "targets:\n - name: ['out file', 'out&2']\n sources: ['in file', 'input$1']\n command: \"echo $in && echo $out\"\n", ); - let manifest = manifest::from_str(&yaml).expect("parse"); - let graph = BuildGraph::from_manifest(&manifest).expect("graph"); - let action = graph.actions.values().next().expect("action"); - let Recipe::Command { command } = &action.recipe else { - panic!("expected command") - }; - let words = shlex::split(command).expect("split command into words"); assert_eq!( words, [ @@ -51,60 +57,33 @@ fn multiple_inputs_outputs_with_special_chars_are_quoted() { #[rstest] fn variable_name_overlap_not_rewritten() { - let yaml = manifest_yaml( + let words = command_words( "targets:\n - name: 'out file'\n sources: in\n command: \"echo $input > $out\"\n", ); - let manifest = manifest::from_str(&yaml).expect("parse"); - let graph = BuildGraph::from_manifest(&manifest).expect("graph"); - let action = graph.actions.values().next().expect("action"); - let Recipe::Command { command } = &action.recipe else { - panic!("expected command") - }; - let words = shlex::split(command).expect("split command into words"); assert_eq!(words, ["echo", "$input", ">", "out file"]); } #[rstest] fn output_variable_overlap_not_rewritten() { - let yaml = manifest_yaml( + let words = command_words( "targets:\n - name: out\n sources: in\n command: \"echo $output_dir > $out\"\n", ); - let manifest = manifest::from_str(&yaml).expect("parse"); - let graph = BuildGraph::from_manifest(&manifest).expect("graph"); - let action = graph.actions.values().next().expect("action"); - let Recipe::Command { command } = &action.recipe else { - panic!("expected command") - }; - let words = shlex::split(command).expect("split command into words"); assert_eq!(words, ["echo", "$output_dir", ">", "out"]); } #[rstest] fn newline_in_paths_is_quoted() { - let yaml = manifest_yaml( + let words = command_words( "targets:\n - name: \"o'ut\\nfile\"\n sources: \"-in file\"\n command: \"printf %s $in > $out\"\n", ); - let manifest = manifest::from_str(&yaml).expect("parse"); - let graph = BuildGraph::from_manifest(&manifest).expect("graph"); - let action = graph.actions.values().next().expect("action"); - let Recipe::Command { command } = &action.recipe else { - panic!("expected command") - }; - let words = shlex::split(command).expect("split command into words"); assert_eq!(words, ["printf", "%s", "-in file", ">", "o'ut\nfile"]); } #[rstest] fn command_without_placeholders_remains_valid() { - let yaml = - manifest_yaml("targets:\n - name: out\n sources: in\n command: \"echo hi\"\n"); - let manifest = manifest::from_str(&yaml).expect("parse"); - let graph = BuildGraph::from_manifest(&manifest).expect("graph"); - let action = graph.actions.values().next().expect("action"); - let Recipe::Command { command } = &action.recipe else { - panic!("expected command") - }; - assert_eq!(command, "echo hi"); + let words = + command_words("targets:\n - name: out\n sources: in\n command: \"echo hi\"\n"); + assert_eq!(words, ["echo", "hi"]); } #[rstest] From 9d8fdd2708b5077b1a9c40e0c0bd9bc7eb56bd17 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 29 Aug 2025 14:00:29 +0100 Subject: [PATCH 09/10] Refine backtick check and quoting feature --- src/ir.rs | 2 +- tests/features/ninja.feature | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ir.rs b/src/ir.rs index 250c955a..640484b3 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -255,7 +255,7 @@ fn register_action( /// assert!(!has_unmatched_backticks("`echo`")); /// ``` fn has_unmatched_backticks(s: &str) -> bool { - s.chars().filter(|&c| c == '`').count() & 1 != 0 + s.chars().filter(|&c| c == '`').count().rem_euclid(2) != 0 } fn interpolate_command( diff --git a/tests/features/ninja.feature b/tests/features/ninja.feature index 53a8ee0c..e4d1dd39 100644 --- a/tests/features/ninja.feature +++ b/tests/features/ninja.feature @@ -21,3 +21,4 @@ Feature: Ninja file generation When the manifest file "tests/data/quote.yml" is compiled to IR And the ninja file is generated Then shlex splitting command 3 yields "printf, %s, -in file, >, o'utfile" + From 1af580301a9159df6c24782a9ae491dd7b429fe7 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 29 Aug 2025 14:15:58 +0100 Subject: [PATCH 10/10] Quote paths using OsStr --- src/ir.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ir.rs b/src/ir.rs index 640484b3..17ae6ebb 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -267,8 +267,8 @@ fn interpolate_command( paths .iter() .map(|p| { - let raw = p.display().to_string(); - String::from_utf8(raw.quoted(Sh)).expect("quoted path is valid UTF-8") + let quoted_bytes: Vec = p.as_os_str().quoted(Sh); + String::from_utf8_lossy("ed_bytes).into_owned() }) .collect() }