diff --git a/Cargo.lock b/Cargo.lock index 0ad2229a..63a37022 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -583,9 +583,9 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "globset" @@ -924,6 +924,7 @@ dependencies = [ "assert_cmd", "clap", "cucumber", + "glob", "insta", "itertools 0.12.1", "itoa", diff --git a/Cargo.toml b/Cargo.toml index 7a929fbe..018a04ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ miette = { version = "7.6.0", features = ["fancy"] } sha2 = "0.10" itoa = "1" itertools = "0.12" +glob = "0.3.3" tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["fmt"] } serde_json = "1" diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 1328df11..bd4d8fd5 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -732,13 +732,21 @@ providing a secure bridge to the underlying system. error if the variable is undefined or contains invalid UTF-8 to ensure manifests fail fast on missing inputs. -- `glob(pattern: &str) -> Result, Error>`: A function that performs - file path globbing. This is a critical feature for any modern build tool, - allowing users to easily specify sets of source files (e.g., `src/**/*.c`). - The results are returned sorted lexicographically and symlinks are followed - to keep builds deterministic. This function bridges a key feature gap, as - Ninja itself does not support globbing.[^3] - +- `glob(pattern: &str) -> Result, Error>`: Expand filesystem + patterns (e.g., `src/**/*.c`) into a list of matched paths. Results are + yielded in lexicographic order by the iterator and returned unchanged. + Symlinks are followed by the `glob` crate by default. Matching is case- + sensitive on all platforms. `glob_with` enforces + `require_literal_separator = true` internally, so wildcards do not cross path + separators unless `**` is used. Callers may use `/` or `\` in patterns; these + are normalised to the host platform before matching. Results contain only + files (directories are ignored) and path separators are normalised to `/`. + Leading-dot entries are matched by wildcards. Empty results are represented + as `[]`. Invalid patterns surface as `SyntaxError`; filesystem iteration + errors surface as `InvalidOperation`, matching minijinja error semantics. + On Unix, backslash escapes for glob metacharacters (`[`, `]`, `{`, `}`) are + preserved during separator normalisation. + This fills a Ninja gap since Ninja itself does not support globbing.[^3] - `python_version(requirement: &str) -> Result`: An example of a domain-specific helper function that demonstrates the extensibility of this architecture. This function would execute `python --version` or diff --git a/docs/roadmap.md b/docs/roadmap.md index 48bb5020..421f99dc 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -104,8 +104,8 @@ configurations with variables, control flow, and custom functions. - [x] Implement the essential custom Jinja function env(var_name) to read system environment variables. - - [ ] Implement the critical glob(pattern) custom function to perform file - path globbing, with results sorted lexicographically. + - [x] Implement the critical glob(pattern) custom function to perform file + path globbing, with results sorted lexicographically. - [ ] Support user-defined Jinja macros declared in a top-level macros list, registering them with the environment before rendering. diff --git a/src/manifest.rs b/src/manifest.rs index 3a415812..76343539 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -3,8 +3,9 @@ //! This module parses a `Netsukefile` without relying on a global Jinja //! preprocessing pass. The YAML is parsed first and Jinja expressions are //! evaluated only within string values or the `foreach` and `when` keys. It -//! exposes an `env()` function to surface environment variables, failing fast -//! when values are missing or invalid. +//! exposes `env()` to read environment variables and `glob()` to expand +//! filesystem patterns during template evaluation. Both helpers fail fast when +//! inputs are missing or patterns are invalid. use crate::{ ast::{NetsukeManifest, Recipe, StringOrList, Target, Vars}, @@ -119,13 +120,15 @@ fn map_yaml_error(err: YamlError, src: &str, name: &str) -> YamlDiagnostic { /// /// # Examples /// +/// The [`EnvLock`](test_support::env_lock::EnvLock) guard serialises access to +/// the process environment so tests do not interfere with each other. +/// /// ```rust,ignore -/// // SAFETY: Process environment mutation is unsafe in Rust 2024. Examples and -/// // tests serialise access with an environment lock and restore prior state to -/// // avoid races and leaks. -/// unsafe { std::env::set_var("FOO", "bar"); } +/// use test_support::env_lock::EnvLock; +/// let _guard = EnvLock::acquire(); +/// std::env::set_var("FOO", "bar"); /// assert_eq!(env("FOO").unwrap(), "bar"); -/// unsafe { std::env::remove_var("FOO"); } +/// std::env::remove_var("FOO"); /// ``` fn env_var(name: &str) -> std::result::Result { match std::env::var(name) { @@ -141,6 +144,126 @@ fn env_var(name: &str) -> std::result::Result { } } +/// Process a single glob entry and normalise its output. +/// +/// Returns the entry path when it points to a file, skipping directories. +/// Requires matched paths to be valid UTF-8; output is normalised to use +/// forward slashes. +/// +/// # Examples +/// +/// ```rust,ignore +/// use glob::glob; +/// let entry = glob("Cargo.toml").unwrap().next().unwrap(); +/// let path = process_glob_entry(entry, "Cargo.toml") +/// .unwrap() +/// .unwrap(); +/// assert!(path.ends_with("Cargo.toml")); +/// ``` +fn process_glob_entry( + entry: std::result::Result, + pattern: &str, +) -> std::result::Result, Error> { + match entry { + Ok(path) => { + // Query metadata early to surface filesystem errors promptly. + let meta = path.metadata().map_err(|e| { + Error::new( + ErrorKind::InvalidOperation, + format!("glob failed for '{pattern}': {e}"), + ) + })?; + if !meta.is_file() { + return Ok(None); + } + // Reject non-UTF-8 paths to avoid lossy round-trips and ensure + // manifests remain deterministic. + let s = path.to_str().ok_or_else(|| { + Error::new( + ErrorKind::InvalidOperation, + format!("glob matched a non-UTF-8 path: {}", path.display()), + ) + })?; + Ok(Some(s.replace('\\', "/"))) + } + Err(e) => Err(Error::new( + ErrorKind::InvalidOperation, + format!("glob failed for '{pattern}': {e}"), + )), + } +} + +/// Expand a glob pattern into a list of matching paths with deterministic ordering. +/// +/// Results are returned in lexicographic order to keep builds deterministic. +/// Invalid patterns or filesystem errors surface as Jinja evaluation errors so +/// manifests fail fast when input is incorrect. +/// +/// # Errors +/// +/// Returns an error if the glob pattern is invalid, a directory cannot be +/// read, or a matched path is not valid UTF-8. +/// Matching is case-sensitive on all platforms, wildcards do not cross path +/// separators (use `**` to span directories), and leading-dot entries are +/// matched by wildcards. +/// Convert `/` and `\` to the host separator while preserving escapes for +/// glob metacharacters on Unix. +fn normalise_separators(pattern: &str) -> String { + let native = std::path::MAIN_SEPARATOR; + #[cfg(unix)] + { + let mut out = String::with_capacity(pattern.len()); + let mut it = pattern.chars().peekable(); + while let Some(c) = it.next() { + if c == '\\' && matches!(it.peek(), Some('[' | ']' | '{' | '}')) { + out.push('\\'); + continue; + } + if c == '/' || c == '\\' { + out.push(native); + } else { + out.push(c); + } + } + out + } + #[cfg(not(unix))] + { + pattern.replace('/', &native.to_string()) + } +} + +fn glob_paths(pattern: &str) -> std::result::Result, Error> { + use glob::{MatchOptions, glob_with}; + + // Enforce shell-like semantics: + // - patterns are case-sensitive, + // - wildcards do not cross path separators, + // - dotfiles are matched by default. + let opts = MatchOptions { + case_sensitive: true, + require_literal_separator: true, + require_literal_leading_dot: false, + }; + + // Normalise separators so `/` and `\\` behave the same on all platforms. + let normalized = normalise_separators(pattern); + + let entries = glob_with(&normalized, opts).map_err(|e| { + Error::new( + ErrorKind::SyntaxError, + format!("invalid glob pattern '{pattern}': {e}"), + ) + })?; + let mut paths = Vec::new(); + for entry in entries { + if let Some(p) = process_glob_entry(entry, pattern)? { + paths.push(p); + } + } + Ok(paths) +} + /// Parse a manifest string using Jinja for value templating. /// /// The input YAML must be valid on its own. Jinja expressions are evaluated @@ -155,8 +278,9 @@ fn from_str_named(yaml: &str, name: &str) -> Result { let mut jinja = Environment::new(); jinja.set_undefined_behavior(UndefinedBehavior::Strict); - // Expose a strict environment variable accessor to templates. + // Expose custom helpers to templates. jinja.add_function("env", |name: String| env_var(&name)); + jinja.add_function("glob", |pattern: String| glob_paths(&pattern)); if let Some(vars) = doc.get("vars").and_then(|v| v.as_mapping()).cloned() { for (k, v) in vars { diff --git a/src/manifest/tests.rs b/src/manifest/tests.rs index 54c15861..f61179db 100644 --- a/src/manifest/tests.rs +++ b/src/manifest/tests.rs @@ -18,3 +18,28 @@ fn yaml_hint_needles_are_lowercase() { ); } } + +#[test] +fn glob_paths_invalid_pattern_sets_syntax_error() { + let err = super::glob_paths("[").expect_err("expected pattern error"); + assert_eq!(err.kind(), minijinja::ErrorKind::SyntaxError); +} + +#[cfg(unix)] +#[test] +fn normalise_separators_preserves_bracket_escape() { + assert_eq!(super::normalise_separators("\\["), "\\["); +} + +#[test] +fn glob_paths_ignores_directories() { + let dir = tempfile::tempdir().expect("temp dir"); + std::fs::create_dir(dir.path().join("sub")).expect("create dir"); + std::fs::write(dir.path().join("a.txt"), "a").expect("write file"); + let pattern = format!("{}/{}", dir.path().display(), "*"); + let out = super::glob_paths(&pattern).expect("glob ok"); + assert_eq!( + out, + vec![format!("{}/a.txt", dir.path().display()).replace('\\', "/")] + ); +} diff --git a/tests/data/glob.yml b/tests/data/glob.yml new file mode 100644 index 00000000..e0cfa4b1 --- /dev/null +++ b/tests/data/glob.yml @@ -0,0 +1,5 @@ +netsuke_version: 1.0.0 +targets: + - foreach: glob('tests/data/glob_files/*.txt') + name: "{{ item | replace('tests/data/glob_files/', '') | replace('.txt', '.out') }}" + command: "echo {{ item }}" diff --git a/tests/data/glob_files/a.txt b/tests/data/glob_files/a.txt new file mode 100644 index 00000000..78981922 --- /dev/null +++ b/tests/data/glob_files/a.txt @@ -0,0 +1 @@ +a diff --git a/tests/data/glob_files/b.txt b/tests/data/glob_files/b.txt new file mode 100644 index 00000000..61780798 --- /dev/null +++ b/tests/data/glob_files/b.txt @@ -0,0 +1 @@ +b diff --git a/tests/data/glob_invalid.yml b/tests/data/glob_invalid.yml new file mode 100644 index 00000000..cbe7d8a4 --- /dev/null +++ b/tests/data/glob_invalid.yml @@ -0,0 +1,5 @@ +netsuke_version: 1.0.0 +targets: + - foreach: glob('[') # intentional: unmatched '[' to trigger a glob parse error + name: broken + command: "echo hi" diff --git a/tests/data/glob_windows.yml b/tests/data/glob_windows.yml new file mode 100644 index 00000000..984183df --- /dev/null +++ b/tests/data/glob_windows.yml @@ -0,0 +1,5 @@ +netsuke_version: 1.0.0 +targets: + - foreach: glob('tests\\data\\glob_files\\*.txt') + name: "{{ item | replace('tests/data/glob_files/', '') | replace('.txt', '.out') }}" + command: "echo {{ item }}" diff --git a/tests/features/manifest.feature b/tests/features/manifest.feature index b3f8a595..64622475 100644 --- a/tests/features/manifest.feature +++ b/tests/features/manifest.feature @@ -91,6 +91,32 @@ Feature: Manifest Parsing And the target 2 command is "echo 'bar'" And the target 2 index is 1 + Scenario: Generating targets with glob + Given the manifest file "tests/data/glob.yml" is parsed + When the manifest is checked + Then the manifest has 2 targets + And the target 1 name is "a.out" + And the target 1 index is 0 + And the target 2 name is "b.out" + And the target 2 index is 1 + + Scenario: Generating targets with glob using Windows separators + Given the manifest file "tests/data/glob_windows.yml" is parsed + When the manifest is checked + Then the manifest has 2 targets + And the target 1 name is "a.out" + And the target 2 name is "b.out" + And the target 1 index is 0 + And the target 2 index is 1 + + Scenario: Parsing fails for an invalid glob pattern + Given the manifest file "tests/data/glob_invalid.yml" is parsed + When the parsing result is checked + Then parsing the manifest fails + And the error message contains "glob pattern" + + + Scenario: Parsing fails when a foreach expression is not iterable Given the manifest file "tests/data/foreach_invalid.yml" is parsed When the parsing result is checked diff --git a/tests/manifest_glob_tests.rs b/tests/manifest_glob_tests.rs new file mode 100644 index 00000000..78c0a732 --- /dev/null +++ b/tests/manifest_glob_tests.rs @@ -0,0 +1,196 @@ +//! Tests for file globbing via the `glob()` Jinja helper. + +use netsuke::{ + ast::{NetsukeManifest, StringOrList}, + manifest, +}; +use rstest::{fixture, rstest}; +use std::{fs, path::Path}; + +fn manifest_yaml(body: &str) -> String { + format!("netsuke_version: 1.0.0\n{body}") +} + +fn target_names(manifest: &NetsukeManifest) -> Vec { + manifest + .targets + .iter() + .map(|t| match &t.name { + StringOrList::String(s) => s.clone(), + other => panic!("expected String, got {other:?}"), + }) + .collect() +} + +fn create_test_files(base: &Path, files: &[(&str, &str)]) { + for (rel, content) in files { + let path = base.join(rel); + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).expect("create parent dirs"); + } + fs::write(path, content).expect("write file"); + } +} + +fn create_test_dirs(base: &Path, dirs: &[&str]) { + for d in dirs { + fs::create_dir_all(base.join(d)).expect("create dir"); + } +} + +#[fixture] +fn temp_dir() -> tempfile::TempDir { + tempfile::tempdir().expect("temp dir") +} + +#[rstest] +#[case( + &[("b.txt", "b"), ("a.txt", "a")], + &[], + "*.txt", + "{{ item | replace('{dir}/', '') | replace('{dir}\\\\', '') | replace('.txt', '.out') }}", + &["a.out", "b.out"], + "expands and sorts matches", +)] +#[case( + &[], + &[], + "*.nomatch", + "none", + &[], + "no targets when pattern has no matches", +)] +#[case( + &[("sub/x.txt", "x")], + &[], + "*.txt", + "bad", + &[], + "wildcards must not cross '/'", +)] +#[case( + &[("sub/x.txt", "x")], + &[], + "**/*.txt", + "{{ item | replace('{dir}/', '') }}", + &["sub/x.txt"], + "** spans directories", +)] +#[case( + &[(".hidden.txt", "h")], + &[], + "*.txt", + "{{ item }}", + &[".hidden.txt"], + "dotfiles should match", +)] +#[case( + &[("UPPER.TXT", "x")], + &[], + "*.txt", + "fail", + &[], + "glob should be case-sensitive", +)] +#[case( + &[("a.txt", "a")], + &["sub"], + "*", + "{{ item }}", + &["a.txt"], + "should filter directories", +)] +fn test_glob_behavior( + temp_dir: tempfile::TempDir, + #[case] files: &[(&str, &str)], + #[case] dirs: &[&str], + #[case] pattern_suffix: &str, + #[case] name_template: &str, + #[case] expected_partial: &[&str], + #[case] description: &str, +) { + create_test_files(temp_dir.path(), files); + create_test_dirs(temp_dir.path(), dirs); + + let dir_str = temp_dir.path().display().to_string(); + let dir_fwd = dir_str.replace('\\', "/"); + let pattern = format!("{dir_str}/{pattern_suffix}"); + let yaml = manifest_yaml(&format!( + concat!( + "targets:\n", + " - foreach: glob('{pattern}')\n", + " name: \"{name_template}\"\n", + " command: echo hi\n", + ), + pattern = pattern, + name_template = name_template + .replace("{dir}/", &format!("{dir_fwd}/")) + .replace("{dir}\\\\", &format!("{dir_str}\\\\\\\\")), + )); + + let manifest = manifest::from_str(&yaml).expect("parse"); + + if expected_partial.is_empty() { + assert!(manifest.targets.is_empty(), "{description}"); + } else { + let prefix_fwd = format!("{dir_fwd}/"); + let prefix_back = format!("{dir_str}\\"); + let names: Vec<_> = target_names(&manifest) + .into_iter() + .map(|n| n.replace(&prefix_fwd, "").replace(&prefix_back, "")) + .collect(); + assert_eq!(names, expected_partial, "{description}"); + } +} + +#[rstest] +fn glob_invalid_pattern_errors() { + let yaml = + manifest_yaml("targets:\n - foreach: glob('[')\n name: bad\n command: echo hi\n"); + let err = manifest::from_str(&yaml).expect_err("invalid pattern should error"); + let msg = format!("{err:#}"); + assert!(msg.contains("invalid glob pattern"), "{msg}"); +} + +#[rstest] +fn glob_accepts_windows_path_separators(temp_dir: tempfile::TempDir) { + fs::write(temp_dir.path().join("a.txt"), "a").expect("write a"); + fs::write(temp_dir.path().join("b.txt"), "b").expect("write b"); + let dir_fwd = temp_dir.path().display().to_string(); + let dir_win = dir_fwd.replace('/', "\\\\"); + let pattern = format!("{dir_win}\\\\*.txt"); + let yaml = manifest_yaml(&format!( + concat!( + "targets:\n", + " - foreach: glob('{pattern}')\n", + " name: \"{{{{ item }}}}\"\n", + " command: echo hi\n", + ), + pattern = pattern, + )); + let manifest = manifest::from_str(&yaml).expect("parse"); + let prefix_fwd = format!("{dir_fwd}/"); + let names: Vec<_> = target_names(&manifest) + .into_iter() + .map(|n| n.replace(&prefix_fwd, "").replace(".txt", ".out")) + .collect(); + assert_eq!(names, vec!["a.out", "b.out"]); +} + +#[cfg(windows)] +#[rstest] +fn glob_is_case_sensitive_on_windows(temp_dir: tempfile::TempDir) { + fs::write(temp_dir.path().join("UPPER.TXT"), "x").expect("write file"); + let pattern = format!("{}/*.txt", temp_dir.path().display()); + let yaml = manifest_yaml(&format!( + concat!( + "targets:\n", + " - foreach: glob('{pattern}')\n", + " name: fail\n", + " command: echo hi\n", + ), + pattern = pattern, + )); + let manifest = manifest::from_str(&yaml).expect("parse"); + assert!(manifest.targets.is_empty()); +} diff --git a/tests/steps/manifest_steps.rs b/tests/steps/manifest_steps.rs index 31284b8d..129e26d5 100644 --- a/tests/steps/manifest_steps.rs +++ b/tests/steps/manifest_steps.rs @@ -27,7 +27,7 @@ fn parse_manifest_inner(world: &mut CliWorld, path: &str) { } Err(e) => { world.manifest = None; - world.manifest_error = Some(e.to_string()); + world.manifest_error = Some(format!("{e:#}")); } } } @@ -160,6 +160,16 @@ fn manifest_parse_error(world: &mut CliWorld) { assert!(world.manifest_error.is_some(), "expected parse error"); } +#[then(expr = "the error message contains {string}")] +#[expect( + clippy::needless_pass_by_value, + reason = "Cucumber step requires owned String" +)] +fn manifest_error_contains(world: &mut CliWorld, text: String) { + let msg = world.manifest_error.as_ref().expect("expected parse error"); + assert!(msg.contains(&text), "{msg}"); +} + #[then(expr = "the first rule name is {string}")] #[expect( clippy::needless_pass_by_value,