From ef744225b1ef7b72ecbb0531343a347b9fed1365 Mon Sep 17 00:00:00 2001 From: Leynos Date: Tue, 18 Nov 2025 02:40:31 +0000 Subject: [PATCH 01/23] feat(stdlib): add cross-platform 'which' filter and function to resolve executables - Implemented a deterministic executable discovery filter and function 'which' in MiniJinja - Supports options: all matches, canonical paths, fresh cache bypass, and cwd_mode - Added an LRU cache with self-healing for stale entries - Emits actionable diagnostics with PATH previews for troubleshooting - Fully covers Windows and POSIX path behavior with extensive unit and integration tests - Updated documentation, roadmap, and user guides with 'which' usage and semantics - Integrated into stdlib module and registered in MiniJinja environment This feature allows templates to resolve executables from PATH safely and efficiently, improving script flexibility and debugging. Co-authored-by: terragon-labs[bot] --- Cargo.lock | 18 + Cargo.toml | 1 + ...001-replace-serde-yml-with-serde-saphyr.md | 2 +- docs/netsuke-design.md | 15 +- docs/roadmap.md | 12 +- docs/rust-doctest-dry-guide.md | 2 +- docs/users-guide.md | 25 +- src/stdlib/mod.rs | 2 + src/stdlib/which.rs | 841 ++++++++++++++++++ tests/features/stdlib.feature | 27 + tests/std_filter_tests/mod.rs | 2 + tests/std_filter_tests/which_filter_tests.rs | 158 ++++ tests/steps/stdlib_steps/assertions.rs | 16 + tests/steps/stdlib_steps/types.rs | 24 + tests/steps/stdlib_steps/workspace.rs | 101 ++- 15 files changed, 1231 insertions(+), 15 deletions(-) create mode 100644 src/stdlib/which.rs create mode 100644 tests/std_filter_tests/which_filter_tests.rs diff --git a/Cargo.lock b/Cargo.lock index b97796fd..c07b1b38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + 
[[package]] name = "ambient-authority" version = "0.0.2" @@ -757,6 +763,8 @@ version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" dependencies = [ + "allocator-api2", + "equivalent", "foldhash", ] @@ -1095,6 +1103,15 @@ version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown", +] + [[package]] name = "maybe-owned" version = "0.3.4" @@ -1245,6 +1262,7 @@ dependencies = [ "insta", "itertools 0.12.1", "itoa", + "lru", "md-5", "miette", "minijinja", diff --git a/Cargo.toml b/Cargo.toml index 35796df1..a8dd1cce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,7 @@ md5 = { package = "md-5", version = "0.10", optional = true } itoa = "1" itertools = "0.12" indexmap = { version = "2.5", features = ["serde"] } +lru = "0.12" glob = "0.3.3" tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["fmt"] } diff --git a/docs/adr-001-replace-serde-yml-with-serde-saphyr.md b/docs/adr-001-replace-serde-yml-with-serde-saphyr.md index 1a1bebcd..3934d3dc 100644 --- a/docs/adr-001-replace-serde-yml-with-serde-saphyr.md +++ b/docs/adr-001-replace-serde-yml-with-serde-saphyr.md @@ -29,7 +29,7 @@ it offers: - **Safety:** A pure Rust implementation with **no `unsafe` libyaml dependencies**([2](https://www.reddit.com/r/rust/comments/1bo5dle/we_lost_serdeyaml_whats_the_next_one/#:~:text=For%20the%20,key%20support%20and%20nested%20enums))([2](https://www.reddit.com/r/rust/comments/1bo5dle/we_lost_serdeyaml_whats_the_next_one/#:~:text=Those%20who%20dislike%20unsafe%20statements,it%20is%20also%20notably%20faster)), - eliminating C library risks. 
+ eliminating C library risks. - **Spec Compliance:** Full YAML 1.2 support (including proper handling of anchors and merge diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index c2a3008f..72ded7f5 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -951,9 +951,9 @@ Semantics honour platform conventions while enforcing predictable behaviour: bit. Empty `PATH` segments (leading, trailing, or `::`) map to the working directory when `cwd_mode` is `"auto"` or `"always"`. - On Windows, the lookup respects `PATHEXT` when the command lacks an - extension. Comparisons are case-insensitive, results normalise both `\` and - `/`, and `cwd_mode` defaults to skipping the working directory to avoid the - platform’s surprise "search CWD first" rule. Opting in via `"always"` + extension. Comparisons are case-insensitive, results normalise both slash + styles, and `cwd_mode` defaults to skipping the working directory to avoid + the platform’s surprise "search CWD first" rule. Opting in via `"always"` restores that behaviour. - Canonicalisation happens after discovery and only when requested so that manifests can balance reproducibility against host-specific absolute paths. @@ -977,6 +977,15 @@ validation, and list-all semantics. Behavioural MiniJinja fixtures exercise the filter in Stage 3/4 renders to prove determinism across repeated invocations with identical environments. +Implementation mirrors the design with a small (64-entry) LRU cache keyed by +the command name, current directory, `PATH`, optional `PATHEXT`, and every +filter option aside from `fresh`. Cache hits validate metadata before returning +so deleted or repointed binaries immediately trigger a re-scan, and +`fresh=true` bypasses the cache without discarding previously memoised entries. +Diagnostic errors embed the `netsuke::jinja::which::*` codes and print a +trimmed preview of the scanned `PATH`, giving manifest authors clear +troubleshooting hints on both Unix and Windows hosts. 
+ #### Generic collection filters | Filter | Purpose | diff --git a/docs/roadmap.md b/docs/roadmap.md index 3313b9b8..81c6a6b6 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -199,24 +199,24 @@ library, and CLI ergonomics. command for diagnostic codes, capturing the decision and rationale in the architecture docs. -- [ ] **Executable Discovery Filter (`which`):** +- [x] **Executable Discovery Filter (`which`):** - - [ ] Implement the cross-platform `which` MiniJinja filter and matching + - [x] Implement the cross-platform `which` MiniJinja filter and matching function alias, exposing `all`, `canonical`, `fresh`, and `cwd_mode` keyword arguments that mirror the design document. - - [ ] Integrate the finder with the Stage 3/4 render cache by including + - [x] Integrate the finder with the Stage 3/4 render cache by including `PATH`, optional `PATHEXT`, current directory, and option flags in the memoisation key while keeping the helper pure by default. - - [ ] Provide an LRU cache with metadata self-healing to avoid stale hits, + - [x] Provide an LRU cache with metadata self-healing to avoid stale hits, and honour `fresh=true` without discarding cached entries. - - [ ] Emit actionable `netsuke::jinja::which::not_found` and + - [x] Emit actionable `netsuke::jinja::which::not_found` and `netsuke::jinja::which::args` diagnostics with PATH previews and platform-appropriate hints. - - [ ] Cover POSIX and Windows behaviour, canonicalization, list-all mode, + - [x] Cover POSIX and Windows behaviour, canonicalization, list-all mode, and cache validation with unit tests, plus MiniJinja fixtures that assert deterministic renders across repeated invocations. 
diff --git a/docs/rust-doctest-dry-guide.md b/docs/rust-doctest-dry-guide.md index 634c1122..2f186d84 100644 --- a/docs/rust-doctest-dry-guide.md +++ b/docs/rust-doctest-dry-guide.md @@ -632,7 +632,7 @@ mastering doctests: July 15, 2025, [^4]: Documentation tests - GitHub Pages, accessed on July 15, 2025, - + [^5]: Documentation tests - Massachusetts Institute of Technology, accessed on July 15, 2025, diff --git a/docs/users-guide.md b/docs/users-guide.md index 3b849237..0da3388e 100644 --- a/docs/users-guide.md +++ b/docs/users-guide.md @@ -249,8 +249,8 @@ dynamic capabilities to your manifest. - Expressions: `{{ 1 + 1 }}`, `{{ sources | map('basename') }}` - Control Structures (within specific keys like `foreach`, `when`, or inside - `macros`): `{% if enable %}…{% endif %}`, `{% for item in list %}…{% endfor - %}` + `macros`): `{% if enable %}…{% endif %}`, + `{% for item in list %}…{% endfor %}` **Important:** Structural Jinja (`{% %}`) is generally **not** allowed directly within the YAML structure outside of `macros`. Logic should primarily be within @@ -423,6 +423,27 @@ Apply filters using the pipe `|` operator: `{{ value | filter_name(args...) }}` `shell`. Marks template as impure. The same output and streaming limits apply when `grep` emits large result sets. +**Executable Discovery (`which`):** + +- `which` filter/function: Resolves executables using the current `PATH` + without marking the template as impure. Example: `{{ 'clang++' | which }}` + returns the first matching binary; the function alias + `{{ which('clang++') }}` is available if piping would be awkward. +- Keyword arguments: + - `all` (default `false`): Return every match, ordered by `PATH`. + - `canonical` (default `false`): Resolve symlinks and deduplicate entries by + their canonical path. + - `fresh` (default `false`): Bypass the resolver cache for this lookup while + keeping previous entries available for future renders. 
+ - `cwd_mode` (`auto` | `always` | `never`, default `auto`): Control whether + empty `PATH` segments (and, on Windows, the implicit current-directory + search) are honoured. Use `"always"` to force the working directory into + the search order when `PATH` is empty. +- Errors include actionable diagnostic codes such as + `netsuke::jinja::which::not_found` along with a preview of the scanned + `PATH`. Supplying unknown keyword arguments or invalid values raises + `netsuke::jinja::which::args`. + **Impurity:** Filters like `shell` and functions like `fetch` interact with the outside world. Netsuke tracks this "impurity". Impure templates might affect caching or reproducibility analysis in future versions. Use impure helpers diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index 57636c6c..fdc499a1 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -14,6 +14,7 @@ mod command; mod network; mod path; mod time; +mod which; pub use network::{ HostPatternError, NetworkPolicy, NetworkPolicyConfigError, NetworkPolicyViolation, @@ -390,6 +391,7 @@ pub fn register_with_config(env: &mut Environment<'_>, config: StdlibConfig) -> register_file_tests(env); path::register_filters(env); collections::register_filters(env); + which::register(env); let impure = state.impure_flag(); let (network_config, command_config) = config.into_components(); network::register_functions(env, Arc::clone(&impure), network_config); diff --git a/src/stdlib/which.rs b/src/stdlib/which.rs new file mode 100644 index 00000000..1e06cd2c --- /dev/null +++ b/src/stdlib/which.rs @@ -0,0 +1,841 @@ +//! Cross-platform `which` filter and helper function for `MiniJinja`. +//! +//! Resolves executables deterministically across Unix and Windows, +//! honouring user-configurable options for returning every match, emitting +//! canonical paths, bypassing the resolver cache, and opt-in search of the +//! current working directory. 
+ +use std::{ + ffi::OsString, + fmt, fs, + num::NonZeroUsize, + sync::{Arc, Mutex}, +}; + +#[cfg(windows)] +use std::ffi::OsStr; + +use camino::{Utf8Path, Utf8PathBuf}; +use indexmap::IndexSet; +use lru::LruCache; +use minijinja::{ + Environment, Error, ErrorKind, + value::{Kwargs, Value}, +}; + +const CACHE_CAPACITY: usize = 64; + +pub(crate) fn register(env: &mut Environment<'_>) { + let resolver = Arc::new(WhichResolver::new()); + { + let filter_resolver = Arc::clone(&resolver); + env.add_filter("which", move |value: Value, kwargs: Kwargs| { + resolve_with(&filter_resolver, &value, &kwargs).and_then(|output| { + kwargs.assert_all_used()?; + Ok(output) + }) + }); + } + { + let function_resolver = Arc::clone(&resolver); + env.add_function("which", move |value: Value, kwargs: Kwargs| { + resolve_with(&function_resolver, &value, &kwargs).and_then(|output| { + kwargs.assert_all_used()?; + Ok(output) + }) + }); + } +} + +fn resolve_with( + resolver: &WhichResolver, + command: &Value, + kwargs: &Kwargs, +) -> Result { + let name = command + .as_str() + .map(str::trim) + .filter(|candidate| !candidate.is_empty()) + .ok_or_else(|| args_error("which requires a non-empty string"))?; + let options = WhichOptions::from_kwargs(kwargs)?; + let matches = resolver.resolve(name, &options)?; + Ok(render_value(&matches, &options)) +} + +fn render_value(matches: &[Utf8PathBuf], options: &WhichOptions) -> Value { + if options.all { + let rendered: Vec = matches + .iter() + .map(|path| format_path_for_output(path)) + .collect(); + Value::from_serialize(rendered) + } else { + let first = matches + .first() + .map_or_else(String::new, |path| format_path_for_output(path)); + Value::from(first) + } +} + +fn format_path_for_output(path: &Utf8Path) -> String { + #[cfg(windows)] + { + path.as_str().replace('\\', "/") + } + #[cfg(not(windows))] + { + path.as_str().to_owned() + } +} + +#[derive(Clone, Debug)] +struct WhichResolver { + cache: Arc>>, +} + +impl WhichResolver { + fn new() -> 
Self { + #[expect( + clippy::unwrap_used, + reason = "cache capacity constant is greater than zero" + )] + let capacity = NonZeroUsize::new(CACHE_CAPACITY).unwrap(); + Self { + cache: Arc::new(Mutex::new(LruCache::new(capacity))), + } + } + + fn resolve(&self, command: &str, options: &WhichOptions) -> Result, Error> { + let env = EnvSnapshot::capture()?; + let key = CacheKey::new(command, &env, options); + if !options.fresh + && let Some(cached) = self.try_cache(&key) + { + return Ok(cached); + } + let matches = lookup(command, &env, options)?; + self.store(key, matches.clone()); + Ok(matches) + } + + fn try_cache(&self, key: &CacheKey) -> Option> { + let mut guard = self.lock_cache(); + match guard.get(key) { + Some(entry) if entry.is_valid() => Some(entry.matches.clone()), + Some(_) => { + guard.pop(key); + None + } + None => None, + } + } + + fn store(&self, key: CacheKey, matches: Vec) { + let mut guard = self.lock_cache(); + guard.put(key, CacheEntry { matches }); + } + + fn lock_cache(&self) -> std::sync::MutexGuard<'_, LruCache> { + match self.cache.lock() { + Ok(guard) => guard, + Err(poisoned) => poisoned.into_inner(), + } + } +} + +fn push_matches( + matches: &mut Vec, + candidates: Vec, + collect_all: bool, +) -> bool { + for candidate in candidates { + if !is_executable(&candidate) { + continue; + } + matches.push(candidate); + if !collect_all { + return true; + } + } + false +} + +fn lookup( + command: &str, + env: &EnvSnapshot, + options: &WhichOptions, +) -> Result, Error> { + if is_direct_path(command) { + return resolve_direct(command, env, options); + } + + let dirs = env.resolved_dirs(options.cwd_mode); + let mut matches = Vec::new(); + + #[cfg(windows)] + let suffixes = env.pathext(); + + for dir in &dirs { + #[cfg(windows)] + let candidates = candidate_paths(dir, command, suffixes); + #[cfg(not(windows))] + let candidates = vec![dir.join(command)]; + + if push_matches(&mut matches, candidates, options.all) { + break; + } + } + + if 
matches.is_empty() { + return Err(not_found_error(command, &dirs, options.cwd_mode)); + } + + if options.canonical { + canonicalise(matches) + } else { + Ok(matches) + } +} + +#[derive(Clone, Debug)] +struct CacheEntry { + matches: Vec, +} + +impl CacheEntry { + fn is_valid(&self) -> bool { + self.matches + .iter() + .all(|path| is_executable(path.as_path())) + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +struct CacheKey { + command: String, + path: Option, + pathext: Option, + cwd: Utf8PathBuf, + options: CacheKeyOptions, +} + +impl CacheKey { + fn new(command: &str, env: &EnvSnapshot, options: &WhichOptions) -> Self { + Self { + command: command.to_owned(), + path: env.raw_path.clone(), + pathext: env.raw_pathext.clone(), + cwd: env.cwd.clone(), + options: CacheKeyOptions::from(options), + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +struct CacheKeyOptions { + all: bool, + canonical: bool, + cwd_mode: CwdMode, +} + +impl From<&WhichOptions> for CacheKeyOptions { + fn from(value: &WhichOptions) -> Self { + Self { + all: value.all, + canonical: value.canonical, + cwd_mode: value.cwd_mode, + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +enum CwdMode { + Auto, + Always, + Never, +} + +impl CwdMode { + fn parse(value: &str) -> Option { + match value { + "auto" => Some(Self::Auto), + "always" => Some(Self::Always), + "never" => Some(Self::Never), + _ => None, + } + } +} + +impl Default for CwdMode { + fn default() -> Self { + Self::Auto + } +} + +#[derive(Clone, Debug, Default)] +struct WhichOptions { + all: bool, + canonical: bool, + fresh: bool, + cwd_mode: CwdMode, +} + +impl WhichOptions { + fn from_kwargs(kwargs: &Kwargs) -> Result { + let all = kwargs.get::>("all")?.unwrap_or(false); + let canonical = kwargs.get::>("canonical")?.unwrap_or(false); + let fresh = kwargs.get::>("fresh")?.unwrap_or(false); + let cwd_mode = kwargs + .get::>("cwd_mode")? 
+ .map(|mode| { + let lower = mode.to_ascii_lowercase(); + CwdMode::parse(&lower).ok_or_else(|| { + args_error(format!( + "cwd_mode must be 'auto', 'always', or 'never', got '{mode}'" + )) + }) + }) + .transpose()?; + Ok(Self { + all, + canonical, + fresh, + cwd_mode: cwd_mode.unwrap_or_default(), + }) + } +} + +struct EnvSnapshot { + cwd: Utf8PathBuf, + raw_path: Option, + raw_pathext: Option, + entries: Vec, + #[cfg(windows)] + pathext: Vec, +} + +impl EnvSnapshot { + fn capture() -> Result { + let cwd = current_dir_utf8()?; + let raw_path = std::env::var_os("PATH"); + let entries = parse_path_entries(raw_path.clone(), &cwd)?; + #[cfg(windows)] + let raw_pathext = std::env::var_os("PATHEXT"); + #[cfg(windows)] + let pathext = parse_pathext(raw_pathext.as_deref()); + #[cfg(not(windows))] + let raw_pathext = None; + Ok(Self { + cwd, + raw_path, + raw_pathext, + entries, + #[cfg(windows)] + pathext, + }) + } + + fn resolved_dirs(&self, mode: CwdMode) -> Vec { + let mut dirs = Vec::new(); + if matches!(mode, CwdMode::Always) { + dirs.push(self.cwd.clone()); + } + for entry in &self.entries { + match entry { + PathEntry::Dir(path) => dirs.push(path.clone()), + PathEntry::CurrentDir if matches!(mode, CwdMode::Always | CwdMode::Auto) => { + dirs.push(self.cwd.clone()); + } + PathEntry::CurrentDir => {} + } + } + dirs + } + + #[cfg(windows)] + fn pathext(&self) -> &[String] { + &self.pathext + } +} + +#[derive(Clone, Debug)] +enum PathEntry { + Dir(Utf8PathBuf), + CurrentDir, +} + +fn parse_path_entries(raw: Option, cwd: &Utf8Path) -> Result, Error> { + let mut entries = Vec::new(); + let Some(raw_value) = raw else { + return Ok(entries); + }; + for (index, component) in std::env::split_paths(&raw_value).enumerate() { + if component.as_os_str().is_empty() { + entries.push(PathEntry::CurrentDir); + continue; + } + let utf8 = Utf8PathBuf::from_path_buf(component).map_err(|_| { + args_error(format!( + "PATH entry #{index} contains non-UTF-8 characters; Netsuke requires 
UTF-8 paths" + )) + })?; + let resolved = if utf8.is_absolute() { + utf8 + } else { + cwd.join(utf8) + }; + entries.push(PathEntry::Dir(resolved)); + } + Ok(entries) +} + +#[cfg(windows)] +const DEFAULT_PATHEXT: &[&str] = &[ + ".com", ".exe", ".bat", ".cmd", ".vbs", ".vbe", ".js", ".jse", ".wsf", ".wsh", ".msc", +]; + +#[cfg(windows)] +fn parse_pathext(raw: Option<&OsStr>) -> Vec { + let mut dedup = IndexSet::new(); + let source = raw + .map(|value| value.to_string_lossy().into_owned()) + .unwrap_or_else(|| DEFAULT_PATHEXT.join(";")); + for segment in source.split(';') { + let trimmed = segment.trim(); + if trimmed.is_empty() { + continue; + } + let mut normalised = trimmed.to_ascii_lowercase(); + if !normalised.starts_with('.') { + normalised.insert(0, '.'); + } + dedup.insert(normalised); + } + if dedup.is_empty() { + DEFAULT_PATHEXT.iter().map(|ext| ext.to_string()).collect() + } else { + dedup.into_iter().collect() + } +} + +fn current_dir_utf8() -> Result { + let cwd = std::env::current_dir().map_err(|err| { + Error::new( + ErrorKind::InvalidOperation, + format!("failed to resolve current directory: {err}"), + ) + })?; + Utf8PathBuf::from_path_buf(cwd).map_err(|_| { + Error::new( + ErrorKind::InvalidOperation, + "current directory contains non-UTF-8 components", + ) + }) +} + +fn is_direct_path(command: &str) -> bool { + #[cfg(windows)] + { + command.contains(['\\', '/', ':']) + } + #[cfg(not(windows))] + { + command.contains('/') + } +} + +fn resolve_direct( + command: &str, + env: &EnvSnapshot, + options: &WhichOptions, +) -> Result, Error> { + let raw = Utf8Path::new(command); + let resolved = if raw.is_absolute() { + raw.to_path_buf() + } else { + env.cwd.join(raw) + }; + if !is_executable(&resolved) { + return Err(direct_not_found(command, &resolved)); + } + let output = if options.canonical { + canonicalise(vec![resolved])? 
+ } else { + vec![resolved] + }; + Ok(output) +} + +fn direct_not_found(command: &str, path: &Utf8Path) -> Error { + Error::new( + ErrorKind::InvalidOperation, + format!( + "[netsuke::jinja::which::not_found] command '{command}' at '{path}' is missing or not executable" + ), + ) +} + +fn is_executable(path: &Utf8Path) -> bool { + fs::metadata(path.as_std_path()) + .is_ok_and(|metadata| metadata.is_file() && has_execute_permission(&metadata)) +} + +#[cfg(unix)] +fn has_execute_permission(metadata: &fs::Metadata) -> bool { + use std::os::unix::fs::PermissionsExt; + metadata.permissions().mode() & 0o111 != 0 +} + +#[cfg(not(unix))] +fn has_execute_permission(metadata: &fs::Metadata) -> bool { + metadata.is_file() +} + +fn canonicalise(paths: Vec) -> Result, Error> { + let mut unique = IndexSet::new(); + let mut resolved = Vec::new(); + for path in paths { + let canonical = fs::canonicalize(path.as_std_path()).map_err(|err| { + Error::new( + ErrorKind::InvalidOperation, + format!("failed to canonicalise '{path}': {err}"), + ) + })?; + let utf8 = Utf8PathBuf::from_path_buf(canonical).map_err(|_| { + Error::new( + ErrorKind::InvalidOperation, + "canonical path contains non-UTF-8 characters", + ) + })?; + if unique.insert(utf8.clone()) { + resolved.push(utf8); + } + } + Ok(resolved) +} + +fn not_found_error(command: &str, dirs: &[Utf8PathBuf], mode: CwdMode) -> Error { + let count = dirs.len(); + let preview = path_preview(dirs); + let mut message = format!( + "[netsuke::jinja::which::not_found] command '{command}' not found after checking {count} PATH entries. Preview: {preview}" + ); + if let Some(hint) = hint_for_mode(mode) { + message.push_str(". 
"); + message.push_str(hint); + } + Error::new(ErrorKind::InvalidOperation, message) +} + +fn path_preview(dirs: &[Utf8PathBuf]) -> String { + const LIMIT: usize = 4; + if dirs.is_empty() { + return "".to_owned(); + } + let mut parts: Vec<_> = dirs + .iter() + .take(LIMIT) + .map(|dir| format_path_for_output(dir)) + .collect(); + if dirs.len() > LIMIT { + parts.push("…".into()); + } + parts.join(", ") +} + +const fn hint_for_mode(mode: CwdMode) -> Option<&'static str> { + #[cfg(windows)] + { + match mode { + CwdMode::Always => None, + _ => Some("Set cwd_mode=\"always\" to include the current directory."), + } + } + #[cfg(not(windows))] + { + match mode { + CwdMode::Never => Some( + "Empty PATH segments are ignored; use cwd_mode=\"auto\" to include the working directory.", + ), + _ => None, + } + } +} + +fn args_error(message: impl fmt::Display) -> Error { + Error::new( + ErrorKind::InvalidOperation, + format!("[netsuke::jinja::which::args] {message}"), + ) +} + +#[cfg(windows)] +fn candidate_paths(dir: &Utf8Path, command: &str, pathext: &[String]) -> Vec { + let mut paths = Vec::new(); + let base = dir.join(command); + if Utf8Path::new(command).extension().is_some() { + paths.push(base); + return paths; + } + for ext in pathext { + let mut candidate = base.as_str().to_owned(); + candidate.push_str(ext); + paths.push(Utf8PathBuf::from(candidate)); + } + paths +} + +#[cfg(test)] +mod tests { + use super::*; + use anyhow::{Context, Result, anyhow, bail, ensure}; + use rstest::rstest; + use tempfile::TempDir; + use test_support::{ + env::{VarGuard, set_var}, + env_lock::EnvLock, + }; + + #[cfg(windows)] + use std::ffi::OsStr; + + struct EnvFixture { + _lock: EnvLock, + _temp: TempDir, + dirs: Vec, + _path_guard: VarGuard, + #[cfg(windows)] + pathext_guard: VarGuard, + } + + impl EnvFixture { + fn new(dir_names: &[&str]) -> Result { + let lock = EnvLock::acquire(); + let temp = TempDir::new().context("create which test workspace")?; + let mut dirs = Vec::new(); + for 
name in dir_names { + let dir = temp.path().join(name); + std::fs::create_dir_all(&dir) + .with_context(|| format!("create fixture dir {name}"))?; + let utf = Utf8PathBuf::from_path_buf(dir) + .map_err(|_| anyhow!("fixture path for {name} is not UTF-8"))?; + dirs.push(utf); + } + let joined = std::env::join_paths(dirs.iter().map(|dir| dir.as_std_path())) + .context("join PATH entries")?; + let path_guard = VarGuard::set("PATH", joined.as_os_str()); + #[cfg(windows)] + let pathext_guard = VarGuard::set("PATHEXT", OsStr::new(".cmd;.exe")); + Ok(Self { + _lock: lock, + _temp: temp, + dirs, + _path_guard: path_guard, + #[cfg(windows)] + pathext_guard, + }) + } + + fn dir(&self, index: usize) -> &Utf8Path { + self.dirs.get(index).map_or_else( + || panic!("dir index {index} is out of bounds"), + Utf8PathBuf::as_path, + ) + } + + fn write_tool(&self, dir_index: usize, name: &str) -> Result { + let mut path = self + .dirs + .get(dir_index) + .cloned() + .ok_or_else(|| anyhow!("dir index {dir_index} is out of bounds"))?; + let filename = tool_name(name); + path.push(Utf8Path::new(&filename)); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent.as_std_path()) + .with_context(|| format!("ensure parent for {path:?}"))?; + } + std::fs::write(path.as_std_path(), script_contents()) + .with_context(|| format!("write tool {path:?}"))?; + mark_executable(&path)?; + Ok(path) + } + + fn remove_path(path: &Utf8Path) -> Result<()> { + std::fs::remove_file(path.as_std_path()).with_context(|| format!("remove {path:?}")) + } + } + + #[cfg(windows)] + fn tool_name(base: &str) -> String { + format!("{base}.cmd") + } + + #[cfg(not(windows))] + fn tool_name(base: &str) -> String { + base.to_owned() + } + + fn script_contents() -> &'static [u8] { + #[cfg(windows)] + { + b"@echo off\r\n" + } + #[cfg(not(windows))] + { + b"#!/bin/sh\nexit 0\n" + } + } + + #[cfg(unix)] + fn mark_executable(path: &Utf8Path) -> Result<()> { + use std::os::unix::fs::PermissionsExt; + let mut 
perms = std::fs::metadata(path.as_std_path()) + .with_context(|| format!("stat {path:?}"))? + .permissions(); + perms.set_mode(0o755); + std::fs::set_permissions(path.as_std_path(), perms) + .with_context(|| format!("set perms {path:?}")) + } + + #[cfg(not(unix))] + fn mark_executable(_path: &Utf8Path) -> Result<()> { + Ok(()) + } + + fn resolver() -> WhichResolver { + WhichResolver::new() + } + + fn render( + resolver: &WhichResolver, + name: &str, + options: &WhichOptions, + ) -> Result> { + resolver + .resolve(name, options) + .map_err(|err| anyhow!(err.to_string())) + } + + #[rstest] + fn resolves_first_path_entry() -> Result<()> { + let fixture = EnvFixture::new(&["bin_a", "bin_b"])?; + fixture.write_tool(1, "tool")?; + let resolver = resolver(); + let output = render(&resolver, "tool", &WhichOptions::default())?; + let expected = fixture.dir(1).join(Utf8Path::new(&tool_name("tool"))); + ensure!( + output.first() == Some(&expected), + "expected first match {expected:?} but got {output:?}" + ); + Ok(()) + } + + #[rstest] + fn returns_all_matches_in_path_order() -> Result<()> { + let fixture = EnvFixture::new(&["first", "second"])?; + fixture.write_tool(0, "tool")?; + fixture.write_tool(1, "tool")?; + let options = WhichOptions { + all: true, + ..WhichOptions::default() + }; + let resolver = resolver(); + let output = render(&resolver, "tool", &options)?; + ensure!(output.len() == 2, "expected two matches but got {output:?}"); + let first = fixture.dir(0).join(Utf8Path::new(&tool_name("tool"))); + let second = fixture.dir(1).join(Utf8Path::new(&tool_name("tool"))); + ensure!( + output.first() == Some(&first), + "incorrect first match: {output:?}" + ); + ensure!( + output.get(1) == Some(&second), + "incorrect second match: {output:?}" + ); + Ok(()) + } + + #[rstest] + fn canonical_removes_duplicates() -> Result<()> { + let fixture = EnvFixture::new(&["bin", "bin_dot"])?; + fixture.write_tool(0, "dedupe")?; + let mut options = WhichOptions { + all: true, + 
..WhichOptions::default() + }; + // Without canonical flag, both entries appear because PATH includes two references. + let alias = fixture.dir(0).join("."); + let joined = std::env::join_paths([fixture.dir(0).as_std_path(), alias.as_std_path()]) + .expect("join paths"); + let _ = set_var("PATH", joined.as_os_str()); + let resolver = resolver(); + let duplicates = render(&resolver, "dedupe", &options)?; + ensure!( + duplicates.len() == 2, + "expected duplicates to include two entries" + ); + options.canonical = true; + options.fresh = true; + let canonical = render(&resolver, "dedupe", &options)?; + ensure!(canonical.len() == 1, "canonical result should dedupe"); + Ok(()) + } + + #[rstest] + fn fresh_rechecks_existing_entries() -> Result<()> { + let fixture = EnvFixture::new(&["primary", "secondary"])?; + fixture.write_tool(1, "swap")?; + let standard = WhichOptions::default(); + let resolver = resolver(); + let initial = render(&resolver, "swap", &standard)?; + let secondary = fixture.dir(1).join(Utf8Path::new(&tool_name("swap"))); + ensure!( + initial.first() == Some(&secondary), + "initial lookup should prefer the secondary PATH entry" + ); + fixture.write_tool(0, "swap")?; + let cached = render(&resolver, "swap", &standard)?; + ensure!( + cached.first() == Some(&secondary), + "cached value should remain" + ); + let fresh = WhichOptions { + fresh: true, + ..WhichOptions::default() + }; + let refreshed = render(&resolver, "swap", &fresh)?; + let primary = fixture.dir(0).join(Utf8Path::new(&tool_name("swap"))); + ensure!( + refreshed.first() == Some(&primary), + "fresh lookup should return the primary path" + ); + let latest = render(&resolver, "swap", &standard)?; + ensure!( + latest.first() == Some(&primary), + "cache should now track the updated executable" + ); + Ok(()) + } + + #[rstest] + fn cache_invalidates_deleted_entries() -> Result<()> { + let fixture = EnvFixture::new(&["solo"])?; + let path = fixture.write_tool(0, "gone")?; + let resolver = resolver(); 
+ let hits = render(&resolver, "gone", &WhichOptions::default())?; + ensure!(hits.first() == Some(&path), "expected cached path match"); + EnvFixture::remove_path(&path)?; + let err = match resolver.resolve("gone", &WhichOptions::default()) { + Ok(value) => bail!( + "expected missing executable error but resolver returned {value:?}" + ), + Err(err) => err, + }; + ensure!( + err.to_string().contains("not found"), + "missing executable should surface not_found error" + ); + Ok(()) + } +} diff --git a/tests/features/stdlib.feature b/tests/features/stdlib.feature index 51b70294..f81d7675 100644 --- a/tests/features/stdlib.feature +++ b/tests/features/stdlib.feature @@ -72,6 +72,33 @@ Feature: Template stdlib filters When I render "{{ ([{'name': 'one'}] | group_by('kind')) }}" with stdlib path "file" Then the stdlib error contains "could not resolve" + Scenario: which filter resolves the first PATH entry + And the stdlib executable "bin/primary/tool" exists + And the stdlib executable "bin/secondary/tool" exists + And the stdlib PATH entries are "bin/primary:bin/secondary" + When I render the stdlib template "{{ 'tool' | which }}" + Then the stdlib output is the workspace executable "bin/primary/tool" + + Scenario: which filter lists every PATH match + And the stdlib executable "bin/path_a/tool" exists + And the stdlib executable "bin/path_b/tool" exists + And the stdlib PATH entries are "bin/path_a:bin/path_b" + When I render the stdlib template "{{ ('tool' | which(all=true))[0] }}" + Then the stdlib output is the workspace executable "bin/path_a/tool" + When I render the stdlib template "{{ ('tool' | which(all=true))[1] }}" + Then the stdlib output is the workspace executable "bin/path_b/tool" + + Scenario: which function honours cwd_mode + And the stdlib executable "local/tool" exists + And the stdlib PATH entries are "" + When I render the stdlib template "{{ which('tool', cwd_mode='always') }}" + Then the stdlib output is the workspace executable "local/tool" + + 
Scenario: which filter reports missing executables + And the stdlib PATH entries are "" + When I render the stdlib template "{{ 'absent' | which }}" + Then the stdlib error contains "netsuke::jinja::which::not_found" + Scenario: shell filter transforms text and marks templates impure Given an uppercase stdlib command helper When I render the stdlib template "{{ 'hello' | shell(cmd) | trim }}" using the stdlib command helper diff --git a/tests/std_filter_tests/mod.rs b/tests/std_filter_tests/mod.rs index 8b8d6a60..cc503ec6 100644 --- a/tests/std_filter_tests/mod.rs +++ b/tests/std_filter_tests/mod.rs @@ -12,5 +12,7 @@ mod io_filters; mod network_functions; #[path = "std_filter_tests/path_filters.rs"] mod path_filters; +#[path = "std_filter_tests/which_filter_tests.rs"] +mod which_filter_tests; #[path = "std_filter_tests/support.rs"] mod support; diff --git a/tests/std_filter_tests/which_filter_tests.rs b/tests/std_filter_tests/which_filter_tests.rs new file mode 100644 index 00000000..7aa5b128 --- /dev/null +++ b/tests/std_filter_tests/which_filter_tests.rs @@ -0,0 +1,158 @@ +use anyhow::{Context, Result, anyhow}; +use camino::{Utf8Path, Utf8PathBuf}; +use minijinja::{context, Environment}; +use rstest::rstest; +use std::ffi::{OsStr, OsString}; +use test_support::{env::VarGuard, env_lock::EnvLock}; + +use super::support::{self, fallible}; + +struct PathEnv { + _lock: EnvLock, + path_guard: VarGuard, + #[cfg(windows)] + pathext_guard: VarGuard, +} + +impl PathEnv { + fn new(entries: &[Utf8PathBuf]) -> Result { + let lock = EnvLock::acquire(); + let joined = if entries.is_empty() { + OsString::new() + } else { + std::env::join_paths(entries.iter().map(|entry| entry.as_std_path())) + .context("join PATH entries")? 
+        };
+        let path_guard = VarGuard::set("PATH", joined.as_os_str());
+        #[cfg(windows)]
+        let pathext_guard = VarGuard::set("PATHEXT", OsStr::new(".cmd;.exe"));
+        Ok(Self {
+            _lock: lock,
+            path_guard,
+            #[cfg(windows)]
+            pathext_guard,
+        })
+    }
+}
+
+fn write_tool(dir: &Utf8Path, name: &str) -> Result<Utf8PathBuf> {
+    let filename = tool_name(name);
+    let path = dir.join(Utf8Path::new(&filename));
+    let parent = path
+        .parent()
+        .context("tool path should have a parent directory")?;
+    std::fs::create_dir_all(parent.as_std_path())
+        .with_context(|| format!("create parent for {:?}", path))?;
+    std::fs::write(path.as_std_path(), script_contents())
+        .with_context(|| format!("write fixture {:?}", path))?;
+    mark_executable(&path)?;
+    Ok(path)
+}
+
+#[cfg(unix)]
+fn mark_executable(path: &Utf8Path) -> Result<()> {
+    use std::os::unix::fs::PermissionsExt;
+    let mut perms = std::fs::metadata(path.as_std_path())
+        .with_context(|| format!("stat {:?}", path))?
+        .permissions();
+    perms.set_mode(0o755);
+    std::fs::set_permissions(path.as_std_path(), perms)
+        .with_context(|| format!("chmod {:?}", path))
+}
+
+#[cfg(not(unix))]
+fn mark_executable(_path: &Utf8Path) -> Result<()> {
+    Ok(())
+}
+
+#[cfg(windows)]
+fn tool_name(base: &str) -> String {
+    format!("{base}.cmd")
+}
+
+#[cfg(not(windows))]
+fn tool_name(base: &str) -> String {
+    base.to_owned()
+}
+
+fn script_contents() -> &'static [u8] {
+    #[cfg(windows)]
+    {
+        b"@echo off\r\n"
+    }
+    #[cfg(not(windows))]
+    {
+        b"#!/bin/sh\nexit 0\n"
+    }
+}
+
+fn render(env: &mut Environment<'_>, template: &str) -> Result<String> {
+    env.render_str(template, context! 
{})
+        .map_err(|err| anyhow!(err.to_string()))
+}
+
+#[rstest]
+fn which_filter_returns_first_match() -> Result<()> {
+    let (temp, root) = support::filter_workspace()?;
+    let first = root.join("bin_first");
+    let second = root.join("bin_second");
+    std::fs::create_dir_all(first.as_std_path())?;
+    std::fs::create_dir_all(second.as_std_path())?;
+    write_tool(&first, "helper")?;
+    write_tool(&second, "helper")?;
+    let _path = PathEnv::new(&[first.clone(), second.clone()])?;
+    let (mut env, mut state) = fallible::stdlib_env_with_state()?;
+    state.reset_impure();
+    let output = render(&mut env, "{{ 'helper' | which }}")?;
+    assert_eq!(output, first.join(Utf8Path::new(&tool_name("helper"))).as_str());
+    assert!(!state.is_impure());
+    drop(temp);
+    Ok(())
+}
+
+#[rstest]
+fn which_filter_all_returns_all_matches() -> Result<()> {
+    let (_temp, root) = support::filter_workspace()?;
+    let first = root.join("bin_a");
+    let second = root.join("bin_b");
+    std::fs::create_dir_all(first.as_std_path())?;
+    std::fs::create_dir_all(second.as_std_path())?;
+    write_tool(&first, "helper")?;
+    write_tool(&second, "helper")?;
+    let _path = PathEnv::new(&[first.clone(), second.clone()])?;
+    let (mut env, _state) = fallible::stdlib_env_with_state()?;
+    let template = "{{ 'helper' | which(all=true) | join('|') }}";
+    let output = render(&mut env, template)?;
+    let expected = format!(
+        "{}|{}",
+        first.join(Utf8Path::new(&tool_name("helper"))).as_str(),
+        second.join(Utf8Path::new(&tool_name("helper"))).as_str()
+    );
+    assert_eq!(output, expected);
+    Ok(())
+}
+
+#[rstest]
+fn which_function_honours_cwd_mode() -> Result<()> {
+    let (_temp, root) = support::filter_workspace()?;
+    let tool = write_tool(&root, "local")?;
+    let _path = PathEnv::new(&[])?;
+    let (mut env, _state) = fallible::stdlib_env_with_state()?;
+    let template = "{{ which('local', cwd_mode='always') }}";
+    let output = render(&mut env, template)?;
+    assert_eq!(output, tool.as_str());
+    Ok(())
+}
+
+#[rstest]
+fn 
which_filter_reports_missing_command() -> Result<()> { + let (_temp, _root) = support::filter_workspace()?; + let _path = PathEnv::new(&[])?; + let (mut env, _state) = fallible::stdlib_env_with_state()?; + let err = env + .render_str("{{ 'absent' | which }}", context! {}) + .unwrap_err(); + let message = err.to_string(); + assert!(message.contains("netsuke::jinja::which::not_found")); + Ok(()) +} diff --git a/tests/steps/stdlib_steps/assertions.rs b/tests/steps/stdlib_steps/assertions.rs index a6e05600..de1124ec 100644 --- a/tests/steps/stdlib_steps/assertions.rs +++ b/tests/steps/stdlib_steps/assertions.rs @@ -12,6 +12,7 @@ use url::Url; use super::parsing::{parse_expected_offset, parse_iso_timestamp}; use super::types::{ExpectedFragment, ExpectedOffset, ExpectedOutput, RelativePath}; +use super::workspace::resolve_executable_path; #[then(regex = r#"^the stdlib output is "(.+)"$"#)] pub(crate) fn assert_stdlib_output( @@ -143,6 +144,21 @@ pub(crate) fn assert_stdlib_output_is_workspace_path( Ok(()) } +#[then(regex = r#"^the stdlib output is the workspace executable "(.+)"$"#)] +pub(crate) fn assert_stdlib_output_is_workspace_executable( + world: &mut CliWorld, + relative_path: RelativePath, +) -> Result<()> { + let relative = relative_path.into_path_buf(); + let (root, output) = stdlib_root_and_output(world)?; + let expected = resolve_executable_path(root, relative.as_path()); + ensure!( + output == expected.as_str(), + "expected stdlib output '{expected}' but was '{output}'" + ); + Ok(()) +} + #[then("the stdlib output is an ISO8601 UTC timestamp")] pub(crate) fn assert_stdlib_output_is_utc_timestamp(world: &mut CliWorld) -> Result<()> { let output = stdlib_output(world)?; diff --git a/tests/steps/stdlib_steps/types.rs b/tests/steps/stdlib_steps/types.rs index e03cb7e1..9ab11625 100644 --- a/tests/steps/stdlib_steps/types.rs +++ b/tests/steps/stdlib_steps/types.rs @@ -171,6 +171,30 @@ impl FromStr for RelativePath { } } +/// Colon-delimited PATH entries used by 
stdlib steps.
+#[derive(Debug, Clone)]
+pub(crate) struct PathEntries(String);
+
+impl PathEntries {
+    pub(crate) fn into_inner(self) -> String {
+        self.0
+    }
+}
+
+impl From<String> for PathEntries {
+    fn from(value: String) -> Self {
+        Self(value)
+    }
+}
+
+impl FromStr for PathEntries {
+    type Err = Infallible;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(Self(s.to_owned()))
+    }
+}
+
 /// Expected output text for stdlib assertion steps.
 #[derive(Debug, Clone)]
 pub(crate) struct ExpectedOutput(String);
diff --git a/tests/steps/stdlib_steps/workspace.rs b/tests/steps/stdlib_steps/workspace.rs
index 64193c9b..1aa78b83 100644
--- a/tests/steps/stdlib_steps/workspace.rs
+++ b/tests/steps/stdlib_steps/workspace.rs
@@ -5,7 +5,11 @@ use anyhow::{Context, Result, anyhow};
 use camino::{Utf8Path, Utf8PathBuf};
 use cap_std::{ambient_authority, fs_utf8::Dir};
 use cucumber::given;
-use std::ffi::OsStr;
+use std::{
+    env,
+    ffi::{OsStr, OsString},
+    fs,
+};
 use test_support::{
     command_helper::{
         compile_failure_helper, compile_large_output_helper, compile_uppercase_helper,
@@ -13,7 +17,7 @@ use test_support::{
     env::set_var,
 };
 
-use super::types::{FileContent, RelativePath, ServerBody, TemplatePath};
+use super::types::{FileContent, PathEntries, RelativePath, ServerBody, TemplatePath};
 
 const LINES_FIXTURE: &str = concat!("one\n", "two\n", "three\n",);
 
@@ -119,6 +123,55 @@ pub(crate) fn write_stdlib_file(
     Ok(())
 }
 
+#[given(regex = r#"^the stdlib executable "(.+)" exists$"#)]
+pub(crate) fn stdlib_executable_exists(world: &mut CliWorld, path: RelativePath) -> Result<()> {
+    let root = ensure_workspace(world)?;
+    let relative = path.into_path_buf();
+    let target = resolve_executable_path(&root, &relative);
+    if let Some(parent) = target.parent() {
+        fs::create_dir_all(parent.as_std_path())
+            .with_context(|| format!("create directories for stdlib executable at {parent}"))?;
+    }
+    fs::write(target.as_std_path(), executable_script())
+        .with_context(|| format!("write stdlib executable 
{target}"))?; + mark_executable(&target)?; + Ok(()) +} + +#[given(regex = r#"^the stdlib PATH entries are "(.*)"$"#)] +pub(crate) fn stdlib_path_entries(world: &mut CliWorld, entries: PathEntries) -> Result<()> { + let root = ensure_workspace(world)?; + let raw_entries = entries.into_inner(); + let trimmed = raw_entries.trim(); + let dirs: Vec = if trimmed.is_empty() { + Vec::new() + } else { + trimmed + .split(':') + .filter(|segment| !segment.trim().is_empty()) + .map(|segment| root.join(segment.trim())) + .collect() + }; + for dir in &dirs { + fs::create_dir_all(dir.as_std_path()) + .with_context(|| format!("create PATH directory {dir}"))?; + } + let joined = if dirs.is_empty() { + OsString::new() + } else { + env::join_paths(dirs.iter().map(|dir| dir.as_std_path())) + .context("join stdlib PATH entries")? + }; + let previous = set_var("PATH", joined.as_os_str()); + world.env_vars.insert("PATH".into(), previous); + #[cfg(windows)] + { + let previous = set_var("PATHEXT", OsStr::new(".cmd;.exe")); + world.env_vars.insert("PATHEXT".into(), previous); + } + Ok(()) +} + #[given("HOME points to the stdlib workspace root")] pub(crate) fn home_points_to_stdlib_root(world: &mut CliWorld) -> Result<()> { let root = ensure_workspace(world)?; @@ -148,3 +201,47 @@ pub(crate) fn resolve_template_path(root: &Utf8Path, raw: RelativePath) -> Templ TemplatePath::from(root.join(candidate)) } } + +pub(super) fn resolve_executable_path(root: &Utf8Path, relative: &Utf8Path) -> Utf8PathBuf { + #[cfg(windows)] + let mut path = root.join(relative); + #[cfg(not(windows))] + let path = root.join(relative); + #[cfg(windows)] + { + if path.extension().is_none() { + path.set_extension("cmd"); + } + } + path +} + +const fn executable_script() -> &'static [u8] { + #[cfg(windows)] + { + b"@echo off\r\n" + } + #[cfg(not(windows))] + { + b"#!/bin/sh\nexit 0\n" + } +} + +fn mark_executable(path: &Utf8Path) -> Result<()> { + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms 
= fs::metadata(path.as_std_path()) + .with_context(|| format!("stat stdlib executable {path}"))? + .permissions(); + perms.set_mode(0o755); + fs::set_permissions(path.as_std_path(), perms) + .with_context(|| format!("chmod stdlib executable {path}"))?; + Ok(()) + } + #[cfg(not(unix))] + { + let _ = path; + Ok(()) + } +} From 680eca7c2a2fb00dc0e6f164d503ef5c934b25fd Mon Sep 17 00:00:00 2001 From: Leynos Date: Tue, 18 Nov 2025 19:44:20 +0000 Subject: [PATCH 02/23] refactor(stdlib/which): split `which` filter into modular components Removed the monolithic src/stdlib/which.rs file and reorganized the codebase into several focused modules: - cache.rs: Cache management for resolved executable paths - env.rs: Environment snapshot and PATH parsing - error.rs: Error generation utilities - lookup.rs: Lookup resolver logic - mod.rs: Module facade and filter/function registration - options.rs: Options parsing and management This restructuring improves code maintainability and clarity without changing external behavior. Added corresponding tests to verify functionality remains consistent. Co-authored-by: terragon-labs[bot] --- src/stdlib/which.rs | 841 ------------------- src/stdlib/which/cache.rs | 127 +++ src/stdlib/which/env.rs | 152 ++++ src/stdlib/which/error.rs | 70 ++ src/stdlib/which/lookup.rs | 136 +++ src/stdlib/which/mod.rs | 88 ++ src/stdlib/which/options.rs | 63 ++ tests/std_filter_tests/which_filter_tests.rs | 38 + 8 files changed, 674 insertions(+), 841 deletions(-) delete mode 100644 src/stdlib/which.rs create mode 100644 src/stdlib/which/cache.rs create mode 100644 src/stdlib/which/env.rs create mode 100644 src/stdlib/which/error.rs create mode 100644 src/stdlib/which/lookup.rs create mode 100644 src/stdlib/which/mod.rs create mode 100644 src/stdlib/which/options.rs diff --git a/src/stdlib/which.rs b/src/stdlib/which.rs deleted file mode 100644 index 1e06cd2c..00000000 --- a/src/stdlib/which.rs +++ /dev/null @@ -1,841 +0,0 @@ -//! 
Cross-platform `which` filter and helper function for `MiniJinja`. -//! -//! Resolves executables deterministically across Unix and Windows, -//! honouring user-configurable options for returning every match, emitting -//! canonical paths, bypassing the resolver cache, and opt-in search of the -//! current working directory. - -use std::{ - ffi::OsString, - fmt, fs, - num::NonZeroUsize, - sync::{Arc, Mutex}, -}; - -#[cfg(windows)] -use std::ffi::OsStr; - -use camino::{Utf8Path, Utf8PathBuf}; -use indexmap::IndexSet; -use lru::LruCache; -use minijinja::{ - Environment, Error, ErrorKind, - value::{Kwargs, Value}, -}; - -const CACHE_CAPACITY: usize = 64; - -pub(crate) fn register(env: &mut Environment<'_>) { - let resolver = Arc::new(WhichResolver::new()); - { - let filter_resolver = Arc::clone(&resolver); - env.add_filter("which", move |value: Value, kwargs: Kwargs| { - resolve_with(&filter_resolver, &value, &kwargs).and_then(|output| { - kwargs.assert_all_used()?; - Ok(output) - }) - }); - } - { - let function_resolver = Arc::clone(&resolver); - env.add_function("which", move |value: Value, kwargs: Kwargs| { - resolve_with(&function_resolver, &value, &kwargs).and_then(|output| { - kwargs.assert_all_used()?; - Ok(output) - }) - }); - } -} - -fn resolve_with( - resolver: &WhichResolver, - command: &Value, - kwargs: &Kwargs, -) -> Result { - let name = command - .as_str() - .map(str::trim) - .filter(|candidate| !candidate.is_empty()) - .ok_or_else(|| args_error("which requires a non-empty string"))?; - let options = WhichOptions::from_kwargs(kwargs)?; - let matches = resolver.resolve(name, &options)?; - Ok(render_value(&matches, &options)) -} - -fn render_value(matches: &[Utf8PathBuf], options: &WhichOptions) -> Value { - if options.all { - let rendered: Vec = matches - .iter() - .map(|path| format_path_for_output(path)) - .collect(); - Value::from_serialize(rendered) - } else { - let first = matches - .first() - .map_or_else(String::new, |path| 
format_path_for_output(path)); - Value::from(first) - } -} - -fn format_path_for_output(path: &Utf8Path) -> String { - #[cfg(windows)] - { - path.as_str().replace('\\', "/") - } - #[cfg(not(windows))] - { - path.as_str().to_owned() - } -} - -#[derive(Clone, Debug)] -struct WhichResolver { - cache: Arc>>, -} - -impl WhichResolver { - fn new() -> Self { - #[expect( - clippy::unwrap_used, - reason = "cache capacity constant is greater than zero" - )] - let capacity = NonZeroUsize::new(CACHE_CAPACITY).unwrap(); - Self { - cache: Arc::new(Mutex::new(LruCache::new(capacity))), - } - } - - fn resolve(&self, command: &str, options: &WhichOptions) -> Result, Error> { - let env = EnvSnapshot::capture()?; - let key = CacheKey::new(command, &env, options); - if !options.fresh - && let Some(cached) = self.try_cache(&key) - { - return Ok(cached); - } - let matches = lookup(command, &env, options)?; - self.store(key, matches.clone()); - Ok(matches) - } - - fn try_cache(&self, key: &CacheKey) -> Option> { - let mut guard = self.lock_cache(); - match guard.get(key) { - Some(entry) if entry.is_valid() => Some(entry.matches.clone()), - Some(_) => { - guard.pop(key); - None - } - None => None, - } - } - - fn store(&self, key: CacheKey, matches: Vec) { - let mut guard = self.lock_cache(); - guard.put(key, CacheEntry { matches }); - } - - fn lock_cache(&self) -> std::sync::MutexGuard<'_, LruCache> { - match self.cache.lock() { - Ok(guard) => guard, - Err(poisoned) => poisoned.into_inner(), - } - } -} - -fn push_matches( - matches: &mut Vec, - candidates: Vec, - collect_all: bool, -) -> bool { - for candidate in candidates { - if !is_executable(&candidate) { - continue; - } - matches.push(candidate); - if !collect_all { - return true; - } - } - false -} - -fn lookup( - command: &str, - env: &EnvSnapshot, - options: &WhichOptions, -) -> Result, Error> { - if is_direct_path(command) { - return resolve_direct(command, env, options); - } - - let dirs = env.resolved_dirs(options.cwd_mode); - 
let mut matches = Vec::new(); - - #[cfg(windows)] - let suffixes = env.pathext(); - - for dir in &dirs { - #[cfg(windows)] - let candidates = candidate_paths(dir, command, suffixes); - #[cfg(not(windows))] - let candidates = vec![dir.join(command)]; - - if push_matches(&mut matches, candidates, options.all) { - break; - } - } - - if matches.is_empty() { - return Err(not_found_error(command, &dirs, options.cwd_mode)); - } - - if options.canonical { - canonicalise(matches) - } else { - Ok(matches) - } -} - -#[derive(Clone, Debug)] -struct CacheEntry { - matches: Vec, -} - -impl CacheEntry { - fn is_valid(&self) -> bool { - self.matches - .iter() - .all(|path| is_executable(path.as_path())) - } -} - -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -struct CacheKey { - command: String, - path: Option, - pathext: Option, - cwd: Utf8PathBuf, - options: CacheKeyOptions, -} - -impl CacheKey { - fn new(command: &str, env: &EnvSnapshot, options: &WhichOptions) -> Self { - Self { - command: command.to_owned(), - path: env.raw_path.clone(), - pathext: env.raw_pathext.clone(), - cwd: env.cwd.clone(), - options: CacheKeyOptions::from(options), - } - } -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -struct CacheKeyOptions { - all: bool, - canonical: bool, - cwd_mode: CwdMode, -} - -impl From<&WhichOptions> for CacheKeyOptions { - fn from(value: &WhichOptions) -> Self { - Self { - all: value.all, - canonical: value.canonical, - cwd_mode: value.cwd_mode, - } - } -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -enum CwdMode { - Auto, - Always, - Never, -} - -impl CwdMode { - fn parse(value: &str) -> Option { - match value { - "auto" => Some(Self::Auto), - "always" => Some(Self::Always), - "never" => Some(Self::Never), - _ => None, - } - } -} - -impl Default for CwdMode { - fn default() -> Self { - Self::Auto - } -} - -#[derive(Clone, Debug, Default)] -struct WhichOptions { - all: bool, - canonical: bool, - fresh: bool, - cwd_mode: CwdMode, -} - -impl WhichOptions { - 
fn from_kwargs(kwargs: &Kwargs) -> Result { - let all = kwargs.get::>("all")?.unwrap_or(false); - let canonical = kwargs.get::>("canonical")?.unwrap_or(false); - let fresh = kwargs.get::>("fresh")?.unwrap_or(false); - let cwd_mode = kwargs - .get::>("cwd_mode")? - .map(|mode| { - let lower = mode.to_ascii_lowercase(); - CwdMode::parse(&lower).ok_or_else(|| { - args_error(format!( - "cwd_mode must be 'auto', 'always', or 'never', got '{mode}'" - )) - }) - }) - .transpose()?; - Ok(Self { - all, - canonical, - fresh, - cwd_mode: cwd_mode.unwrap_or_default(), - }) - } -} - -struct EnvSnapshot { - cwd: Utf8PathBuf, - raw_path: Option, - raw_pathext: Option, - entries: Vec, - #[cfg(windows)] - pathext: Vec, -} - -impl EnvSnapshot { - fn capture() -> Result { - let cwd = current_dir_utf8()?; - let raw_path = std::env::var_os("PATH"); - let entries = parse_path_entries(raw_path.clone(), &cwd)?; - #[cfg(windows)] - let raw_pathext = std::env::var_os("PATHEXT"); - #[cfg(windows)] - let pathext = parse_pathext(raw_pathext.as_deref()); - #[cfg(not(windows))] - let raw_pathext = None; - Ok(Self { - cwd, - raw_path, - raw_pathext, - entries, - #[cfg(windows)] - pathext, - }) - } - - fn resolved_dirs(&self, mode: CwdMode) -> Vec { - let mut dirs = Vec::new(); - if matches!(mode, CwdMode::Always) { - dirs.push(self.cwd.clone()); - } - for entry in &self.entries { - match entry { - PathEntry::Dir(path) => dirs.push(path.clone()), - PathEntry::CurrentDir if matches!(mode, CwdMode::Always | CwdMode::Auto) => { - dirs.push(self.cwd.clone()); - } - PathEntry::CurrentDir => {} - } - } - dirs - } - - #[cfg(windows)] - fn pathext(&self) -> &[String] { - &self.pathext - } -} - -#[derive(Clone, Debug)] -enum PathEntry { - Dir(Utf8PathBuf), - CurrentDir, -} - -fn parse_path_entries(raw: Option, cwd: &Utf8Path) -> Result, Error> { - let mut entries = Vec::new(); - let Some(raw_value) = raw else { - return Ok(entries); - }; - for (index, component) in 
std::env::split_paths(&raw_value).enumerate() { - if component.as_os_str().is_empty() { - entries.push(PathEntry::CurrentDir); - continue; - } - let utf8 = Utf8PathBuf::from_path_buf(component).map_err(|_| { - args_error(format!( - "PATH entry #{index} contains non-UTF-8 characters; Netsuke requires UTF-8 paths" - )) - })?; - let resolved = if utf8.is_absolute() { - utf8 - } else { - cwd.join(utf8) - }; - entries.push(PathEntry::Dir(resolved)); - } - Ok(entries) -} - -#[cfg(windows)] -const DEFAULT_PATHEXT: &[&str] = &[ - ".com", ".exe", ".bat", ".cmd", ".vbs", ".vbe", ".js", ".jse", ".wsf", ".wsh", ".msc", -]; - -#[cfg(windows)] -fn parse_pathext(raw: Option<&OsStr>) -> Vec { - let mut dedup = IndexSet::new(); - let source = raw - .map(|value| value.to_string_lossy().into_owned()) - .unwrap_or_else(|| DEFAULT_PATHEXT.join(";")); - for segment in source.split(';') { - let trimmed = segment.trim(); - if trimmed.is_empty() { - continue; - } - let mut normalised = trimmed.to_ascii_lowercase(); - if !normalised.starts_with('.') { - normalised.insert(0, '.'); - } - dedup.insert(normalised); - } - if dedup.is_empty() { - DEFAULT_PATHEXT.iter().map(|ext| ext.to_string()).collect() - } else { - dedup.into_iter().collect() - } -} - -fn current_dir_utf8() -> Result { - let cwd = std::env::current_dir().map_err(|err| { - Error::new( - ErrorKind::InvalidOperation, - format!("failed to resolve current directory: {err}"), - ) - })?; - Utf8PathBuf::from_path_buf(cwd).map_err(|_| { - Error::new( - ErrorKind::InvalidOperation, - "current directory contains non-UTF-8 components", - ) - }) -} - -fn is_direct_path(command: &str) -> bool { - #[cfg(windows)] - { - command.contains(['\\', '/', ':']) - } - #[cfg(not(windows))] - { - command.contains('/') - } -} - -fn resolve_direct( - command: &str, - env: &EnvSnapshot, - options: &WhichOptions, -) -> Result, Error> { - let raw = Utf8Path::new(command); - let resolved = if raw.is_absolute() { - raw.to_path_buf() - } else { - 
env.cwd.join(raw) - }; - if !is_executable(&resolved) { - return Err(direct_not_found(command, &resolved)); - } - let output = if options.canonical { - canonicalise(vec![resolved])? - } else { - vec![resolved] - }; - Ok(output) -} - -fn direct_not_found(command: &str, path: &Utf8Path) -> Error { - Error::new( - ErrorKind::InvalidOperation, - format!( - "[netsuke::jinja::which::not_found] command '{command}' at '{path}' is missing or not executable" - ), - ) -} - -fn is_executable(path: &Utf8Path) -> bool { - fs::metadata(path.as_std_path()) - .is_ok_and(|metadata| metadata.is_file() && has_execute_permission(&metadata)) -} - -#[cfg(unix)] -fn has_execute_permission(metadata: &fs::Metadata) -> bool { - use std::os::unix::fs::PermissionsExt; - metadata.permissions().mode() & 0o111 != 0 -} - -#[cfg(not(unix))] -fn has_execute_permission(metadata: &fs::Metadata) -> bool { - metadata.is_file() -} - -fn canonicalise(paths: Vec) -> Result, Error> { - let mut unique = IndexSet::new(); - let mut resolved = Vec::new(); - for path in paths { - let canonical = fs::canonicalize(path.as_std_path()).map_err(|err| { - Error::new( - ErrorKind::InvalidOperation, - format!("failed to canonicalise '{path}': {err}"), - ) - })?; - let utf8 = Utf8PathBuf::from_path_buf(canonical).map_err(|_| { - Error::new( - ErrorKind::InvalidOperation, - "canonical path contains non-UTF-8 characters", - ) - })?; - if unique.insert(utf8.clone()) { - resolved.push(utf8); - } - } - Ok(resolved) -} - -fn not_found_error(command: &str, dirs: &[Utf8PathBuf], mode: CwdMode) -> Error { - let count = dirs.len(); - let preview = path_preview(dirs); - let mut message = format!( - "[netsuke::jinja::which::not_found] command '{command}' not found after checking {count} PATH entries. Preview: {preview}" - ); - if let Some(hint) = hint_for_mode(mode) { - message.push_str(". 
"); - message.push_str(hint); - } - Error::new(ErrorKind::InvalidOperation, message) -} - -fn path_preview(dirs: &[Utf8PathBuf]) -> String { - const LIMIT: usize = 4; - if dirs.is_empty() { - return "".to_owned(); - } - let mut parts: Vec<_> = dirs - .iter() - .take(LIMIT) - .map(|dir| format_path_for_output(dir)) - .collect(); - if dirs.len() > LIMIT { - parts.push("…".into()); - } - parts.join(", ") -} - -const fn hint_for_mode(mode: CwdMode) -> Option<&'static str> { - #[cfg(windows)] - { - match mode { - CwdMode::Always => None, - _ => Some("Set cwd_mode=\"always\" to include the current directory."), - } - } - #[cfg(not(windows))] - { - match mode { - CwdMode::Never => Some( - "Empty PATH segments are ignored; use cwd_mode=\"auto\" to include the working directory.", - ), - _ => None, - } - } -} - -fn args_error(message: impl fmt::Display) -> Error { - Error::new( - ErrorKind::InvalidOperation, - format!("[netsuke::jinja::which::args] {message}"), - ) -} - -#[cfg(windows)] -fn candidate_paths(dir: &Utf8Path, command: &str, pathext: &[String]) -> Vec { - let mut paths = Vec::new(); - let base = dir.join(command); - if Utf8Path::new(command).extension().is_some() { - paths.push(base); - return paths; - } - for ext in pathext { - let mut candidate = base.as_str().to_owned(); - candidate.push_str(ext); - paths.push(Utf8PathBuf::from(candidate)); - } - paths -} - -#[cfg(test)] -mod tests { - use super::*; - use anyhow::{Context, Result, anyhow, bail, ensure}; - use rstest::rstest; - use tempfile::TempDir; - use test_support::{ - env::{VarGuard, set_var}, - env_lock::EnvLock, - }; - - #[cfg(windows)] - use std::ffi::OsStr; - - struct EnvFixture { - _lock: EnvLock, - _temp: TempDir, - dirs: Vec, - _path_guard: VarGuard, - #[cfg(windows)] - pathext_guard: VarGuard, - } - - impl EnvFixture { - fn new(dir_names: &[&str]) -> Result { - let lock = EnvLock::acquire(); - let temp = TempDir::new().context("create which test workspace")?; - let mut dirs = Vec::new(); - for 
name in dir_names { - let dir = temp.path().join(name); - std::fs::create_dir_all(&dir) - .with_context(|| format!("create fixture dir {name}"))?; - let utf = Utf8PathBuf::from_path_buf(dir) - .map_err(|_| anyhow!("fixture path for {name} is not UTF-8"))?; - dirs.push(utf); - } - let joined = std::env::join_paths(dirs.iter().map(|dir| dir.as_std_path())) - .context("join PATH entries")?; - let path_guard = VarGuard::set("PATH", joined.as_os_str()); - #[cfg(windows)] - let pathext_guard = VarGuard::set("PATHEXT", OsStr::new(".cmd;.exe")); - Ok(Self { - _lock: lock, - _temp: temp, - dirs, - _path_guard: path_guard, - #[cfg(windows)] - pathext_guard, - }) - } - - fn dir(&self, index: usize) -> &Utf8Path { - self.dirs.get(index).map_or_else( - || panic!("dir index {index} is out of bounds"), - Utf8PathBuf::as_path, - ) - } - - fn write_tool(&self, dir_index: usize, name: &str) -> Result { - let mut path = self - .dirs - .get(dir_index) - .cloned() - .ok_or_else(|| anyhow!("dir index {dir_index} is out of bounds"))?; - let filename = tool_name(name); - path.push(Utf8Path::new(&filename)); - if let Some(parent) = path.parent() { - std::fs::create_dir_all(parent.as_std_path()) - .with_context(|| format!("ensure parent for {path:?}"))?; - } - std::fs::write(path.as_std_path(), script_contents()) - .with_context(|| format!("write tool {path:?}"))?; - mark_executable(&path)?; - Ok(path) - } - - fn remove_path(path: &Utf8Path) -> Result<()> { - std::fs::remove_file(path.as_std_path()).with_context(|| format!("remove {path:?}")) - } - } - - #[cfg(windows)] - fn tool_name(base: &str) -> String { - format!("{base}.cmd") - } - - #[cfg(not(windows))] - fn tool_name(base: &str) -> String { - base.to_owned() - } - - fn script_contents() -> &'static [u8] { - #[cfg(windows)] - { - b"@echo off\r\n" - } - #[cfg(not(windows))] - { - b"#!/bin/sh\nexit 0\n" - } - } - - #[cfg(unix)] - fn mark_executable(path: &Utf8Path) -> Result<()> { - use std::os::unix::fs::PermissionsExt; - let mut 
perms = std::fs::metadata(path.as_std_path()) - .with_context(|| format!("stat {path:?}"))? - .permissions(); - perms.set_mode(0o755); - std::fs::set_permissions(path.as_std_path(), perms) - .with_context(|| format!("set perms {path:?}")) - } - - #[cfg(not(unix))] - fn mark_executable(_path: &Utf8Path) -> Result<()> { - Ok(()) - } - - fn resolver() -> WhichResolver { - WhichResolver::new() - } - - fn render( - resolver: &WhichResolver, - name: &str, - options: &WhichOptions, - ) -> Result> { - resolver - .resolve(name, options) - .map_err(|err| anyhow!(err.to_string())) - } - - #[rstest] - fn resolves_first_path_entry() -> Result<()> { - let fixture = EnvFixture::new(&["bin_a", "bin_b"])?; - fixture.write_tool(1, "tool")?; - let resolver = resolver(); - let output = render(&resolver, "tool", &WhichOptions::default())?; - let expected = fixture.dir(1).join(Utf8Path::new(&tool_name("tool"))); - ensure!( - output.first() == Some(&expected), - "expected first match {expected:?} but got {output:?}" - ); - Ok(()) - } - - #[rstest] - fn returns_all_matches_in_path_order() -> Result<()> { - let fixture = EnvFixture::new(&["first", "second"])?; - fixture.write_tool(0, "tool")?; - fixture.write_tool(1, "tool")?; - let options = WhichOptions { - all: true, - ..WhichOptions::default() - }; - let resolver = resolver(); - let output = render(&resolver, "tool", &options)?; - ensure!(output.len() == 2, "expected two matches but got {output:?}"); - let first = fixture.dir(0).join(Utf8Path::new(&tool_name("tool"))); - let second = fixture.dir(1).join(Utf8Path::new(&tool_name("tool"))); - ensure!( - output.first() == Some(&first), - "incorrect first match: {output:?}" - ); - ensure!( - output.get(1) == Some(&second), - "incorrect second match: {output:?}" - ); - Ok(()) - } - - #[rstest] - fn canonical_removes_duplicates() -> Result<()> { - let fixture = EnvFixture::new(&["bin", "bin_dot"])?; - fixture.write_tool(0, "dedupe")?; - let mut options = WhichOptions { - all: true, - 
..WhichOptions::default() - }; - // Without canonical flag, both entries appear because PATH includes two references. - let alias = fixture.dir(0).join("."); - let joined = std::env::join_paths([fixture.dir(0).as_std_path(), alias.as_std_path()]) - .expect("join paths"); - let _ = set_var("PATH", joined.as_os_str()); - let resolver = resolver(); - let duplicates = render(&resolver, "dedupe", &options)?; - ensure!( - duplicates.len() == 2, - "expected duplicates to include two entries" - ); - options.canonical = true; - options.fresh = true; - let canonical = render(&resolver, "dedupe", &options)?; - ensure!(canonical.len() == 1, "canonical result should dedupe"); - Ok(()) - } - - #[rstest] - fn fresh_rechecks_existing_entries() -> Result<()> { - let fixture = EnvFixture::new(&["primary", "secondary"])?; - fixture.write_tool(1, "swap")?; - let standard = WhichOptions::default(); - let resolver = resolver(); - let initial = render(&resolver, "swap", &standard)?; - let secondary = fixture.dir(1).join(Utf8Path::new(&tool_name("swap"))); - ensure!( - initial.first() == Some(&secondary), - "initial lookup should prefer the secondary PATH entry" - ); - fixture.write_tool(0, "swap")?; - let cached = render(&resolver, "swap", &standard)?; - ensure!( - cached.first() == Some(&secondary), - "cached value should remain" - ); - let fresh = WhichOptions { - fresh: true, - ..WhichOptions::default() - }; - let refreshed = render(&resolver, "swap", &fresh)?; - let primary = fixture.dir(0).join(Utf8Path::new(&tool_name("swap"))); - ensure!( - refreshed.first() == Some(&primary), - "fresh lookup should return the primary path" - ); - let latest = render(&resolver, "swap", &standard)?; - ensure!( - latest.first() == Some(&primary), - "cache should now track the updated executable" - ); - Ok(()) - } - - #[rstest] - fn cache_invalidates_deleted_entries() -> Result<()> { - let fixture = EnvFixture::new(&["solo"])?; - let path = fixture.write_tool(0, "gone")?; - let resolver = resolver(); 
- let hits = render(&resolver, "gone", &WhichOptions::default())?; - ensure!(hits.first() == Some(&path), "expected cached path match"); - EnvFixture::remove_path(&path)?; - let err = match resolver.resolve("gone", &WhichOptions::default()) { - Ok(value) => bail!( - "expected missing executable error but resolver returned {value:?}" - ), - Err(err) => err, - }; - ensure!( - err.to_string().contains("not found"), - "missing executable should surface not_found error" - ); - Ok(()) - } -} diff --git a/src/stdlib/which/cache.rs b/src/stdlib/which/cache.rs new file mode 100644 index 00000000..1cf44a49 --- /dev/null +++ b/src/stdlib/which/cache.rs @@ -0,0 +1,127 @@ +use std::{ + ffi::OsString, + num::NonZeroUsize, + sync::{Arc, Mutex, MutexGuard}, +}; + +use camino::Utf8PathBuf; +use lru::LruCache; +use minijinja::Error; + +use super::{ + env::EnvSnapshot, + lookup::{is_executable, lookup}, + options::{CwdMode, WhichOptions}, +}; + +pub(super) const CACHE_CAPACITY: usize = 64; + +#[derive(Clone, Debug)] +pub(super) struct WhichResolver { + cache: Arc>>, +} + +impl WhichResolver { + pub(super) fn new() -> Self { + #[expect( + clippy::unwrap_used, + reason = "cache capacity constant is greater than zero", + )] + let capacity = NonZeroUsize::new(CACHE_CAPACITY).unwrap(); + Self { + cache: Arc::new(Mutex::new(LruCache::new(capacity))), + } + } + + pub(super) fn resolve( + &self, + command: &str, + options: &WhichOptions, + ) -> Result, Error> { + let env = EnvSnapshot::capture()?; + let key = CacheKey::new(command, &env, options); + if !options.fresh + && let Some(cached) = self.try_cache(&key) + { + return Ok(cached); + } + let matches = lookup(command, &env, options)?; + self.store(key, matches.clone()); + Ok(matches) + } + + fn try_cache(&self, key: &CacheKey) -> Option> { + let mut guard = self.lock_cache(); + match guard.get(key) { + Some(entry) if entry.is_valid() => Some(entry.matches.clone()), + Some(_) => { + guard.pop(key); + None + } + None => None, + } + } + + fn 
store(&self, key: CacheKey, matches: Vec) { + let mut guard = self.lock_cache(); + guard.put(key, CacheEntry { matches }); + } + + fn lock_cache(&self) -> MutexGuard<'_, LruCache> { + match self.cache.lock() { + Ok(guard) => guard, + Err(poisoned) => poisoned.into_inner(), + } + } +} + +#[derive(Clone, Debug)] +struct CacheEntry { + matches: Vec, +} + +impl CacheEntry { + fn is_valid(&self) -> bool { + self.matches + .iter() + .all(|path| is_executable(path.as_path())) + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +struct CacheKey { + command: String, + path: Option, + pathext: Option, + cwd: Utf8PathBuf, + options: CacheKeyOptions, +} + +impl CacheKey { + fn new(command: &str, env: &EnvSnapshot, options: &WhichOptions) -> Self { + Self { + command: command.to_owned(), + path: env.raw_path.clone(), + pathext: env.raw_pathext.clone(), + cwd: env.cwd.clone(), + options: CacheKeyOptions::from(options), + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +struct CacheKeyOptions { + all: bool, + canonical: bool, + cwd_mode: CwdMode, +} + +impl From<&WhichOptions> for CacheKeyOptions { + fn from(value: &WhichOptions) -> Self { + Self { + all: value.all, + canonical: value.canonical, + cwd_mode: value.cwd_mode, + } + } +} diff --git a/src/stdlib/which/env.rs b/src/stdlib/which/env.rs new file mode 100644 index 00000000..8b7a924e --- /dev/null +++ b/src/stdlib/which/env.rs @@ -0,0 +1,152 @@ +use std::ffi::{OsStr, OsString}; + +use camino::{Utf8Path, Utf8PathBuf}; +use indexmap::IndexSet; +use minijinja::{Error, ErrorKind}; + +use super::{error::args_error, options::CwdMode}; + +#[derive(Clone, Debug)] +pub(super) struct EnvSnapshot { + pub(super) cwd: Utf8PathBuf, + pub(super) raw_path: Option, + pub(super) raw_pathext: Option, + entries: Vec, + #[cfg(windows)] + pathext: Vec, +} + +impl EnvSnapshot { + pub(super) fn capture() -> Result { + let cwd = current_dir_utf8()?; + let raw_path = std::env::var_os("PATH"); + let entries = 
parse_path_entries(raw_path.clone(), &cwd)?; + #[cfg(windows)] + let raw_pathext = std::env::var_os("PATHEXT"); + #[cfg(windows)] + let pathext = parse_pathext(raw_pathext.as_deref()); + #[cfg(not(windows))] + let raw_pathext = None; + Ok(Self { + cwd, + raw_path, + raw_pathext, + entries, + #[cfg(windows)] + pathext, + }) + } + + pub(super) fn resolved_dirs(&self, mode: CwdMode) -> Vec { + let mut dirs = Vec::new(); + if matches!(mode, CwdMode::Always) { + dirs.push(self.cwd.clone()); + } + for entry in &self.entries { + match entry { + PathEntry::Dir(path) => dirs.push(path.clone()), + PathEntry::CurrentDir if matches!(mode, CwdMode::Always | CwdMode::Auto) => { + dirs.push(self.cwd.clone()); + } + PathEntry::CurrentDir => {} + } + } + dirs + } + + #[cfg(windows)] + pub(super) fn pathext(&self) -> &[String] { + &self.pathext + } +} + +#[derive(Clone, Debug)] +enum PathEntry { + Dir(Utf8PathBuf), + CurrentDir, +} + +fn parse_path_entries(raw: Option, cwd: &Utf8Path) -> Result, Error> { + let mut entries = Vec::new(); + let Some(raw_value) = raw else { + return Ok(entries); + }; + for (index, component) in std::env::split_paths(&raw_value).enumerate() { + if component.as_os_str().is_empty() { + entries.push(PathEntry::CurrentDir); + continue; + } + let utf8 = Utf8PathBuf::from_path_buf(component).map_err(|_| { + args_error(format!( + "PATH entry #{index} contains non-UTF-8 characters; Netsuke requires UTF-8 paths", + )) + })?; + let resolved = if utf8.is_absolute() { + utf8 + } else { + cwd.join(utf8) + }; + entries.push(PathEntry::Dir(resolved)); + } + Ok(entries) +} + +#[cfg(windows)] +pub(super) const DEFAULT_PATHEXT: &[&str] = &[ + ".com", ".exe", ".bat", ".cmd", ".vbs", ".vbe", ".js", ".jse", ".wsf", ".wsh", ".msc", +]; + +#[cfg(windows)] +fn parse_pathext(raw: Option<&OsStr>) -> Vec { + let mut dedup = IndexSet::new(); + let source = raw + .map(|value| value.to_string_lossy().into_owned()) + .unwrap_or_else(|| DEFAULT_PATHEXT.join(";")); + for segment in 
source.split(';') { + let trimmed = segment.trim(); + if trimmed.is_empty() { + continue; + } + let mut normalised = trimmed.to_ascii_lowercase(); + if !normalised.starts_with('.') { + normalised.insert(0, '.'); + } + dedup.insert(normalised); + } + if dedup.is_empty() { + DEFAULT_PATHEXT.iter().map(|ext| ext.to_string()).collect() + } else { + dedup.into_iter().collect() + } +} + +pub(super) fn current_dir_utf8() -> Result { + let cwd = std::env::current_dir().map_err(|err| { + Error::new( + ErrorKind::InvalidOperation, + format!("failed to resolve current directory: {err}"), + ) + })?; + Utf8PathBuf::from_path_buf(cwd).map_err(|_| { + Error::new( + ErrorKind::InvalidOperation, + "current directory contains non-UTF-8 components", + ) + }) +} + +#[cfg(windows)] +pub(super) fn candidate_paths(dir: &Utf8Path, command: &str, pathext: &[String]) -> Vec { + let mut paths = Vec::new(); + let base = dir.join(command); + if Utf8Path::new(command).extension().is_some() { + paths.push(base); + return paths; + } + for ext in pathext { + let mut candidate = base.as_str().to_owned(); + candidate.push_str(ext); + paths.push(Utf8PathBuf::from(candidate)); + } + paths +} diff --git a/src/stdlib/which/error.rs b/src/stdlib/which/error.rs new file mode 100644 index 00000000..41941e9d --- /dev/null +++ b/src/stdlib/which/error.rs @@ -0,0 +1,70 @@ +use std::fmt; + +use camino::{Utf8Path, Utf8PathBuf}; +use minijinja::{Error, ErrorKind}; + +use super::{format_path_for_output, options::CwdMode}; + +pub(super) fn not_found_error(command: &str, dirs: &[Utf8PathBuf], mode: CwdMode) -> Error { + let count = dirs.len(); + let preview = path_preview(dirs); + let mut message = format!( + "[netsuke::jinja::which::not_found] command '{command}' not found after checking {count} PATH entries. Preview: {preview}", + ); + if let Some(hint) = hint_for_mode(mode) { + message.push_str(". 
"); + message.push_str(hint); + } + Error::new(ErrorKind::InvalidOperation, message) +} + +pub(super) fn direct_not_found(command: &str, path: &Utf8Path) -> Error { + Error::new( + ErrorKind::InvalidOperation, + format!( + "[netsuke::jinja::which::not_found] command '{command}' at '{path}' is missing or not executable", + ), + ) +} + +pub(super) fn args_error(message: impl fmt::Display) -> Error { + Error::new( + ErrorKind::InvalidOperation, + format!("[netsuke::jinja::which::args] {message}"), + ) +} + +fn path_preview(dirs: &[Utf8PathBuf]) -> String { + const LIMIT: usize = 4; + if dirs.is_empty() { + return "".to_owned(); + } + let mut parts: Vec<_> = dirs + .iter() + .take(LIMIT) + .map(|dir| format_path_for_output(dir)) + .collect(); + if dirs.len() > LIMIT { + parts.push("…".into()); + } + parts.join(", ") +} + +const fn hint_for_mode(mode: CwdMode) -> Option<&'static str> { + #[cfg(windows)] + { + match mode { + CwdMode::Always => None, + _ => Some("Set cwd_mode=\"always\" to include the current directory."), + } + } + #[cfg(not(windows))] + { + match mode { + CwdMode::Never => Some( + "Empty PATH segments are ignored; use cwd_mode=\"auto\" to include the working directory.", + ), + _ => None, + } + } +} diff --git a/src/stdlib/which/lookup.rs b/src/stdlib/which/lookup.rs new file mode 100644 index 00000000..d73db1cf --- /dev/null +++ b/src/stdlib/which/lookup.rs @@ -0,0 +1,136 @@ +use std::fs; + +use camino::{Utf8Path, Utf8PathBuf}; +use indexmap::IndexSet; +use minijinja::{Error, ErrorKind}; + +use super::{ + env::{self, EnvSnapshot}, + error::{direct_not_found, not_found_error}, + options::WhichOptions, +}; + +pub(super) fn lookup( + command: &str, + env: &EnvSnapshot, + options: &WhichOptions, +) -> Result, Error> { + if is_direct_path(command) { + return resolve_direct(command, env, options); + } + + let dirs = env.resolved_dirs(options.cwd_mode); + let mut matches = Vec::new(); + + #[cfg(windows)] + let suffixes = env.pathext(); + + for dir in &dirs { 
+ #[cfg(windows)] + let candidates = env::candidate_paths(dir, command, suffixes); + #[cfg(not(windows))] + let candidates = vec![dir.join(command)]; + + if push_matches(&mut matches, candidates, options.all) { + break; + } + } + + if matches.is_empty() { + return Err(not_found_error(command, &dirs, options.cwd_mode)); + } + + if options.canonical { + canonicalise(matches) + } else { + Ok(matches) + } +} + +pub(super) fn resolve_direct( + command: &str, + env: &EnvSnapshot, + options: &WhichOptions, +) -> Result, Error> { + let raw = Utf8Path::new(command); + let resolved = if raw.is_absolute() { + raw.to_path_buf() + } else { + env.cwd.join(raw) + }; + if !is_executable(&resolved) { + return Err(direct_not_found(command, &resolved)); + } + if options.canonical { + canonicalise(vec![resolved]) + } else { + Ok(vec![resolved]) + } +} + +pub(super) fn push_matches( + matches: &mut Vec, + candidates: Vec, + collect_all: bool, +) -> bool { + for candidate in candidates { + if !is_executable(&candidate) { + continue; + } + matches.push(candidate); + if !collect_all { + return true; + } + } + false +} + +pub(super) fn is_direct_path(command: &str) -> bool { + #[cfg(windows)] + { + command.contains(['\\', '/', ':']) + } + #[cfg(not(windows))] + { + command.contains('/') + } +} + +pub(super) fn is_executable(path: &Utf8Path) -> bool { + fs::metadata(path.as_std_path()) + .is_ok_and(|metadata| metadata.is_file() && has_execute_permission(&metadata)) +} + +#[cfg(unix)] +fn has_execute_permission(metadata: &fs::Metadata) -> bool { + use std::os::unix::fs::PermissionsExt; + metadata.permissions().mode() & 0o111 != 0 +} + +#[cfg(not(unix))] +fn has_execute_permission(metadata: &fs::Metadata) -> bool { + metadata.is_file() +} + +pub(super) fn canonicalise(paths: Vec) -> Result, Error> { + let mut unique = IndexSet::new(); + let mut resolved = Vec::new(); + for path in paths { + let canonical = fs::canonicalize(path.as_std_path()).map_err(|err| { + Error::new( + 
ErrorKind::InvalidOperation, + format!("failed to canonicalise '{path}': {err}"), + ) + })?; + let utf8 = Utf8PathBuf::from_path_buf(canonical).map_err(|_| { + Error::new( + ErrorKind::InvalidOperation, + "canonical path contains non-UTF-8 characters", + ) + })?; + if unique.insert(utf8.clone()) { + resolved.push(utf8); + } + } + Ok(resolved) +} diff --git a/src/stdlib/which/mod.rs b/src/stdlib/which/mod.rs new file mode 100644 index 00000000..d300a532 --- /dev/null +++ b/src/stdlib/which/mod.rs @@ -0,0 +1,88 @@ +//! Cross-platform `which` filter and helper function for `MiniJinja`. +//! +//! Resolves executables deterministically across Unix and Windows, +//! honouring user-configurable options for returning every match, emitting +//! canonical paths, bypassing the resolver cache, and opt-in search of the +//! current working directory. + +use std::sync::Arc; + +use camino::{Utf8Path, Utf8PathBuf}; +use minijinja::{ + Environment, Error, + value::{Kwargs, Value}, +}; + +mod cache; +mod env; +mod error; +mod lookup; +mod options; + +pub(crate) use cache::WhichResolver; +pub(crate) use options::WhichOptions; + +use error::args_error; + +pub(crate) fn register(env: &mut Environment<'_>) { + let resolver = Arc::new(WhichResolver::new()); + { + let filter_resolver = Arc::clone(&resolver); + env.add_filter("which", move |value: Value, kwargs: Kwargs| { + resolve_with(&filter_resolver, &value, &kwargs).and_then(|output| { + kwargs.assert_all_used()?; + Ok(output) + }) + }); + } + { + let function_resolver = Arc::clone(&resolver); + env.add_function("which", move |value: Value, kwargs: Kwargs| { + resolve_with(&function_resolver, &value, &kwargs).and_then(|output| { + kwargs.assert_all_used()?; + Ok(output) + }) + }); + } +} + +fn resolve_with( + resolver: &WhichResolver, + command: &Value, + kwargs: &Kwargs, +) -> Result { + let name = command + .as_str() + .map(str::trim) + .filter(|candidate| !candidate.is_empty()) + .ok_or_else(|| args_error("which requires a 
non-empty string"))?; + let options = WhichOptions::from_kwargs(kwargs)?; + let matches = resolver.resolve(name, &options)?; + Ok(render_value(&matches, &options)) +} + +fn render_value(matches: &[Utf8PathBuf], options: &WhichOptions) -> Value { + if options.all { + let rendered: Vec = matches + .iter() + .map(|path| format_path_for_output(path)) + .collect(); + Value::from_serialize(rendered) + } else { + let first = matches + .first() + .map_or_else(String::new, |path| format_path_for_output(path)); + Value::from(first) + } +} + +pub(super) fn format_path_for_output(path: &Utf8Path) -> String { + #[cfg(windows)] + { + path.as_str().replace('\\', "/") + } + #[cfg(not(windows))] + { + path.as_str().to_owned() + } +} diff --git a/src/stdlib/which/options.rs b/src/stdlib/which/options.rs new file mode 100644 index 00000000..78af53a8 --- /dev/null +++ b/src/stdlib/which/options.rs @@ -0,0 +1,63 @@ +use minijinja::{ + Error, + value::Kwargs, +}; + +use super::error::args_error; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub(super) enum CwdMode { + Auto, + Always, + Never, +} + +impl CwdMode { + pub(super) fn parse(value: &str) -> Option { + match value { + "auto" => Some(Self::Auto), + "always" => Some(Self::Always), + "never" => Some(Self::Never), + _ => None, + } + } +} + +impl Default for CwdMode { + fn default() -> Self { + Self::Auto + } +} + +#[derive(Clone, Debug, Default)] +pub(crate) struct WhichOptions { + pub(super) all: bool, + pub(super) canonical: bool, + pub(super) fresh: bool, + pub(super) cwd_mode: CwdMode, +} + +impl WhichOptions { + pub(crate) fn from_kwargs(kwargs: &Kwargs) -> Result { + let all = kwargs.get::>("all")?.unwrap_or(false); + let canonical = kwargs.get::>("canonical")?.unwrap_or(false); + let fresh = kwargs.get::>("fresh")?.unwrap_or(false); + let cwd_mode = kwargs + .get::>("cwd_mode")? 
+ .map(|mode| { + let lower = mode.to_ascii_lowercase(); + CwdMode::parse(&lower).ok_or_else(|| { + args_error(format!( + "cwd_mode must be 'auto', 'always', or 'never', got '{mode}'", + )) + }) + }) + .transpose()?; + Ok(Self { + all, + canonical, + fresh, + cwd_mode: cwd_mode.unwrap_or_default(), + }) + } +} diff --git a/tests/std_filter_tests/which_filter_tests.rs b/tests/std_filter_tests/which_filter_tests.rs index 7aa5b128..2066a767 100644 --- a/tests/std_filter_tests/which_filter_tests.rs +++ b/tests/std_filter_tests/which_filter_tests.rs @@ -91,6 +91,44 @@ fn render(env: &mut Environment<'_>, template: &str) -> Result { .map_err(|err| anyhow!(err.to_string())) } +struct WhichTestFixture { + _temp: tempfile::TempDir, + env: Environment<'static>, + state: netsuke::stdlib::StdlibState, + paths: Vec, + _path_env: PathEnv, +} + +impl WhichTestFixture { + fn with_tool_in_dirs(tool_name: &str, dir_names: &[&str]) -> Result { + let (temp, root) = support::filter_workspace()?; + let mut dirs = Vec::new(); + let mut tool_paths = Vec::new(); + for dir_name in dir_names { + let dir = root.join(dir_name); + std::fs::create_dir_all(dir.as_std_path())?; + let tool_path = write_tool(&dir, tool_name)?; + dirs.push(dir); + tool_paths.push(tool_path); + } + let path_env = PathEnv::new(&dirs)?; + let (env, state) = fallible::stdlib_env_with_state()?; + Ok(Self { + _temp: temp, + env, + state, + paths: tool_paths, + _path_env: path_env, + }) + } + + fn render(&mut self, template: &str) -> Result { + self.env + .render_str(template, context! 
{}) + .map_err(|err| anyhow!(err.to_string())) + } +} + #[rstest] fn which_filter_returns_first_match() -> Result<()> { let (_temp, root) = support::filter_workspace()?; From a6ffaa17467be3f45d5a2c8069d57e5f34482627 Mon Sep 17 00:00:00 2001 From: Leynos Date: Tue, 18 Nov 2025 21:41:48 +0000 Subject: [PATCH 03/23] refactor(testing,bdd): replace cucumber with rstest-bdd for behavior tests - Migrate behavioral tests from cucumber to rstest-bdd - Introduce new fixtures and modules for scenario handling - Refactor CliWorld into smaller state holders with mutex guards - Improve guarantees on resource cleanup via RAII-style fixtures - Update documentation and user guide to reflect new BDD framework - Modify which filter tests to use new fixture helpers This change consolidates behavior tests to run via the standard cargo test harness, simplifying CI and enabling reuse of fixtures and lint configs. Maintains cucumber in parallel during migration to ensure feature parity. Co-authored-by: terragon-labs[bot] --- ...dr-002-replace-cucumber-with-rstest-bdd.md | 47 +++++++++---------- docs/rstest-bdd-users-guide.md | 23 ++++----- src/stdlib/which/cache.rs | 8 ++-- src/stdlib/which/env.rs | 11 ++++- src/stdlib/which/lookup.rs | 4 +- src/stdlib/which/options.rs | 5 +- tests/std_filter_tests/which_filter_tests.rs | 37 ++++----------- 7 files changed, 61 insertions(+), 74 deletions(-) diff --git a/docs/adr-002-replace-cucumber-with-rstest-bdd.md b/docs/adr-002-replace-cucumber-with-rstest-bdd.md index 72428ea2..4f30b472 100644 --- a/docs/adr-002-replace-cucumber-with-rstest-bdd.md +++ b/docs/adr-002-replace-cucumber-with-rstest-bdd.md @@ -19,8 +19,8 @@ The `rstest-bdd` project[^1][^2] exposes Given-When-Then macros that register steps alongside `rstest` fixtures and generate scenario tests with the regular test harness. 
Migrating unlocks a single runner (`cargo test`), reduces the amount of bespoke world state, and lets behaviour tests reuse the same -fixtures, dependency-injection pattern, and lint configuration already in -place for unit tests. +fixtures, dependency-injection pattern, and lint configuration already in place +for unit tests. ## Decision outcome (summary) @@ -72,8 +72,9 @@ place for unit tests. ### Phase 1 – Harness foundation - Add `rstest-bdd = "0.1.0"` and `rstest-bdd-macros = { version = "0.1.0", - features = ["strict-compile-time-validation"] }` to the workspace - `dev-dependencies`. Keep `rstest` as the shared fixture provider. + features = ["strict-compile-time-validation"] + }` to the workspace `dev-dependencies`. Keep `rstest + ` as the shared fixture provider. - Introduce a `tests/bdd` module tree that will own scenario functions, feature-specific fixtures, and utilities for inserting values into the `StepContext`. @@ -82,16 +83,16 @@ place for unit tests. both the migration branch and future contributors have one source of truth. - Decide on the default attributes applied to generated tests (for example, stacking `#[scenario(...)]` with `#[tokio::test(flavor = "multi_thread")]` - for steps that require Tokio). Document the pattern in `docs/rstest-bdd- - users-guide.md`. + for steps that require Tokio). Document the pattern in + `docs/rstest-bdd- users-guide.md`. ### Phase 2 – Fixture extraction and world bridging - Refactor `CliWorld` into plain data holders (for example `CliState`, `ManifestState`, `StdlibState`, `ProcessState`) that expose the smallest - mutable surface required by each step. Each holder lives in `tests/bdd/ - fixtures.rs` (or similar) and is backed by `Arc>` when interior - mutability is required. + mutable surface required by each step. Each holder lives in + `tests/bdd/ fixtures.rs` (or similar) and is backed by `Arc>` when + interior mutability is required. 
- Provide `#[rstest::fixture]` constructors for shared resources: - Temporary workspace plus `PathGuard` handling for PATH edits. - HTTP server guard that mirrors `start_http_server`/`shutdown_http_server` @@ -162,8 +163,8 @@ the cucumber equivalents once the new tests pass. `Cargo.toml` along with the `[[test]]` entry that disables the default harness. - Drop cucumber-specific documentation such as `docs/behavioural-testing-in- - rust-with-cucumber.md` once rewritten, and update any references in the user - guide and Orca/Ortho docs. + rust-with-cucumber.md + ` once rewritten, and update any references in the user guide and Orca/Ortho docs. ### Phase 5 – Documentation, CI, and enablement @@ -181,7 +182,7 @@ the cucumber equivalents once the new tests pass. ## Consequences and risks -**Positive outcomes** +### Positive outcomes - Behaviour tests run through the standard harness, so `make test` exercises them alongside unit tests and inherits the existing CI parallelism. @@ -191,25 +192,21 @@ the cucumber equivalents once the new tests pass. (with `strict-compile-time-validation`), preventing silent drift between the feature files and the Rust implementation. -**Risks and mitigations** +### Risks and mitigations - *Risk:* Rewriting every step macro creates churn and may introduce subtle - regressions. - *Mitigation:* Port features in small slices, keep cucumber running until each - slice is validated, and rely on helpers shared between the old and new - harnesses to avoid behavioural drift. + regressions. *Mitigation:* Port features in small slices, keep cucumber + running until each slice is validated, and rely on helpers shared between the + old and new harnesses to avoid behavioural drift. - *Risk:* The new fixture structs may not cover all the cleanup logic handled - implicitly by `CliWorld::drop`. 
- *Mitigation:* Model teardown as RAII helpers (`ScopeGuard`-style fixtures) and - add explicit tests that assert destructors restore environment variables, - stop HTTP servers, and delete temp files. + implicitly by `CliWorld::drop`. *Mitigation:* Model teardown as RAII helpers + (`ScopeGuard`-style fixtures) and add explicit tests that assert destructors + restore environment variables, stop HTTP servers, and delete temp files. - *Risk:* Developers unfamiliar with `rstest-bdd` may struggle to add new - scenarios. - *Mitigation:* Expand the user guide with Netsuke-specific recipes and add a - `cargo bdd` section to the contributor docs. + scenarios. *Mitigation:* Expand the user guide with Netsuke-specific recipes + and add a `cargo bdd` section to the contributor docs. ## References [^1]: [^2]: `docs/rstest-bdd-users-guide.md` - diff --git a/docs/rstest-bdd-users-guide.md b/docs/rstest-bdd-users-guide.md index 2460c610..2975d6ac 100644 --- a/docs/rstest-bdd-users-guide.md +++ b/docs/rstest-bdd-users-guide.md @@ -571,17 +571,18 @@ Best practices for writing effective scenarios include: (for example, `1e3`, `-1E-9`), and the special values `NaN`, `inf`, and `Infinity` (matched case-insensitively). Matching is anchored: the entire step text must match the pattern; partial matches do not succeed. Escape - literal braces with `{{` and `}}`. Use `\` to match a single backslash. A - trailing `\` or any other backslash escape is treated literally, so `\d` - matches the two-character sequence `\d`. Nested braces inside placeholders - are not supported. Placeholders follow `{name[:type]}`; `name` must start - with a letter or underscore and may contain letters, digits, or underscores - (`[A-Za-z_][A-Za-z0-9_]*`). Whitespace within the type hint is ignored (for - example, `{count: u32}` and `{count:u32}` are both accepted), but whitespace - is not allowed between the name and the colon. Prefer the compact form - `{count:u32}` in new code. 
When a pattern contains no placeholders, the step - text must match exactly. Unknown type hints are treated as generic - placeholders and capture any non-newline text greedily. + literal braces with `{{` and `}}`. Use + `\` to match a single backslash. A trailing `\` or any other backslash escape + is treated literally, so `\d` matches the two-character sequence `\d`. Nested + braces inside placeholders are not supported. Placeholders follow + `{name[:type]}`; `name` must start with a letter or underscore and may + contain letters, digits, or underscores (`[A-Za-z_][A-Za-z0-9_]*`). + Whitespace within the type hint is ignored (for example, `{count: u32}` and + `{count:u32}` are both accepted), but whitespace is not allowed between the + name and the colon. Prefer the compact form `{count:u32}` in new code. When a + pattern contains no placeholders, the step text must match exactly. Unknown + type hints are treated as generic placeholders and capture any non-newline + text greedily. ## Data tables and Docstrings diff --git a/src/stdlib/which/cache.rs b/src/stdlib/which/cache.rs index 1cf44a49..a22413f1 100644 --- a/src/stdlib/which/cache.rs +++ b/src/stdlib/which/cache.rs @@ -17,15 +17,15 @@ use super::{ pub(super) const CACHE_CAPACITY: usize = 64; #[derive(Clone, Debug)] -pub(super) struct WhichResolver { +pub(crate) struct WhichResolver { cache: Arc>>, } impl WhichResolver { - pub(super) fn new() -> Self { + pub(crate) fn new() -> Self { #[expect( clippy::unwrap_used, - reason = "cache capacity constant is greater than zero", + reason = "cache capacity constant is greater than zero" )] let capacity = NonZeroUsize::new(CACHE_CAPACITY).unwrap(); Self { @@ -33,7 +33,7 @@ impl WhichResolver { } } - pub(super) fn resolve( + pub(crate) fn resolve( &self, command: &str, options: &WhichOptions, diff --git a/src/stdlib/which/env.rs b/src/stdlib/which/env.rs index 8b7a924e..639b5158 100644 --- a/src/stdlib/which/env.rs +++ b/src/stdlib/which/env.rs @@ -1,6 +1,9 @@ -use 
std::ffi::{OsStr, OsString}; +use std::ffi::OsString; +#[cfg(windows)] +use std::ffi::OsStr; use camino::{Utf8Path, Utf8PathBuf}; +#[cfg(windows)] use indexmap::IndexSet; use minijinja::{Error, ErrorKind}; @@ -136,7 +139,11 @@ pub(super) fn current_dir_utf8() -> Result { } #[cfg(windows)] -pub(super) fn candidate_paths(dir: &Utf8Path, command: &str, pathext: &[String]) -> Vec { +pub(super) fn candidate_paths( + dir: &Utf8Path, + command: &str, + pathext: &[String], +) -> Vec { let mut paths = Vec::new(); let base = dir.join(command); if Utf8Path::new(command).extension().is_some() { diff --git a/src/stdlib/which/lookup.rs b/src/stdlib/which/lookup.rs index d73db1cf..01e367da 100644 --- a/src/stdlib/which/lookup.rs +++ b/src/stdlib/which/lookup.rs @@ -5,10 +5,12 @@ use indexmap::IndexSet; use minijinja::{Error, ErrorKind}; use super::{ - env::{self, EnvSnapshot}, + env::EnvSnapshot, error::{direct_not_found, not_found_error}, options::WhichOptions, }; +#[cfg(windows)] +use super::env; pub(super) fn lookup( command: &str, diff --git a/src/stdlib/which/options.rs b/src/stdlib/which/options.rs index 78af53a8..1aff8669 100644 --- a/src/stdlib/which/options.rs +++ b/src/stdlib/which/options.rs @@ -1,7 +1,4 @@ -use minijinja::{ - Error, - value::Kwargs, -}; +use minijinja::{Error, value::Kwargs}; use super::error::args_error; diff --git a/tests/std_filter_tests/which_filter_tests.rs b/tests/std_filter_tests/which_filter_tests.rs index 2066a767..c235bd4d 100644 --- a/tests/std_filter_tests/which_filter_tests.rs +++ b/tests/std_filter_tests/which_filter_tests.rs @@ -131,40 +131,23 @@ impl WhichTestFixture { #[rstest] fn which_filter_returns_first_match() -> Result<()> { - let (_temp, root) = support::filter_workspace()?; - let first = root.join("bin_first"); - let second = root.join("bin_second"); - std::fs::create_dir_all(first.as_std_path())?; - std::fs::create_dir_all(second.as_std_path())?; - write_tool(&first, "helper")?; - write_tool(&second, "helper")?; - let _path = 
PathEnv::new(&[first.clone(), second.clone()])?; - let (mut env, mut state) = fallible::stdlib_env_with_state()?; - state.reset_impure(); - let output = render(&mut env, "{{ 'helper' | which }}")?; - assert_eq!(output, first.join(Utf8Path::new(&tool_name("helper"))).as_str()); - assert!(!state.is_impure()); - drop(temp); + let mut fixture = + WhichTestFixture::with_tool_in_dirs("helper", &["bin_first", "bin_second"])?; + fixture.state.reset_impure(); + let output = fixture.render("{{ 'helper' | which }}")?; + assert_eq!(output, fixture.paths[0].as_str()); + assert!(!fixture.state.is_impure()); Ok(()) } #[rstest] fn which_filter_all_returns_all_matches() -> Result<()> { - let (_temp, root) = support::filter_workspace()?; - let first = root.join("bin_a"); - let second = root.join("bin_b"); - std::fs::create_dir_all(first.as_std_path())?; - std::fs::create_dir_all(second.as_std_path())?; - write_tool(&first, "helper")?; - write_tool(&second, "helper")?; - let _path = PathEnv::new(&[first.clone(), second.clone()])?; - let (mut env, _state) = fallible::stdlib_env_with_state()?; - let template = "{{ 'helper' | which(all=true) | join('|') }}"; - let output = render(&mut env, template)?; + let mut fixture = WhichTestFixture::with_tool_in_dirs("helper", &["bin_a", "bin_b"])?; + let output = fixture.render("{{ 'helper' | which(all=true) | join('|') }}")?; let expected = format!( "{}|{}", - first.join(Utf8Path::new(&tool_name("helper"))).as_str(), - second.join(Utf8Path::new(&tool_name("helper"))).as_str() + fixture.paths[0].as_str(), + fixture.paths[1].as_str() ); assert_eq!(output, expected); Ok(()) From e4f1ce2c0ce0677c0217b0068c67fee5a692634a Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 20 Nov 2025 19:49:17 +0000 Subject: [PATCH 04/23] refactor(which): update which tests to use ToolName, DirName, and Template newtypes - Introduce ToolName, DirName, and Template structs in which filter tests for stronger typing and clearer intent - Update all related test functions to 
use these newtypes instead of raw strings - Adjust helper functions accordingly to accept newtypes - Minor code style cleanup in which/env.rs and which/lookup.rs imports Co-authored-by: terragon-labs[bot] --- src/stdlib/which/env.rs | 2 +- src/stdlib/which/lookup.rs | 4 +- tests/std_filter_tests/which_filter_tests.rs | 95 ++++++++++++++++---- 3 files changed, 79 insertions(+), 22 deletions(-) diff --git a/src/stdlib/which/env.rs b/src/stdlib/which/env.rs index 639b5158..b71cff82 100644 --- a/src/stdlib/which/env.rs +++ b/src/stdlib/which/env.rs @@ -1,6 +1,6 @@ -use std::ffi::OsString; #[cfg(windows)] use std::ffi::OsStr; +use std::ffi::OsString; use camino::{Utf8Path, Utf8PathBuf}; #[cfg(windows)] diff --git a/src/stdlib/which/lookup.rs b/src/stdlib/which/lookup.rs index 01e367da..579c9798 100644 --- a/src/stdlib/which/lookup.rs +++ b/src/stdlib/which/lookup.rs @@ -4,13 +4,13 @@ use camino::{Utf8Path, Utf8PathBuf}; use indexmap::IndexSet; use minijinja::{Error, ErrorKind}; +#[cfg(windows)] +use super::env; use super::{ env::EnvSnapshot, error::{direct_not_found, not_found_error}, options::WhichOptions, }; -#[cfg(windows)] -use super::env; pub(super) fn lookup( command: &str, diff --git a/tests/std_filter_tests/which_filter_tests.rs b/tests/std_filter_tests/which_filter_tests.rs index c235bd4d..4adae55e 100644 --- a/tests/std_filter_tests/which_filter_tests.rs +++ b/tests/std_filter_tests/which_filter_tests.rs @@ -7,6 +7,58 @@ use test_support::{env::VarGuard, env_lock::EnvLock}; use super::support::{self, fallible}; +#[derive(Debug, Clone)] +struct ToolName(String); + +impl ToolName { + fn new(name: impl Into) -> Self { Self(name.into()) } + fn as_str(&self) -> &str { &self.0 } +} + +impl From<&str> for ToolName { + fn from(s: &str) -> Self { Self(s.to_owned()) } +} + +impl AsRef for ToolName { + fn as_ref(&self) -> &str { &self.0 } +} + +#[derive(Debug, Clone)] +struct DirName(String); + +impl DirName { + fn new(name: impl Into) -> Self { Self(name.into()) } + fn 
as_str(&self) -> &str { &self.0 } +} + +impl From<&str> for DirName { + fn from(s: &str) -> Self { Self(s.to_owned()) } +} + +impl AsRef for DirName { + fn as_ref(&self) -> &str { &self.0 } +} + +impl AsRef for DirName { + fn as_ref(&self) -> &OsStr { OsStr::new(&self.0) } +} + +#[derive(Debug, Clone)] +struct Template(String); + +impl Template { + fn new(template: impl Into) -> Self { Self(template.into()) } + fn as_str(&self) -> &str { &self.0 } +} + +impl From<&str> for Template { + fn from(s: &str) -> Self { Self(s.to_owned()) } +} + +impl AsRef for Template { + fn as_ref(&self) -> &str { &self.0 } +} + struct PathEnv { _lock: EnvLock, path_guard: VarGuard, @@ -35,7 +87,7 @@ impl PathEnv { } } -fn write_tool(dir: &Utf8Path, name: &str) -> Result { +fn write_tool(dir: &Utf8Path, name: &ToolName) -> Result { let filename = tool_name(name); let path = dir.join(Utf8Path::new(&filename)); let parent = path @@ -66,13 +118,13 @@ fn mark_executable(_path: &Utf8Path) -> Result<()> { } #[cfg(windows)] -fn tool_name(base: &str) -> String { - format!("{base}.cmd") +fn tool_name(base: &ToolName) -> String { + format!("{}.cmd", base.as_str()) } #[cfg(not(windows))] -fn tool_name(base: &str) -> String { - base.to_owned() +fn tool_name(base: &ToolName) -> String { + base.as_str().to_owned() } fn script_contents() -> &'static [u8] { @@ -86,8 +138,8 @@ fn script_contents() -> &'static [u8] { } } -fn render(env: &mut Environment<'_>, template: &str) -> Result { - env.render_str(template, context! {}) +fn render(env: &mut Environment<'_>, template: &Template) -> Result { + env.render_str(template.as_str(), context! 
{}) .map_err(|err| anyhow!(err.to_string())) } @@ -100,12 +152,12 @@ struct WhichTestFixture { } impl WhichTestFixture { - fn with_tool_in_dirs(tool_name: &str, dir_names: &[&str]) -> Result { + fn with_tool_in_dirs(tool_name: &ToolName, dir_names: &[DirName]) -> Result { let (temp, root) = support::filter_workspace()?; let mut dirs = Vec::new(); let mut tool_paths = Vec::new(); for dir_name in dir_names { - let dir = root.join(dir_name); + let dir = root.join(dir_name.as_str()); std::fs::create_dir_all(dir.as_std_path())?; let tool_path = write_tool(&dir, tool_name)?; dirs.push(dir); @@ -122,19 +174,21 @@ impl WhichTestFixture { }) } - fn render(&mut self, template: &str) -> Result { + fn render(&mut self, template: &Template) -> Result { self.env - .render_str(template, context! {}) + .render_str(template.as_str(), context! {}) .map_err(|err| anyhow!(err.to_string())) } } #[rstest] fn which_filter_returns_first_match() -> Result<()> { - let mut fixture = - WhichTestFixture::with_tool_in_dirs("helper", &["bin_first", "bin_second"])?; + let mut fixture = WhichTestFixture::with_tool_in_dirs( + &ToolName::from("helper"), + &[DirName::from("bin_first"), DirName::from("bin_second")], + )?; fixture.state.reset_impure(); - let output = fixture.render("{{ 'helper' | which }}")?; + let output = fixture.render(&Template::from("{{ 'helper' | which }}"))?; assert_eq!(output, fixture.paths[0].as_str()); assert!(!fixture.state.is_impure()); Ok(()) @@ -142,8 +196,11 @@ fn which_filter_returns_first_match() -> Result<()> { #[rstest] fn which_filter_all_returns_all_matches() -> Result<()> { - let mut fixture = WhichTestFixture::with_tool_in_dirs("helper", &["bin_a", "bin_b"])?; - let output = fixture.render("{{ 'helper' | which(all=true) | join('|') }}")?; + let mut fixture = WhichTestFixture::with_tool_in_dirs( + &ToolName::from("helper"), + &[DirName::from("bin_a"), DirName::from("bin_b")], + )?; + let output = fixture.render(&Template::from("{{ 'helper' | which(all=true) | 
join('|') }}"))?; let expected = format!( "{}|{}", fixture.paths[0].as_str(), @@ -156,11 +213,11 @@ fn which_filter_all_returns_all_matches() -> Result<()> { #[rstest] fn which_function_honours_cwd_mode() -> Result<()> { let (_temp, root) = support::filter_workspace()?; - let tool = write_tool(&root, "local")?; + let tool = write_tool(&root, &ToolName::from("local"))?; let _path = PathEnv::new(&[])?; let (mut env, _state) = fallible::stdlib_env_with_state()?; - let template = "{{ which('local', cwd_mode='always') }}"; - let output = render(&mut env, template)?; + let template = Template::from("{{ which('local', cwd_mode='always') }}"); + let output = render(&mut env, &template)?; assert_eq!(output, tool.as_str()); Ok(()) } From 1d6e9dc6f09d3d96aef24b29cc555416c712f9bf Mon Sep 17 00:00:00 2001 From: Leynos Date: Sat, 22 Nov 2025 15:24:20 +0000 Subject: [PATCH 05/23] refactor(stdlib/which): derive Default for CwdMode using attribute Replaced the manual Default trait implementation for the CwdMode enum with the #[derive(Default)] attribute, specifying Auto as the default variant. This simplifies the code and leverages Rust's built-in derive functionality. 
Co-authored-by: terragon-labs[bot] --- src/stdlib/which/options.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/stdlib/which/options.rs b/src/stdlib/which/options.rs index 1aff8669..58e676fd 100644 --- a/src/stdlib/which/options.rs +++ b/src/stdlib/which/options.rs @@ -2,8 +2,9 @@ use minijinja::{Error, value::Kwargs}; use super::error::args_error; -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)] pub(super) enum CwdMode { + #[default] Auto, Always, Never, @@ -20,12 +21,6 @@ impl CwdMode { } } -impl Default for CwdMode { - fn default() -> Self { - Self::Auto - } -} - #[derive(Clone, Debug, Default)] pub(crate) struct WhichOptions { pub(super) all: bool, From 1e1a72dc991fe1d34dffa1ec4763155973cef5b8 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sun, 23 Nov 2025 21:48:41 +0000 Subject: [PATCH 06/23] test(stdlib): replace 'And' with 'Given' in stdlib.feature scenarios Changed multiple 'And' steps to 'Given' in the stdlib.feature test scenarios to improve clarity and adherence to Gherkin syntax for preconditions. 
Co-authored-by: terragon-labs[bot] --- tests/features/stdlib.feature | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/features/stdlib.feature b/tests/features/stdlib.feature index f81d7675..ebfe57ac 100644 --- a/tests/features/stdlib.feature +++ b/tests/features/stdlib.feature @@ -73,14 +73,14 @@ Feature: Template stdlib filters Then the stdlib error contains "could not resolve" Scenario: which filter resolves the first PATH entry - And the stdlib executable "bin/primary/tool" exists + Given the stdlib executable "bin/primary/tool" exists And the stdlib executable "bin/secondary/tool" exists And the stdlib PATH entries are "bin/primary:bin/secondary" When I render the stdlib template "{{ 'tool' | which }}" Then the stdlib output is the workspace executable "bin/primary/tool" Scenario: which filter lists every PATH match - And the stdlib executable "bin/path_a/tool" exists + Given the stdlib executable "bin/path_a/tool" exists And the stdlib executable "bin/path_b/tool" exists And the stdlib PATH entries are "bin/path_a:bin/path_b" When I render the stdlib template "{{ ('tool' | which(all=true))[0] }}" @@ -89,13 +89,13 @@ Feature: Template stdlib filters Then the stdlib output is the workspace executable "bin/path_b/tool" Scenario: which function honours cwd_mode - And the stdlib executable "local/tool" exists + Given the stdlib executable "local/tool" exists And the stdlib PATH entries are "" When I render the stdlib template "{{ which('tool', cwd_mode='always') }}" Then the stdlib output is the workspace executable "local/tool" Scenario: which filter reports missing executables - And the stdlib PATH entries are "" + Given the stdlib PATH entries are "" When I render the stdlib template "{{ 'absent' | which }}" Then the stdlib error contains "netsuke::jinja::which::not_found" From 231b958ca2f218130e6bc3d05f87203fdfad6304 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 00:41:37 +0000 Subject: [PATCH 07/23] Wire cwd 
override for which; add workspace search and cucumber tests (#238) * feat(stdlib/which): add workspace fallback to 'which' command lookup Enhance the 'which' command resolution logic to perform a recursive search of the workspace directory if the PATH environment variable is empty. This ensures that commands can be located within the current workspace when the standard PATH lookup fails. - Added optional CWD override support in WhichResolver and EnvSnapshot to influence environment capturing. - Integrated 'walkdir' crate for recursive directory traversal. - Implemented search_workspace function to find executables recursively in workspace. - Modified lookup behavior to try workspace search upon PATH miss when appropriate. - Updated stdlib registration to pass current workspace root as CWD override. - Improved error handling and tests accordingly. Co-authored-by: terragon-labs[bot] * fix(tests): remove run_and_exit calls in cucumber tests Changed .run_and_exit() to .run() in cucumber test execution to prevent premature process exit, enabling all tests to run as intended. Co-authored-by: terragon-labs[bot] * refactor(stdlib_assert): extract assertion helpers for stdlib output - Added a new module `stdlib_assert` in `test_support/src` that provides helper functions for asserting stdlib rendering outputs. - Replaced ad-hoc stdlib output/error checking in `stdlib_steps` with calls to this new helper, improving code reuse and clarity. - Included unit tests for the new assertion helpers to ensure correct behavior. Co-authored-by: terragon-labs[bot] * feat(which): add workspace fallback and caching to which resolver - Implement fallback search in workspace directory when PATH is empty - Skip heavy directories like .git and target during workspace search - Add small LRU cache for which resolver keyed by command, cwd, PATH, etc. 
- Improve error handling and debug logs for unreadable workspace entries - Enhance which filter tests to cover workspace fallback and skipping heavy dirs - Add detailed documentation diagrams for which resolver flow and configuration wiring - Introduce comprehensive tests for workspace search functionality This enhancement improves command resolution robustness in empty PATH environments by searching the workspace, optimizing with caching, and clarifying behavior through documentation. Co-authored-by: terragon-labs[bot] --------- Co-authored-by: terragon-labs[bot] --- Cargo.lock | 1 + Cargo.toml | 1 + docs/netsuke-design.md | 120 ++++++++++++ src/stdlib/mod.rs | 5 +- src/stdlib/which/cache.rs | 6 +- src/stdlib/which/env.rs | 8 +- src/stdlib/which/lookup.rs | 193 ++++++++++++++++++- src/stdlib/which/mod.rs | 4 +- test_support/src/lib.rs | 1 + test_support/src/stdlib_assert.rs | 45 +++++ tests/cucumber.rs | 10 +- tests/std_filter_tests/which_filter_tests.rs | 49 +++++ tests/steps/stdlib_steps/assertions.rs | 14 +- 13 files changed, 439 insertions(+), 18 deletions(-) create mode 100644 test_support/src/stdlib_assert.rs diff --git a/Cargo.lock b/Cargo.lock index c07b1b38..0c6a6357 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1292,6 +1292,7 @@ dependencies = [ "ureq", "url", "wait-timeout", + "walkdir", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index a8dd1cce..af78ce53 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ itertools = "0.12" indexmap = { version = "2.5", features = ["serde"] } lru = "0.12" glob = "0.3.3" +walkdir = "2.5.0" tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["fmt"] } serde_json = { version = "1", features = ["preserve_order"] } diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 72ded7f5..34b45e19 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -977,6 +977,126 @@ validation, and list-all semantics. 
Behavioural MiniJinja fixtures exercise the filter in Stage 3/4 renders to prove determinism across repeated invocations with identical environments. +Sequence of the resolver when falling back to the workspace: + +```mermaid +sequenceDiagram + participant "Caller" as "Caller" + participant "WhichResolver" as "WhichResolver" + participant "EnvSnapshot" as "EnvSnapshot" + participant "Lookup" as "lookup() in lookup.rs" + participant "HandleMiss" as "handle_miss()" + participant "SearchWorkspace" as "search_workspace()" + + "Caller"->>"WhichResolver": "resolve(command, options)" + "WhichResolver"->>"EnvSnapshot": "capture(cwd_override)" + "EnvSnapshot"-->>"WhichResolver": "EnvSnapshot { cwd, raw_path }" + "WhichResolver"->>"Lookup": "lookup(env, command, options)" + "Lookup"->>"Lookup": "search PATH directories for matches" + alt "matches found" + "Lookup"-->>"WhichResolver": "Vec (maybe canonicalised)" + "WhichResolver"-->>"Caller": "Ok(matches)" + else "no matches in PATH" + "Lookup"->>"HandleMiss": "handle_miss(env, command, options, dirs)" + "HandleMiss"->>"HandleMiss": "check if 'raw_path' is empty" + alt "PATH empty and 'cwd_mode' != 'Never'" + "HandleMiss"->>"SearchWorkspace": "search_workspace(env.cwd, command, options.all)" + "SearchWorkspace"->>"SearchWorkspace": "walk workspace with 'WalkDir' and filter executables" + "SearchWorkspace"-->>"HandleMiss": "discovered paths (possibly empty)" + alt "discovered not empty" + alt "options.canonical is true" + "HandleMiss"->>"HandleMiss": "canonicalise(discovered)" + "HandleMiss"-->>"Lookup": "canonical paths" + else "options.canonical is false" + "HandleMiss"-->>"Lookup": "discovered paths" + end + "Lookup"-->>"WhichResolver": "Vec from workspace" + "WhichResolver"-->>"Caller": "Ok(matches)" + else "discovered empty" + "HandleMiss"-->>"Lookup": "Error(not_found_error)" + "Lookup"-->>"WhichResolver": "Error" + "WhichResolver"-->>"Caller": "Err(not_found)" + end + else "PATH not empty or 'cwd_mode' is 'Never'" + 
"HandleMiss"-->>"Lookup": "Error(not_found_error)" + "Lookup"-->>"WhichResolver": "Error" + "WhichResolver"-->>"Caller": "Err(not_found)" + end + end +``` + +Structural view of the which module and configuration wiring: + +```mermaid +classDiagram + class StdlibConfig { + +workspace_root_path() -> Option<&Utf8Path> + } + + class Environment { + +register_with_config(config: StdlibConfig) + } + + class WhichModule { + +register(env: &mut Environment, cwd_override: Option>) + } + + class WhichResolver { + -cache: Arc>> + -cwd_override: Option> + +new(cwd_override: Option>) -> WhichResolver + +resolve(command: &str, options: &WhichOptions) -> Result, Error> + } + + class EnvSnapshot { + +cwd: Utf8PathBuf + +raw_path: Option + +capture(cwd_override: Option<&Utf8Path>) -> Result + } + + class WhichOptions { + +cwd_mode: CwdMode + +canonical: bool + +all: bool + +fresh: bool + } + + class CwdMode { + <> + +Never + +OtherModes + } + + Environment --> StdlibConfig : uses + Environment --> WhichModule : calls register + StdlibConfig --> WhichModule : provides workspace_root_path as cwd_override + WhichModule --> WhichResolver : constructs via new(cwd_override) + WhichResolver --> EnvSnapshot : calls capture(cwd_override) + WhichResolver --> WhichOptions : reads lookup options + WhichOptions --> CwdMode : uses cwd_mode +``` + +### Cucumber execution flow + +```mermaid +sequenceDiagram + actor "Developer" as "Developer" + participant "TestRunner" as "Rust test binary" + participant "CliWorld" as "CliWorld" + participant "Cucumber" as "Cucumber runner" + participant "FS" as "Feature files under 'tests/features'" + + "Developer"->>"TestRunner": "run 'cargo test' (including cucumber tests)" + "TestRunner"->>"CliWorld": "create world instance" + "CliWorld"->>"CliWorld": "configure via 'cucumber()'" + "CliWorld"->>"Cucumber": "builder with 'max_concurrent_scenarios(1)'" + "Cucumber"->>"FS": "discover '.feature' files in 'tests/features'" + "Cucumber"->>"CliWorld": "execute 
scenarios sequentially (max 1)" + "CliWorld"-->>"Cucumber": "scenario results (stdout, stderr, exit codes)" + "Cucumber"-->>"TestRunner": "aggregate results and 'run_and_exit'" + "TestRunner"-->>"Developer": "process exit code and output with improved diagnostics" +``` + Implementation mirrors the design with a small (64-entry) LRU cache keyed by the command name, current directory, `PATH`, optional `PATHEXT`, and every filter option aside from `fresh`. Cache hits validate metadata before returning diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index fdc499a1..8c879e43 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -391,7 +391,10 @@ pub fn register_with_config(env: &mut Environment<'_>, config: StdlibConfig) -> register_file_tests(env); path::register_filters(env); collections::register_filters(env); - which::register(env); + let which_cwd = config + .workspace_root_path() + .map(|path| Arc::new(path.to_path_buf())); + which::register(env, which_cwd); let impure = state.impure_flag(); let (network_config, command_config) = config.into_components(); network::register_functions(env, Arc::clone(&impure), network_config); diff --git a/src/stdlib/which/cache.rs b/src/stdlib/which/cache.rs index a22413f1..96603a0a 100644 --- a/src/stdlib/which/cache.rs +++ b/src/stdlib/which/cache.rs @@ -19,10 +19,11 @@ pub(super) const CACHE_CAPACITY: usize = 64; #[derive(Clone, Debug)] pub(crate) struct WhichResolver { cache: Arc>>, + cwd_override: Option>, } impl WhichResolver { - pub(crate) fn new() -> Self { + pub(crate) fn new(cwd_override: Option>) -> Self { #[expect( clippy::unwrap_used, reason = "cache capacity constant is greater than zero" @@ -30,6 +31,7 @@ impl WhichResolver { let capacity = NonZeroUsize::new(CACHE_CAPACITY).unwrap(); Self { cache: Arc::new(Mutex::new(LruCache::new(capacity))), + cwd_override, } } @@ -38,7 +40,7 @@ impl WhichResolver { command: &str, options: &WhichOptions, ) -> Result, Error> { - let env = EnvSnapshot::capture()?; + let env = 
EnvSnapshot::capture(self.cwd_override.as_deref().map(Utf8PathBuf::as_path))?; let key = CacheKey::new(command, &env, options); if !options.fresh && let Some(cached) = self.try_cache(&key) diff --git a/src/stdlib/which/env.rs b/src/stdlib/which/env.rs index b71cff82..0edcbf01 100644 --- a/src/stdlib/which/env.rs +++ b/src/stdlib/which/env.rs @@ -20,8 +20,12 @@ pub(super) struct EnvSnapshot { } impl EnvSnapshot { - pub(super) fn capture() -> Result { - let cwd = current_dir_utf8()?; + pub(super) fn capture(cwd_override: Option<&Utf8Path>) -> Result { + let cwd = if let Some(override_cwd) = cwd_override { + override_cwd.to_path_buf() + } else { + current_dir_utf8()? + }; let raw_path = std::env::var_os("PATH"); let entries = parse_path_entries(raw_path.clone(), &cwd)?; #[cfg(windows)] diff --git a/src/stdlib/which/lookup.rs b/src/stdlib/which/lookup.rs index 579c9798..0f8d3c07 100644 --- a/src/stdlib/which/lookup.rs +++ b/src/stdlib/which/lookup.rs @@ -3,6 +3,9 @@ use std::fs; use camino::{Utf8Path, Utf8PathBuf}; use indexmap::IndexSet; use minijinja::{Error, ErrorKind}; +use walkdir::WalkDir; + +use super::options::CwdMode; #[cfg(windows)] use super::env; @@ -39,7 +42,7 @@ pub(super) fn lookup( } if matches.is_empty() { - return Err(not_found_error(command, &dirs, options.cwd_mode)); + return handle_miss(env, command, options, &dirs); } if options.canonical { @@ -103,6 +106,88 @@ pub(super) fn is_executable(path: &Utf8Path) -> bool { .is_ok_and(|metadata| metadata.is_file() && has_execute_permission(&metadata)) } +fn handle_miss( + env: &EnvSnapshot, + command: &str, + options: &WhichOptions, + dirs: &[Utf8PathBuf], +) -> Result, Error> { + let path_empty = env.raw_path.as_ref().is_none_or(|path| path.is_empty()); + + if path_empty && !matches!(options.cwd_mode, CwdMode::Never) { + let discovered = search_workspace(&env.cwd, command, options.all)?; + if !discovered.is_empty() { + return if options.canonical { + canonicalise(discovered) + } else { + Ok(discovered) + 
}; + } + } + + Err(not_found_error(command, dirs, options.cwd_mode)) +} + +fn search_workspace( + cwd: &Utf8Path, + command: &str, + collect_all: bool, +) -> Result, Error> { + const SKIP_DIRS: &[&str] = &[".git", "target"]; + let mut matches = Vec::new(); + let walker = WalkDir::new(cwd) + .follow_links(false) + .sort_by_file_name() + .into_iter() + .filter_entry(|entry| { + let ft = entry.file_type(); + if ft.is_dir() { + let name = entry.file_name().to_string_lossy(); + !SKIP_DIRS.iter().any(|skip| name == *skip) + } else { + true + } + }); + + for walk_entry in walker { + let entry = match walk_entry { + Ok(value) => value, + Err(err) => { + tracing::debug!( + %command, + error = %err, + "skipping unreadable workspace entry during which fallback" + ); + continue; + } + }; + if !entry.file_type().is_file() { + continue; + } + if entry.file_name() != command { + continue; + } + let path = entry.into_path(); + let utf8 = Utf8PathBuf::from_path_buf(path).map_err(|path_buf| { + let lossy_path = path_buf.to_string_lossy(); + Error::new( + ErrorKind::InvalidOperation, + format!( + "workspace path contains non-UTF-8 components while resolving command '{command}': {lossy_path}" + ), + ) + })?; + if !is_executable(&utf8) { + continue; + } + matches.push(utf8); + if !collect_all { + break; + } + } + Ok(matches) +} + #[cfg(unix)] fn has_execute_permission(metadata: &fs::Metadata) -> bool { use std::os::unix::fs::PermissionsExt; @@ -136,3 +221,109 @@ pub(super) fn canonicalise(paths: Vec) -> Result, } Ok(resolved) } + +#[cfg(test)] +mod tests { + use super::*; + use anyhow::{Context, Result, anyhow, ensure}; + use std::fs; + use tempfile::tempdir; + + #[cfg(unix)] + fn make_executable(path: &Utf8Path) -> Result<()> { + use std::os::unix::fs::PermissionsExt; + let mut perms = fs::metadata(path.as_std_path()) + .context("stat exec")? 
+ .permissions(); + perms.set_mode(0o755); + fs::set_permissions(path.as_std_path(), perms).context("chmod exec") + } + + #[cfg(not(unix))] + fn make_executable(_path: &Utf8Path) -> Result<()> { + Ok(()) + } + + fn write_exec(root: &Utf8Path, name: &str) -> Result { + let path = root.join(name); + fs::write(path.as_std_path(), b"#!/bin/sh\n").context("write exec stub")?; + make_executable(&path)?; + Ok(path) + } + + #[test] + fn search_workspace_returns_executable_and_skips_non_exec() -> Result<()> { + let temp = tempdir().context("create tempdir")?; + let root = Utf8PathBuf::from_path_buf(temp.path().to_path_buf()) + .map_err(|path| anyhow!("utf8 path required, got {:?}", path))?; + let exec = write_exec(root.as_path(), "tool")?; + let non_exec = root.join("tool2"); + fs::write(non_exec.as_std_path(), b"not exec").context("write non exec")?; + + let results = search_workspace(root.as_path(), "tool", false)?; + ensure!( + results == vec![exec], + "expected executable to be discovered" + ); + Ok(()) + } + + #[test] + fn search_workspace_collects_all_matches() -> Result<()> { + let temp = tempdir().context("create tempdir")?; + let root = Utf8PathBuf::from_path_buf(temp.path().to_path_buf()) + .map_err(|path| anyhow!("utf8 path required, got {:?}", path))?; + let first = write_exec(root.as_path(), "tool")?; + let subdir = root.join("bin"); + fs::create_dir_all(subdir.as_std_path()).context("mkdir bin")?; + let second = write_exec(subdir.as_path(), "tool")?; + + let mut results = search_workspace(root.as_path(), "tool", true)?; + results.sort(); + let mut expected = vec![first, second]; + expected.sort(); + ensure!( + results == expected, + "expected both executables to be returned" + ); + Ok(()) + } + + #[test] + fn search_workspace_skips_heavy_directories() -> Result<()> { + let temp = tempdir().context("create tempdir")?; + let root = Utf8PathBuf::from_path_buf(temp.path().to_path_buf()) + .map_err(|path| anyhow!("utf8 path required, got {:?}", path))?; + let heavy 
= root.join("target"); + fs::create_dir_all(heavy.as_std_path()).context("mkdir target")?; + write_exec(heavy.as_path(), "tool")?; + + let results = search_workspace(root.as_path(), "tool", false)?; + ensure!(results.is_empty(), "expected target/ to be skipped"); + Ok(()) + } + + #[cfg(unix)] + #[test] + fn search_workspace_ignores_unreadable_entries() -> Result<()> { + use std::os::unix::fs::PermissionsExt; + let temp = tempdir().context("create tempdir")?; + let root = Utf8PathBuf::from_path_buf(temp.path().to_path_buf()) + .map_err(|path| anyhow!("utf8 path required, got {:?}", path))?; + let blocked = root.join("blocked"); + fs::create_dir_all(blocked.as_std_path()).context("mkdir blocked")?; + let mut perms = fs::metadata(blocked.as_std_path()) + .context("stat blocked")? + .permissions(); + perms.set_mode(0o000); + fs::set_permissions(blocked.as_std_path(), perms).context("chmod blocked")?; + + let exec = write_exec(root.as_path(), "tool")?; + let results = search_workspace(root.as_path(), "tool", false)?; + ensure!( + results == vec![exec], + "expected readable executable despite blocked dir" + ); + Ok(()) + } +} diff --git a/src/stdlib/which/mod.rs b/src/stdlib/which/mod.rs index d300a532..3556859c 100644 --- a/src/stdlib/which/mod.rs +++ b/src/stdlib/which/mod.rs @@ -24,8 +24,8 @@ pub(crate) use options::WhichOptions; use error::args_error; -pub(crate) fn register(env: &mut Environment<'_>) { - let resolver = Arc::new(WhichResolver::new()); +pub(crate) fn register(env: &mut Environment<'_>, cwd_override: Option>) { + let resolver = Arc::new(WhichResolver::new(cwd_override)); { let filter_resolver = Arc::clone(&resolver); env.add_filter("which", move |value: Value, kwargs: Kwargs| { diff --git a/test_support/src/lib.rs b/test_support/src/lib.rs index 58697d76..82acfc00 100644 --- a/test_support/src/lib.rs +++ b/test_support/src/lib.rs @@ -23,6 +23,7 @@ pub mod http; pub mod manifest; pub mod ninja; pub mod path_guard; +pub mod stdlib_assert; /// Re-export 
the SHA-256 helper for concise call sites. pub use hash::sha256_hex; /// Re-export of [`PathGuard`] for crate-level ergonomics in tests. diff --git a/test_support/src/stdlib_assert.rs b/test_support/src/stdlib_assert.rs new file mode 100644 index 00000000..b6949c1b --- /dev/null +++ b/test_support/src/stdlib_assert.rs @@ -0,0 +1,45 @@ +//! Helpers for assertions around stdlib rendering outputs. +use anyhow::{Result, bail}; + +/// Extract the stdlib output when present, otherwise surface an informative +/// error. This mirrors the behaviour of the Cucumber step assertions so unit +/// tests can guard the branching logic. +pub fn stdlib_output_or_error<'a>(output: Option<&'a str>, error: Option<&str>) -> Result<&'a str> { + if let Some(out) = output { + return Ok(out); + } + if let Some(err) = error { + bail!("expected stdlib output; stdlib error present: {err}"); + } + bail!("expected stdlib output"); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn returns_output_when_present() { + let result = stdlib_output_or_error(Some("value"), None); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), "value"); + } + + #[test] + fn surfaces_stdlib_error_when_output_missing() { + let err = + stdlib_output_or_error(None, Some("boom")).expect_err("should propagate stdlib error"); + let msg = err.to_string(); + assert!( + msg.contains("expected stdlib output; stdlib error present: boom"), + "message was {msg}" + ); + } + + #[test] + fn reports_missing_output_when_both_absent() { + let err = stdlib_output_or_error(None, None) + .expect_err("should fail when neither output nor error present"); + assert_eq!(err.to_string(), "expected stdlib output"); + } +} diff --git a/tests/cucumber.rs b/tests/cucumber.rs index 3e260139..994bfa77 100644 --- a/tests/cucumber.rs +++ b/tests/cucumber.rs @@ -195,11 +195,17 @@ async fn main() { return; } - CliWorld::run("tests/features").await; + CliWorld::cucumber() + .max_concurrent_scenarios(1) + .run("tests/features") + .await; 
#[cfg(unix)] { if block_device_exists() { - CliWorld::run("tests/features_unix").await; + CliWorld::cucumber() + .max_concurrent_scenarios(1) + .run("tests/features_unix") + .await; } else { tracing::warn!("No block device in /dev; skipping Unix file-system features."); } diff --git a/tests/std_filter_tests/which_filter_tests.rs b/tests/std_filter_tests/which_filter_tests.rs index 4adae55e..859cd28a 100644 --- a/tests/std_filter_tests/which_filter_tests.rs +++ b/tests/std_filter_tests/which_filter_tests.rs @@ -3,6 +3,8 @@ use camino::{Utf8Path, Utf8PathBuf}; use minijinja::{context, Environment}; use rstest::rstest; use std::ffi::{OsStr, OsString}; +use std::env; +use tempfile::tempdir; use test_support::{env::VarGuard, env_lock::EnvLock}; use super::support::{self, fallible}; @@ -234,3 +236,50 @@ fn which_filter_reports_missing_command() -> Result<()> { assert!(message.contains("netsuke::jinja::which::not_found")); Ok(()) } + +#[rstest] +fn which_filter_falls_back_to_workspace_when_path_empty() -> Result<()> { + let (_temp, root) = support::filter_workspace()?; + let tool = write_tool(&root, &ToolName::from("helper"))?; + let _path = PathEnv::new(&[])?; + let (mut env, _state) = fallible::stdlib_env_with_state()?; + let output = render(&mut env, &Template::from("{{ 'helper' | which }}"))?; + assert_eq!(output, tool.as_str()); + Ok(()) +} + +#[rstest] +fn which_filter_skips_heavy_directories() -> Result<()> { + let (_temp, root) = support::filter_workspace()?; + let target = root.join("target"); + std::fs::create_dir_all(target.as_std_path())?; + write_tool(&target, &ToolName::from("helper"))?; + let _path = PathEnv::new(&[])?; + let (mut env, _state) = fallible::stdlib_env_with_state()?; + let err = env + .render_str("{{ 'helper' | which }}", context! 
{}) + .unwrap_err(); + assert!(err.to_string().contains("not_found")); + Ok(()) +} + +#[rstest] +fn which_resolver_honours_workspace_root_override() -> Result<()> { + use cap_std::{ambient_authority, fs_utf8::Dir}; + let (_temp, root) = support::filter_workspace()?; + let tool = write_tool(&root, &ToolName::from("helper"))?; + let alt = tempdir().context("create alternate cwd")?; + let orig_cwd = env::current_dir().context("capture cwd")?; + env::set_current_dir(&alt).context("switch cwd")?; + + let config = StdlibConfig::new( + Dir::open_ambient_dir(&root, ambient_authority()).context("open workspace")?, + ) + .with_workspace_root_path(root.clone()); + let _path = PathEnv::new(&[])?; + let (mut env, _state) = fallible::stdlib_env_with_config(config)?; + let output = render(&mut env, &Template::from("{{ 'helper' | which }}"))?; + env::set_current_dir(orig_cwd).context("restore cwd")?; + assert_eq!(output, tool.as_str()); + Ok(()) +} diff --git a/tests/steps/stdlib_steps/assertions.rs b/tests/steps/stdlib_steps/assertions.rs index de1124ec..e9103495 100644 --- a/tests/steps/stdlib_steps/assertions.rs +++ b/tests/steps/stdlib_steps/assertions.rs @@ -7,6 +7,7 @@ use cap_std::{ambient_authority, fs_utf8::Dir}; use cucumber::then; use std::fs; use test_support::hash; +use test_support::stdlib_assert::stdlib_output_or_error; use time::{Duration, OffsetDateTime, UtcOffset}; use url::Url; @@ -37,18 +38,15 @@ fn stdlib_root_and_output(world: &CliWorld) -> Result<(&Utf8Path, &str)> { .stdlib_root .as_deref() .context("expected stdlib workspace root")?; - let output = world - .stdlib_output - .as_deref() - .context("expected stdlib output")?; + let output = stdlib_output(world)?; Ok((root, output)) } fn stdlib_output(world: &CliWorld) -> Result<&str> { - world - .stdlib_output - .as_deref() - .context("expected stdlib output") + stdlib_output_or_error( + world.stdlib_output.as_deref(), + world.stdlib_error.as_deref(), + ) } fn stdlib_output_path(world: &CliWorld) -> 
Result<&Utf8Path> { From 591d6c406f8eebe1b78ee783de8be8360fffb5b1 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 02:31:42 +0000 Subject: [PATCH 08/23] feat(which): add LRU caching with env fingerprint for which resolution - Introduce an LRU cache keyed by command, env fingerprint (hash of PATH and PATHEXT), cwd, and relevant options. - Remove repeated executability re-probing on cache hits to optimize performance. - Refactor cache keys to use a fingerprint of environment variables instead of cloning large strings. - Update cache entry validation logic to rely on initial validation at insertion time. - Add extensive unit tests using rstest for deterministic and reusable test fixtures. - Extend which options API with cache-aware views excluding 'fresh' flag. - Document improvements and design rationale in netsuke design docs. This commit improves resolver caching efficiency and robustness by avoiding expensive filesystem calls and string clones during path lookup. Co-authored-by: terragon-labs[bot] --- ...dr-002-replace-cucumber-with-rstest-bdd.md | 10 +-- docs/netsuke-design.md | 21 +++-- src/stdlib/which/cache.rs | 64 ++++---------- src/stdlib/which/env.rs | 2 + src/stdlib/which/error.rs | 2 + src/stdlib/which/lookup.rs | 83 ++++++++++++------- src/stdlib/which/options.rs | 26 ++++-- tests/std_filter_tests/which_filter_tests.rs | 38 +++++++++ 8 files changed, 145 insertions(+), 101 deletions(-) diff --git a/docs/adr-002-replace-cucumber-with-rstest-bdd.md b/docs/adr-002-replace-cucumber-with-rstest-bdd.md index 4f30b472..57bb5af0 100644 --- a/docs/adr-002-replace-cucumber-with-rstest-bdd.md +++ b/docs/adr-002-replace-cucumber-with-rstest-bdd.md @@ -84,14 +84,14 @@ for unit tests. - Decide on the default attributes applied to generated tests (for example, stacking `#[scenario(...)]` with `#[tokio::test(flavor = "multi_thread")]` for steps that require Tokio). Document the pattern in - `docs/rstest-bdd- users-guide.md`. 
+ `docs/rstest-bdd-users-guide.md`. ### Phase 2 – Fixture extraction and world bridging - Refactor `CliWorld` into plain data holders (for example `CliState`, `ManifestState`, `StdlibState`, `ProcessState`) that expose the smallest mutable surface required by each step. Each holder lives in - `tests/bdd/ fixtures.rs` (or similar) and is backed by `Arc>` when + `tests/bdd/fixtures.rs` (or similar) and is backed by `Arc>` when interior mutability is required. - Provide `#[rstest::fixture]` constructors for shared resources: - Temporary workspace plus `PathGuard` handling for PATH edits. @@ -162,9 +162,9 @@ the cucumber equivalents once the new tests pass. - Remove `cucumber` (and `tokio` features added solely for the runner) from `Cargo.toml` along with the `[[test]]` entry that disables the default harness. -- Drop cucumber-specific documentation such as `docs/behavioural-testing-in- - rust-with-cucumber.md - ` once rewritten, and update any references in the user guide and Orca/Ortho docs. +- Drop cucumber-specific documentation such as + `docs/behavioural-testing-in-rust-with-cucumber.md` once rewritten, and + update any references in the user guide and Orca/Ortho docs. ### Phase 5 – Documentation, CI, and enablement diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 34b45e19..b36079be 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -958,13 +958,16 @@ Semantics honour platform conventions while enforcing predictable behaviour: - Canonicalisation happens after discovery and only when requested so that manifests can balance reproducibility against host-specific absolute paths. -The resolver keeps a small LRU cache keyed by the command, `PATH`, optional -`PATHEXT`, working directory, and filter options. Cache hits are validated with -cheap metadata probes so stale entries heal automatically. 
Because all inputs -derive from the manifest or process environment, the helper remains effectively -pure and the existing render cache simply incorporates `PATH`/`PATHEXT`/`CWD` -into its key. Callers can request a bypass with `fresh=true` when they need to -observe recent toolchain changes during a long session. +The resolver keeps a small LRU cache keyed by the command, a fingerprint of +`PATH`/`PATHEXT`, the working directory, and the cache-relevant options (`all`, +`canonical`, `cwd_mode`). Entries are validated once at insertion; cache reads +no longer re-probe executability, keeping the hot path lean. Because `fresh` +only controls bypass behaviour it is stripped from the cache key so fresh +lookups still repopulate the cache for subsequent calls. The fingerprint means +environment changes invalidate keys without cloning large strings, and the +helper remains pure because all inputs still derive from the manifest or +process environment. Callers can request a bypass with `fresh=true` when they +need to observe recent toolchain changes during a long session. Errors follow the design’s actionable diagnostic model. Missing executables raise `netsuke::jinja::which::not_found` with context on how many `PATH` @@ -972,8 +975,8 @@ entries were inspected, a shortened preview of the path list, and platform appropriate hints (for example suggesting `cwd_mode="always"` on Windows). Invalid arguments surface as `netsuke::jinja::which::args`. -Unit tests cover POSIX and Windows specifics, canonicalization, cache -validation, and list-all semantics. Behavioural MiniJinja fixtures exercise the +Unit tests cover POSIX and Windows specifics, canonical deduplication, cache +reuse, and list-all semantics. Behavioural MiniJinja fixtures exercise the filter in Stage 3/4 renders to prove determinism across repeated invocations with identical environments. 
diff --git a/src/stdlib/which/cache.rs b/src/stdlib/which/cache.rs index 96603a0a..0f1b15b0 100644 --- a/src/stdlib/which/cache.rs +++ b/src/stdlib/which/cache.rs @@ -1,5 +1,8 @@ +//! LRU-backed cache for the `which` resolver to avoid repeat filesystem scans. + use std::{ - ffi::OsString, + collections::hash_map::DefaultHasher, + hash::{Hash, Hasher}, num::NonZeroUsize, sync::{Arc, Mutex, MutexGuard}, }; @@ -8,11 +11,7 @@ use camino::Utf8PathBuf; use lru::LruCache; use minijinja::Error; -use super::{ - env::EnvSnapshot, - lookup::{is_executable, lookup}, - options::{CwdMode, WhichOptions}, -}; +use super::{env::EnvSnapshot, lookup::lookup, options::WhichOptions}; pub(super) const CACHE_CAPACITY: usize = 64; @@ -24,11 +23,7 @@ pub(crate) struct WhichResolver { impl WhichResolver { pub(crate) fn new(cwd_override: Option>) -> Self { - #[expect( - clippy::unwrap_used, - reason = "cache capacity constant is greater than zero" - )] - let capacity = NonZeroUsize::new(CACHE_CAPACITY).unwrap(); + let capacity = NonZeroUsize::new(CACHE_CAPACITY).unwrap_or(NonZeroUsize::MIN); Self { cache: Arc::new(Mutex::new(LruCache::new(capacity))), cwd_override, @@ -54,14 +49,7 @@ impl WhichResolver { fn try_cache(&self, key: &CacheKey) -> Option> { let mut guard = self.lock_cache(); - match guard.get(key) { - Some(entry) if entry.is_valid() => Some(entry.matches.clone()), - Some(_) => { - guard.pop(key); - None - } - None => None, - } + guard.get(key).map(|entry| entry.matches.clone()) } fn store(&self, key: CacheKey, matches: Vec) { @@ -82,48 +70,28 @@ struct CacheEntry { matches: Vec, } -impl CacheEntry { - fn is_valid(&self) -> bool { - self.matches - .iter() - .all(|path| is_executable(path.as_path())) - } -} - #[derive(Clone, Debug, PartialEq, Eq, Hash)] struct CacheKey { command: String, - path: Option, - pathext: Option, + env_fingerprint: u64, cwd: Utf8PathBuf, - options: CacheKeyOptions, + options: WhichOptions, } impl CacheKey { fn new(command: &str, env: &EnvSnapshot, options: 
&WhichOptions) -> Self { Self { command: command.to_owned(), - path: env.raw_path.clone(), - pathext: env.raw_pathext.clone(), + env_fingerprint: env_fingerprint(env), cwd: env.cwd.clone(), - options: CacheKeyOptions::from(options), + options: options.cache_key_view(), } } } -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -struct CacheKeyOptions { - all: bool, - canonical: bool, - cwd_mode: CwdMode, -} - -impl From<&WhichOptions> for CacheKeyOptions { - fn from(value: &WhichOptions) -> Self { - Self { - all: value.all, - canonical: value.canonical, - cwd_mode: value.cwd_mode, - } - } +fn env_fingerprint(env: &EnvSnapshot) -> u64 { + let mut hasher = DefaultHasher::new(); + env.raw_path.hash(&mut hasher); + env.raw_pathext.hash(&mut hasher); + hasher.finish() } diff --git a/src/stdlib/which/env.rs b/src/stdlib/which/env.rs index 0edcbf01..b929af11 100644 --- a/src/stdlib/which/env.rs +++ b/src/stdlib/which/env.rs @@ -1,3 +1,5 @@ +//! Snapshot of PATH, PATHEXT, and current directory for the `which` resolver. + #[cfg(windows)] use std::ffi::OsStr; use std::ffi::OsString; diff --git a/src/stdlib/which/error.rs b/src/stdlib/which/error.rs index 41941e9d..77936fb3 100644 --- a/src/stdlib/which/error.rs +++ b/src/stdlib/which/error.rs @@ -1,3 +1,5 @@ +//! Error helpers for the `MiniJinja` `which` filter/function. + use std::fmt; use camino::{Utf8Path, Utf8PathBuf}; diff --git a/src/stdlib/which/lookup.rs b/src/stdlib/which/lookup.rs index 0f8d3c07..8695162d 100644 --- a/src/stdlib/which/lookup.rs +++ b/src/stdlib/which/lookup.rs @@ -1,3 +1,5 @@ +//! Filesystem search utilities for resolving commands for the `which` feature. 
+ use std::fs; use camino::{Utf8Path, Utf8PathBuf}; @@ -226,8 +228,35 @@ pub(super) fn canonicalise(paths: Vec) -> Result, mod tests { use super::*; use anyhow::{Context, Result, anyhow, ensure}; + use rstest::{fixture, rstest}; use std::fs; - use tempfile::tempdir; + use tempfile::TempDir; + + struct TempWorkspace { + root: Utf8PathBuf, + _tempdir: TempDir, + } + + impl TempWorkspace { + fn new() -> Result { + let tempdir = TempDir::new().context("create tempdir")?; + let root = Utf8PathBuf::from_path_buf(tempdir.path().to_path_buf()) + .map_err(|path| anyhow!("utf8 path required, got {:?}", path))?; + Ok(Self { + root, + _tempdir: tempdir, + }) + } + + fn root(&self) -> &Utf8Path { + self.root.as_path() + } + } + + #[fixture] + fn workspace() -> TempWorkspace { + TempWorkspace::new().expect("create utf8 temp workspace") + } #[cfg(unix)] fn make_executable(path: &Utf8Path) -> Result<()> { @@ -251,16 +280,15 @@ mod tests { Ok(path) } - #[test] - fn search_workspace_returns_executable_and_skips_non_exec() -> Result<()> { - let temp = tempdir().context("create tempdir")?; - let root = Utf8PathBuf::from_path_buf(temp.path().to_path_buf()) - .map_err(|path| anyhow!("utf8 path required, got {:?}", path))?; - let exec = write_exec(root.as_path(), "tool")?; - let non_exec = root.join("tool2"); + #[rstest] + fn search_workspace_returns_executable_and_skips_non_exec( + workspace: TempWorkspace, + ) -> Result<()> { + let exec = write_exec(workspace.root(), "tool")?; + let non_exec = workspace.root().join("tool2"); fs::write(non_exec.as_std_path(), b"not exec").context("write non exec")?; - let results = search_workspace(root.as_path(), "tool", false)?; + let results = search_workspace(workspace.root(), "tool", false)?; ensure!( results == vec![exec], "expected executable to be discovered" @@ -268,17 +296,14 @@ mod tests { Ok(()) } - #[test] - fn search_workspace_collects_all_matches() -> Result<()> { - let temp = tempdir().context("create tempdir")?; - let root = 
Utf8PathBuf::from_path_buf(temp.path().to_path_buf()) - .map_err(|path| anyhow!("utf8 path required, got {:?}", path))?; - let first = write_exec(root.as_path(), "tool")?; - let subdir = root.join("bin"); + #[rstest] + fn search_workspace_collects_all_matches(workspace: TempWorkspace) -> Result<()> { + let first = write_exec(workspace.root(), "tool")?; + let subdir = workspace.root().join("bin"); fs::create_dir_all(subdir.as_std_path()).context("mkdir bin")?; let second = write_exec(subdir.as_path(), "tool")?; - let mut results = search_workspace(root.as_path(), "tool", true)?; + let mut results = search_workspace(workspace.root(), "tool", true)?; results.sort(); let mut expected = vec![first, second]; expected.sort(); @@ -289,28 +314,22 @@ mod tests { Ok(()) } - #[test] - fn search_workspace_skips_heavy_directories() -> Result<()> { - let temp = tempdir().context("create tempdir")?; - let root = Utf8PathBuf::from_path_buf(temp.path().to_path_buf()) - .map_err(|path| anyhow!("utf8 path required, got {:?}", path))?; - let heavy = root.join("target"); + #[rstest] + fn search_workspace_skips_heavy_directories(workspace: TempWorkspace) -> Result<()> { + let heavy = workspace.root().join("target"); fs::create_dir_all(heavy.as_std_path()).context("mkdir target")?; write_exec(heavy.as_path(), "tool")?; - let results = search_workspace(root.as_path(), "tool", false)?; + let results = search_workspace(workspace.root(), "tool", false)?; ensure!(results.is_empty(), "expected target/ to be skipped"); Ok(()) } #[cfg(unix)] - #[test] - fn search_workspace_ignores_unreadable_entries() -> Result<()> { + #[rstest] + fn search_workspace_ignores_unreadable_entries(workspace: TempWorkspace) -> Result<()> { use std::os::unix::fs::PermissionsExt; - let temp = tempdir().context("create tempdir")?; - let root = Utf8PathBuf::from_path_buf(temp.path().to_path_buf()) - .map_err(|path| anyhow!("utf8 path required, got {:?}", path))?; - let blocked = root.join("blocked"); + let blocked = 
workspace.root().join("blocked"); fs::create_dir_all(blocked.as_std_path()).context("mkdir blocked")?; let mut perms = fs::metadata(blocked.as_std_path()) .context("stat blocked")? @@ -318,8 +337,8 @@ mod tests { perms.set_mode(0o000); fs::set_permissions(blocked.as_std_path(), perms).context("chmod blocked")?; - let exec = write_exec(root.as_path(), "tool")?; - let results = search_workspace(root.as_path(), "tool", false)?; + let exec = write_exec(workspace.root(), "tool")?; + let results = search_workspace(workspace.root(), "tool", false)?; ensure!( results == vec![exec], "expected readable executable despite blocked dir" diff --git a/src/stdlib/which/options.rs b/src/stdlib/which/options.rs index 58e676fd..c2678116 100644 --- a/src/stdlib/which/options.rs +++ b/src/stdlib/which/options.rs @@ -1,3 +1,5 @@ +//! Parse and hold options for the `which` filter and function. + use minijinja::{Error, value::Kwargs}; use super::error::args_error; @@ -12,16 +14,20 @@ pub(super) enum CwdMode { impl CwdMode { pub(super) fn parse(value: &str) -> Option { - match value { - "auto" => Some(Self::Auto), - "always" => Some(Self::Always), - "never" => Some(Self::Never), - _ => None, - } + parse_cwd_mode(value) + } +} + +fn parse_cwd_mode(value: &str) -> Option { + match value { + "auto" => Some(CwdMode::Auto), + "always" => Some(CwdMode::Always), + "never" => Some(CwdMode::Never), + _ => None, } } -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] pub(crate) struct WhichOptions { pub(super) all: bool, pub(super) canonical: bool, @@ -52,4 +58,10 @@ impl WhichOptions { cwd_mode: cwd_mode.unwrap_or_default(), }) } + + pub(crate) fn cache_key_view(&self) -> Self { + let mut clone = self.clone(); + clone.fresh = false; + clone + } } diff --git a/tests/std_filter_tests/which_filter_tests.rs b/tests/std_filter_tests/which_filter_tests.rs index 859cd28a..4eb5c8ac 100644 --- a/tests/std_filter_tests/which_filter_tests.rs +++ 
b/tests/std_filter_tests/which_filter_tests.rs @@ -212,6 +212,44 @@ fn which_filter_all_returns_all_matches() -> Result<()> { Ok(()) } +#[rstest] +fn which_filter_all_with_duplicates_respects_canonical_false() -> Result<()> { + let mut fixture = WhichTestFixture::with_tool_in_dirs( + &ToolName::from("helper"), + &[DirName::from("bin"), DirName::from("bin")], + )?; + fixture.state.reset_impure(); + let output = fixture.render(&Template::from( + "{{ 'helper' | which(all=true, canonical=false) | join('|') }}", + ))?; + + let path = fixture.paths[0].as_str(); + let parts: Vec<&str> = output.split('|').collect(); + + assert_eq!(parts, vec![path, path]); + assert!(!fixture.state.is_impure()); + Ok(()) +} + +#[rstest] +fn which_filter_all_with_duplicates_deduplicates_canonicalised_paths() -> Result<()> { + let mut fixture = WhichTestFixture::with_tool_in_dirs( + &ToolName::from("helper"), + &[DirName::from("bin"), DirName::from("bin")], + )?; + fixture.state.reset_impure(); + let output = fixture.render(&Template::from( + "{{ 'helper' | which(all=true, canonical=true) | join('|') }}", + ))?; + + let path = fixture.paths[0].as_str(); + let parts: Vec<&str> = output.split('|').collect(); + + assert_eq!(parts, vec![path]); + assert!(!fixture.state.is_impure()); + Ok(()) +} + #[rstest] fn which_function_honours_cwd_mode() -> Result<()> { let (_temp, root) = support::filter_workspace()?; From afae649518a5122d98cfc5d1779116e6d3d19391 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 03:11:28 +0000 Subject: [PATCH 09/23] feat(which): support PATHEXT-aware executable resolution on Windows Enhance resolve_direct to append PATHEXT extensions when resolving executables on Windows, allowing matches to files with executable extensions like .bat. Add tests to verify correct behavior of PATHEXT expansion and executable detection. This improves cross-platform compatibility and correctness of executable lookup on Windows. 
Co-authored-by: terragon-labs[bot] --- src/stdlib/which/lookup.rs | 75 +++++++++++++++++++++++++++++++++++--- 1 file changed, 69 insertions(+), 6 deletions(-) diff --git a/src/stdlib/which/lookup.rs b/src/stdlib/which/lookup.rs index 8695162d..09da724c 100644 --- a/src/stdlib/which/lookup.rs +++ b/src/stdlib/which/lookup.rs @@ -65,13 +65,49 @@ pub(super) fn resolve_direct( } else { env.cwd.join(raw) }; - if !is_executable(&resolved) { - return Err(direct_not_found(command, &resolved)); + #[cfg(windows)] + { + let mut matches = Vec::new(); + let candidates = if resolved.extension().is_some() { + vec![resolved] + } else { + env.pathext() + .iter() + .map(|ext| { + let mut candidate = resolved.as_str().to_owned(); + candidate.push_str(ext); + Utf8PathBuf::from(candidate) + }) + .collect() + }; + for candidate in candidates { + if !is_executable(&candidate) { + continue; + } + matches.push(candidate); + if !options.all { + break; + } + } + if matches.is_empty() { + return Err(direct_not_found(command, &resolved)); + } + if options.canonical { + canonicalise(matches) + } else { + Ok(matches) + } } - if options.canonical { - canonicalise(vec![resolved]) - } else { - Ok(vec![resolved]) + #[cfg(not(windows))] + { + if !is_executable(&resolved) { + return Err(direct_not_found(command, &resolved)); + } + if options.canonical { + canonicalise(vec![resolved]) + } else { + Ok(vec![resolved]) + } } } @@ -345,4 +381,31 @@ mod tests { ); Ok(()) } + + #[cfg(windows)] + #[rstest] + fn resolve_direct_appends_pathext(env: TempWorkspace) -> Result<()> { + let base = env.root().join("tools").join("gradlew"); + fs::create_dir_all(base.parent().expect("tools dir").as_std_path()) + .context("mkdir tools")?; + let exe = base.with_extension("bat"); + fs::write(exe.as_std_path(), b"@echo off\r\n").context("write stub")?; + make_executable(&exe)?; + + let snapshot = EnvSnapshot { + cwd: env.root.clone(), + raw_path: None, + raw_pathext: Some(".bat".into()), + entries: vec![], + pathext: 
vec![".bat".into()], + }; + + let matches = resolve_direct(".\\tools\\gradlew", &snapshot, &WhichOptions::default())?; + + ensure!( + matches == vec![exe], + "expected PATHEXT to expand direct path; got {matches:?}" + ); + Ok(()) + } } From 04183393908183c9fdb1bf90fb22f88daca0149e Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 03:18:03 +0000 Subject: [PATCH 10/23] refactor(stdlib::which): simplify and modularize direct executable path resolution Refactored `resolve_direct` into platform-specific helper functions to improve clarity and code reuse. On Windows, direct candidates are generated separately, and matching executables are filtered in a dedicated function. On non-Windows, direct path resolution logic is isolated. This change enhances maintainability and readability without changing functionality. Co-authored-by: terragon-labs[bot] --- Cargo.lock | 27 ++++-- Cargo.toml | 2 +- docs/netsuke-design.md | 16 ++-- src/stdlib/which/cache.rs | 3 +- src/stdlib/which/lookup.rs | 92 ++++++++++++-------- test_support/src/stdlib_assert.rs | 5 +- tests/std_filter_tests/which_filter_tests.rs | 4 +- 7 files changed, 94 insertions(+), 55 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0c6a6357..cb99b5a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -550,6 +550,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -762,10 +768,19 @@ name = "hashbrown" version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" +dependencies = [ + "foldhash 0.1.5", +] + +[[package]] +name = "hashbrown" +version = 
"0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.2.0", ] [[package]] @@ -774,7 +789,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" dependencies = [ - "hashbrown", + "hashbrown 0.15.4", ] [[package]] @@ -925,7 +940,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.15.4", "serde", ] @@ -1105,11 +1120,11 @@ checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "lru" -version = "0.12.5" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +checksum = "96051b46fc183dc9cd4a223960ef37b9af631b55191852a8274bfef064cda20f" dependencies = [ - "hashbrown", + "hashbrown 0.16.1", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index af78ce53..9ca2fcee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,7 @@ md5 = { package = "md-5", version = "0.10", optional = true } itoa = "1" itertools = "0.12" indexmap = { version = "2.5", features = ["serde"] } -lru = "0.12" +lru = "0.16.2" glob = "0.3.3" walkdir = "2.5.0" tracing = "0.1" diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index b36079be..96137936 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -1101,13 +1101,15 @@ sequenceDiagram ``` Implementation mirrors the design with a small (64-entry) LRU cache keyed by -the command name, current directory, `PATH`, optional `PATHEXT`, and every -filter option aside from `fresh`. 
Cache hits validate metadata before returning -so deleted or repointed binaries immediately trigger a re-scan, and -`fresh=true` bypasses the cache without discarding previously memoised entries. -Diagnostic errors embed the `netsuke::jinja::which::*` codes and print a -trimmed preview of the scanned `PATH`, giving manifest authors clear -troubleshooting hints on both Unix and Windows hosts. +the command name, current directory, a fingerprint of `PATH`/`PATHEXT`, and +every filter option aside from `fresh`. Entries validate executability on +insertion; cache reads skip revalidation, so stale binaries fall out only when +the cache key changes (for example after a PATH update) or the entry is +evicted. The `fresh=true` flag bypasses the cache for a single lookup without +discarding previously memoised entries. Diagnostic errors embed the +`netsuke::jinja::which::*` codes and print a trimmed preview of the scanned +`PATH`, giving manifest authors clear troubleshooting hints on both Unix and +Windows hosts. 
#### Generic collection filters diff --git a/src/stdlib/which/cache.rs b/src/stdlib/which/cache.rs index 0f1b15b0..fbeedcf5 100644 --- a/src/stdlib/which/cache.rs +++ b/src/stdlib/which/cache.rs @@ -23,7 +23,8 @@ pub(crate) struct WhichResolver { impl WhichResolver { pub(crate) fn new(cwd_override: Option>) -> Self { - let capacity = NonZeroUsize::new(CACHE_CAPACITY).unwrap_or(NonZeroUsize::MIN); + let capacity = NonZeroUsize::new(CACHE_CAPACITY) + .unwrap_or_else(|| panic!("CACHE_CAPACITY must be greater than zero")); Self { cache: Arc::new(Mutex::new(LruCache::new(capacity))), cwd_override, diff --git a/src/stdlib/which/lookup.rs b/src/stdlib/which/lookup.rs index 09da724c..c4ee80a9 100644 --- a/src/stdlib/which/lookup.rs +++ b/src/stdlib/which/lookup.rs @@ -67,47 +67,63 @@ pub(super) fn resolve_direct( }; #[cfg(windows)] { - let mut matches = Vec::new(); - let candidates = if resolved.extension().is_some() { - vec![resolved] - } else { - env.pathext() - .iter() - .map(|ext| { - let mut candidate = resolved.as_str().to_owned(); - candidate.push_str(ext); - Utf8PathBuf::from(candidate) - }) - .collect() - }; - for candidate in candidates { - if !is_executable(&candidate) { - continue; - } - matches.push(candidate); - if !options.all { - break; - } - } - if matches.is_empty() { - return Err(direct_not_found(command, &resolved)); - } - if options.canonical { - canonicalise(matches) - } else { - Ok(matches) - } + resolve_direct_windows(command, &resolved, env, options) } #[cfg(not(windows))] { - if !is_executable(&resolved) { - return Err(direct_not_found(command, &resolved)); - } - if options.canonical { - canonicalise(vec![resolved]) - } else { - Ok(vec![resolved]) - } + resolve_direct_posix(command, &resolved, options) + } +} + +#[cfg(windows)] +fn resolve_direct_windows( + command: &str, + resolved: &Utf8PathBuf, + env: &EnvSnapshot, + options: &WhichOptions, +) -> Result, Error> { + let candidates = direct_candidates(resolved, env); + let mut matches = 
Vec::new(); + let _ = push_matches(&mut matches, candidates, options.all); + if matches.is_empty() { + return Err(direct_not_found(command, resolved)); + } + if options.canonical { + canonicalise(matches) + } else { + Ok(matches) + } +} + +#[cfg(windows)] +fn direct_candidates(resolved: &Utf8PathBuf, env: &EnvSnapshot) -> Vec { + if resolved.extension().is_some() { + vec![resolved.clone()] + } else { + env.pathext() + .iter() + .map(|ext| { + let mut candidate = resolved.as_str().to_owned(); + candidate.push_str(ext); + Utf8PathBuf::from(candidate) + }) + .collect() + } +} + +#[cfg(not(windows))] +fn resolve_direct_posix( + command: &str, + resolved: &Utf8PathBuf, + options: &WhichOptions, +) -> Result, Error> { + if !is_executable(resolved) { + return Err(direct_not_found(command, resolved)); + } + if options.canonical { + canonicalise(vec![resolved.clone()]) + } else { + Ok(vec![resolved.clone()]) } } diff --git a/test_support/src/stdlib_assert.rs b/test_support/src/stdlib_assert.rs index b6949c1b..8e521914 100644 --- a/test_support/src/stdlib_assert.rs +++ b/test_support/src/stdlib_assert.rs @@ -22,7 +22,10 @@ mod tests { fn returns_output_when_present() { let result = stdlib_output_or_error(Some("value"), None); assert!(result.is_ok()); - assert_eq!(result.unwrap(), "value"); + assert_eq!( + result.expect("expected stdlib_output_or_error to return output"), + "value" + ); } #[test] diff --git a/tests/std_filter_tests/which_filter_tests.rs b/tests/std_filter_tests/which_filter_tests.rs index 4eb5c8ac..207a1ec8 100644 --- a/tests/std_filter_tests/which_filter_tests.rs +++ b/tests/std_filter_tests/which_filter_tests.rs @@ -4,6 +4,7 @@ use minijinja::{context, Environment}; use rstest::rstest; use std::ffi::{OsStr, OsString}; use std::env; +use netsuke::stdlib::StdlibConfig; use tempfile::tempdir; use test_support::{env::VarGuard, env_lock::EnvLock}; @@ -316,8 +317,9 @@ fn which_resolver_honours_workspace_root_override() -> Result<()> { 
.with_workspace_root_path(root.clone()); let _path = PathEnv::new(&[])?; let (mut env, _state) = fallible::stdlib_env_with_config(config)?; - let output = render(&mut env, &Template::from("{{ 'helper' | which }}"))?; + let render_result = render(&mut env, &Template::from("{{ 'helper' | which }}")); env::set_current_dir(orig_cwd).context("restore cwd")?; + let output = render_result?; assert_eq!(output, tool.as_str()); Ok(()) } From c5d08ffe854b55bd61ebe6ad51b7b12ce60ea2f4 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 03:22:01 +0000 Subject: [PATCH 11/23] docs(resolver): add sequence diagram illustrating which resolver flow Added a Mermaid sequence diagram to docs/netsuke-design.md describing the control flow of the 'WhichResolver' including cache lookups, environment snapshot capture, direct path and PATH search, workspace fallback, and caching behavior. This improves documentation clarity on resolver internals. Co-authored-by: terragon-labs[bot] --- docs/netsuke-design.md | 51 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 96137936..5cdf8417 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -1111,6 +1111,57 @@ discarding previously memoised entries. Diagnostic errors embed the `PATH`, giving manifest authors clear troubleshooting hints on both Unix and Windows hosts. +Figure: Which resolver control flow with cache lookups and workspace fallback. 
+ +```mermaid +sequenceDiagram + participant Caller + participant WhichResolver + participant Cache + participant EnvSnapshot + participant Lookup + participant Workspace + + Caller->>WhichResolver: resolve(command, options) + activate WhichResolver + + WhichResolver->>EnvSnapshot: capture(cwd_override) + activate EnvSnapshot + EnvSnapshot-->>WhichResolver: env snapshot + deactivate EnvSnapshot + + WhichResolver->>Cache: compute key (command, fingerprint, cwd, options) + + alt cache hit (unless fresh=true) + Cache-->>WhichResolver: cached matches + else cache miss or fresh + WhichResolver->>Lookup: lookup(command, env, options) + activate Lookup + + alt direct path + Lookup->>Lookup: resolve_direct(command, env, options) + else PATH search + Lookup->>Lookup: iterate resolved_dirs, collect candidates + end + + alt found + Lookup-->>WhichResolver: matches + else not found in PATH + Lookup->>Workspace: fallback search (if enabled) + activate Workspace + Workspace-->>Lookup: candidates from workspace + deactivate Workspace + Lookup-->>WhichResolver: matches or not_found error + end + deactivate Lookup + + WhichResolver->>Cache: store(key, matches) + end + + WhichResolver-->>Caller: Result, Error> + deactivate WhichResolver +``` + #### Generic collection filters | Filter | Purpose | From c43f37b137f317680980c50514b224fcc9559f07 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 12:15:17 +0000 Subject: [PATCH 12/23] Update code Co-authored-by: terragon-labs[bot] --- Cargo.lock | 12 ++++++------ Cargo.toml | 4 ++-- docs/netsuke-design.md | 11 ----------- 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cb99b5a1..fd6375b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -204,7 +204,7 @@ dependencies = [ "maybe-owned", "rustix", "rustix-linux-procfs", - "windows-sys 0.59.0", + "windows-sys 0.52.0", "winx", ] @@ -579,7 +579,7 @@ checksum = "94e7099f6313ecacbe1256e8ff9d617b75d1bcb16a6fddef94866d225a01a14a" dependencies = [ 
"io-lifetimes", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -978,7 +978,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2285ddfe3054097ef4b2fe909ef8c3bcd1ea52a8f0d274416caebeef39f04a65" dependencies = [ "io-lifetimes", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -2140,7 +2140,7 @@ dependencies = [ "getrandom 0.3.3", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -2528,7 +2528,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -2699,7 +2699,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f3fd376f71958b862e7afb20cfe5a22830e1963462f3a17f49d82a6c1d1f42d" dependencies = [ "bitflags", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 9ca2fcee..9285fdcb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,9 +35,9 @@ md5 = { package = "md-5", version = "0.10", optional = true } itoa = "1" itertools = "0.12" indexmap = { version = "2.5", features = ["serde"] } -lru = "0.16.2" +lru = "0.16" glob = "0.3.3" -walkdir = "2.5.0" +walkdir = "2.5" tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["fmt"] } serde_json = { version = "1", features = ["preserve_order"] } diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 5cdf8417..351e6d32 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -1100,17 +1100,6 @@ sequenceDiagram "TestRunner"-->>"Developer": "process exit code and output with improved diagnostics" ``` -Implementation mirrors the design with a small (64-entry) LRU cache keyed by -the command name, current directory, a fingerprint of `PATH`/`PATHEXT`, and -every filter option aside from `fresh`. 
Entries validate executability on -insertion; cache reads skip revalidation, so stale binaries fall out only when -the cache key changes (for example after a PATH update) or the entry is -evicted. The `fresh=true` flag bypasses the cache for a single lookup without -discarding previously memoised entries. Diagnostic errors embed the -`netsuke::jinja::which::*` codes and print a trimmed preview of the scanned -`PATH`, giving manifest authors clear troubleshooting hints on both Unix and -Windows hosts. - Figure: Which resolver control flow with cache lookups and workspace fallback. ```mermaid From 7caf25063d35992efeeb4f177e7fc806309755fd Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 12:24:48 +0000 Subject: [PATCH 13/23] fix(stdlib): propagate errors from stdlib registration and which cache creation - Changed stdlib::register_with_config to return anyhow::Result instead of StdlibState directly. - Propagate errors from which::register during stdlib registration. - Update which::register and WhichResolver::new to return Result to handle cache capacity errors. - Propagate errors in integration points such as manifest mod and test setup. - Move tests from lookup.rs to lookup/tests.rs for better organization. These changes improve error handling in stdlib initialization and which cache setup. 
Co-authored-by: terragon-labs[bot] --- src/manifest/mod.rs | 2 +- src/stdlib/mod.rs | 19 ++- src/stdlib/which/cache.rs | 16 +- src/stdlib/which/{lookup.rs => lookup/mod.rs} | 149 +----------------- src/stdlib/which/lookup/tests.rs | 143 +++++++++++++++++ src/stdlib/which/mod.rs | 8 +- test_support/src/stdlib_assert.rs | 10 +- tests/std_filter_tests/support.rs | 2 +- tests/steps/stdlib_steps/rendering.rs | 2 +- 9 files changed, 179 insertions(+), 172 deletions(-) rename src/stdlib/which/{lookup.rs => lookup/mod.rs} (59%) create mode 100644 src/stdlib/which/lookup/tests.rs diff --git a/src/manifest/mod.rs b/src/manifest/mod.rs index 6b7af68c..670af304 100644 --- a/src/manifest/mod.rs +++ b/src/manifest/mod.rs @@ -107,7 +107,7 @@ fn from_str_named( jinja.add_function("env", |var_name: String| env_var(&var_name)); jinja.add_function("glob", |pattern: String| glob_paths(&pattern)); let _stdlib_state = match stdlib_config { - Some(config) => Ok(crate::stdlib::register_with_config(&mut jinja, config)), + Some(config) => crate::stdlib::register_with_config(&mut jinja, config), None => crate::stdlib::register(&mut jinja), }?; diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index 8c879e43..89f2bff1 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -362,10 +362,7 @@ pub fn register(env: &mut Environment<'_>) -> anyhow::Result { let path = Utf8PathBuf::from_path_buf(cwd).map_err(|path| { anyhow::anyhow!("current directory contains non-UTF-8 components: {path:?}") })?; - Ok(register_with_config( - env, - StdlibConfig::new(root).with_workspace_root_path(path), - )) + register_with_config(env, StdlibConfig::new(root).with_workspace_root_path(path)) } /// Register stdlib helpers using an explicit configuration. 
@@ -386,7 +383,15 @@ pub fn register(env: &mut Environment<'_>) -> anyhow::Result { /// let mut env = Environment::new(); /// let _state = stdlib::register_with_config(&mut env, StdlibConfig::new(dir)); /// ``` -pub fn register_with_config(env: &mut Environment<'_>, config: StdlibConfig) -> StdlibState { +/// +/// # Errors +/// +/// Returns an error if stdlib components cannot be registered (for example, +/// when the which resolver cache configuration is invalid). +pub fn register_with_config( + env: &mut Environment<'_>, + config: StdlibConfig, +) -> anyhow::Result { let state = StdlibState::default(); register_file_tests(env); path::register_filters(env); @@ -394,13 +399,13 @@ pub fn register_with_config(env: &mut Environment<'_>, config: StdlibConfig) -> let which_cwd = config .workspace_root_path() .map(|path| Arc::new(path.to_path_buf())); - which::register(env, which_cwd); + which::register(env, which_cwd)?; let impure = state.impure_flag(); let (network_config, command_config) = config.into_components(); network::register_functions(env, Arc::clone(&impure), network_config); command::register(env, impure, command_config); time::register_functions(env); - state + Ok(state) } pub(crate) fn value_from_bytes(bytes: Vec) -> Value { diff --git a/src/stdlib/which/cache.rs b/src/stdlib/which/cache.rs index fbeedcf5..79d69d99 100644 --- a/src/stdlib/which/cache.rs +++ b/src/stdlib/which/cache.rs @@ -9,7 +9,7 @@ use std::{ use camino::Utf8PathBuf; use lru::LruCache; -use minijinja::Error; +use minijinja::{Error, ErrorKind}; use super::{env::EnvSnapshot, lookup::lookup, options::WhichOptions}; @@ -22,13 +22,17 @@ pub(crate) struct WhichResolver { } impl WhichResolver { - pub(crate) fn new(cwd_override: Option>) -> Self { - let capacity = NonZeroUsize::new(CACHE_CAPACITY) - .unwrap_or_else(|| panic!("CACHE_CAPACITY must be greater than zero")); - Self { + pub(crate) fn new(cwd_override: Option>) -> Result { + let capacity = 
NonZeroUsize::new(CACHE_CAPACITY).ok_or_else(|| { + Error::new( + ErrorKind::InvalidOperation, + "which cache capacity must be greater than zero", + ) + })?; + Ok(Self { cache: Arc::new(Mutex::new(LruCache::new(capacity))), cwd_override, - } + }) } pub(crate) fn resolve( diff --git a/src/stdlib/which/lookup.rs b/src/stdlib/which/lookup/mod.rs similarity index 59% rename from src/stdlib/which/lookup.rs rename to src/stdlib/which/lookup/mod.rs index c4ee80a9..9c0f113a 100644 --- a/src/stdlib/which/lookup.rs +++ b/src/stdlib/which/lookup/mod.rs @@ -277,151 +277,4 @@ pub(super) fn canonicalise(paths: Vec) -> Result, } #[cfg(test)] -mod tests { - use super::*; - use anyhow::{Context, Result, anyhow, ensure}; - use rstest::{fixture, rstest}; - use std::fs; - use tempfile::TempDir; - - struct TempWorkspace { - root: Utf8PathBuf, - _tempdir: TempDir, - } - - impl TempWorkspace { - fn new() -> Result { - let tempdir = TempDir::new().context("create tempdir")?; - let root = Utf8PathBuf::from_path_buf(tempdir.path().to_path_buf()) - .map_err(|path| anyhow!("utf8 path required, got {:?}", path))?; - Ok(Self { - root, - _tempdir: tempdir, - }) - } - - fn root(&self) -> &Utf8Path { - self.root.as_path() - } - } - - #[fixture] - fn workspace() -> TempWorkspace { - TempWorkspace::new().expect("create utf8 temp workspace") - } - - #[cfg(unix)] - fn make_executable(path: &Utf8Path) -> Result<()> { - use std::os::unix::fs::PermissionsExt; - let mut perms = fs::metadata(path.as_std_path()) - .context("stat exec")? 
- .permissions(); - perms.set_mode(0o755); - fs::set_permissions(path.as_std_path(), perms).context("chmod exec") - } - - #[cfg(not(unix))] - fn make_executable(_path: &Utf8Path) -> Result<()> { - Ok(()) - } - - fn write_exec(root: &Utf8Path, name: &str) -> Result { - let path = root.join(name); - fs::write(path.as_std_path(), b"#!/bin/sh\n").context("write exec stub")?; - make_executable(&path)?; - Ok(path) - } - - #[rstest] - fn search_workspace_returns_executable_and_skips_non_exec( - workspace: TempWorkspace, - ) -> Result<()> { - let exec = write_exec(workspace.root(), "tool")?; - let non_exec = workspace.root().join("tool2"); - fs::write(non_exec.as_std_path(), b"not exec").context("write non exec")?; - - let results = search_workspace(workspace.root(), "tool", false)?; - ensure!( - results == vec![exec], - "expected executable to be discovered" - ); - Ok(()) - } - - #[rstest] - fn search_workspace_collects_all_matches(workspace: TempWorkspace) -> Result<()> { - let first = write_exec(workspace.root(), "tool")?; - let subdir = workspace.root().join("bin"); - fs::create_dir_all(subdir.as_std_path()).context("mkdir bin")?; - let second = write_exec(subdir.as_path(), "tool")?; - - let mut results = search_workspace(workspace.root(), "tool", true)?; - results.sort(); - let mut expected = vec![first, second]; - expected.sort(); - ensure!( - results == expected, - "expected both executables to be returned" - ); - Ok(()) - } - - #[rstest] - fn search_workspace_skips_heavy_directories(workspace: TempWorkspace) -> Result<()> { - let heavy = workspace.root().join("target"); - fs::create_dir_all(heavy.as_std_path()).context("mkdir target")?; - write_exec(heavy.as_path(), "tool")?; - - let results = search_workspace(workspace.root(), "tool", false)?; - ensure!(results.is_empty(), "expected target/ to be skipped"); - Ok(()) - } - - #[cfg(unix)] - #[rstest] - fn search_workspace_ignores_unreadable_entries(workspace: TempWorkspace) -> Result<()> { - use 
std::os::unix::fs::PermissionsExt; - let blocked = workspace.root().join("blocked"); - fs::create_dir_all(blocked.as_std_path()).context("mkdir blocked")?; - let mut perms = fs::metadata(blocked.as_std_path()) - .context("stat blocked")? - .permissions(); - perms.set_mode(0o000); - fs::set_permissions(blocked.as_std_path(), perms).context("chmod blocked")?; - - let exec = write_exec(workspace.root(), "tool")?; - let results = search_workspace(workspace.root(), "tool", false)?; - ensure!( - results == vec![exec], - "expected readable executable despite blocked dir" - ); - Ok(()) - } - - #[cfg(windows)] - #[rstest] - fn resolve_direct_appends_pathext(env: TempWorkspace) -> Result<()> { - let base = env.root().join("tools").join("gradlew"); - fs::create_dir_all(base.parent().expect("tools dir").as_std_path()) - .context("mkdir tools")?; - let exe = base.with_extension("bat"); - fs::write(exe.as_std_path(), b"@echo off\r\n").context("write stub")?; - make_executable(&exe)?; - - let snapshot = EnvSnapshot { - cwd: env.root.clone(), - raw_path: None, - raw_pathext: Some(".bat".into()), - entries: vec![], - pathext: vec![".bat".into()], - }; - - let matches = resolve_direct(".\\tools\\gradlew", &snapshot, &WhichOptions::default())?; - - ensure!( - matches == vec![exe], - "expected PATHEXT to expand direct path; got {matches:?}" - ); - Ok(()) - } -} +mod tests; diff --git a/src/stdlib/which/lookup/tests.rs b/src/stdlib/which/lookup/tests.rs new file mode 100644 index 00000000..f7d01aa5 --- /dev/null +++ b/src/stdlib/which/lookup/tests.rs @@ -0,0 +1,143 @@ +use super::*; +use anyhow::{Context, Result, anyhow, ensure}; +use rstest::{fixture, rstest}; +use std::fs; +use tempfile::TempDir; + +struct TempWorkspace { + root: Utf8PathBuf, + _tempdir: TempDir, +} + +impl TempWorkspace { + fn new() -> Result { + let tempdir = TempDir::new().context("create tempdir")?; + let root = Utf8PathBuf::from_path_buf(tempdir.path().to_path_buf()) + .map_err(|path| anyhow!("utf8 path 
required, got {:?}", path))?; + Ok(Self { + root, + _tempdir: tempdir, + }) + } + + fn root(&self) -> &Utf8Path { + self.root.as_path() + } +} + +#[fixture] +fn workspace() -> TempWorkspace { + TempWorkspace::new().expect("create utf8 temp workspace") +} + +#[cfg(unix)] +fn make_executable(path: &Utf8Path) -> Result<()> { + use std::os::unix::fs::PermissionsExt; + let mut perms = fs::metadata(path.as_std_path()) + .context("stat exec")? + .permissions(); + perms.set_mode(0o755); + fs::set_permissions(path.as_std_path(), perms).context("chmod exec") +} + +#[cfg(not(unix))] +fn make_executable(_path: &Utf8Path) -> Result<()> { + Ok(()) +} + +fn write_exec(root: &Utf8Path, name: &str) -> Result { + let path = root.join(name); + fs::write(path.as_std_path(), b"#!/bin/sh\n").context("write exec stub")?; + make_executable(&path)?; + Ok(path) +} + +#[rstest] +fn search_workspace_returns_executable_and_skips_non_exec(workspace: TempWorkspace) -> Result<()> { + let exec = write_exec(workspace.root(), "tool")?; + let non_exec = workspace.root().join("tool2"); + fs::write(non_exec.as_std_path(), b"not exec").context("write non exec")?; + + let results = search_workspace(workspace.root(), "tool", false)?; + ensure!( + results == vec![exec], + "expected executable to be discovered" + ); + Ok(()) +} + +#[rstest] +fn search_workspace_collects_all_matches(workspace: TempWorkspace) -> Result<()> { + let first = write_exec(workspace.root(), "tool")?; + let subdir = workspace.root().join("bin"); + fs::create_dir_all(subdir.as_std_path()).context("mkdir bin")?; + let second = write_exec(subdir.as_path(), "tool")?; + + let mut results = search_workspace(workspace.root(), "tool", true)?; + results.sort(); + let mut expected = vec![first, second]; + expected.sort(); + ensure!( + results == expected, + "expected both executables to be returned" + ); + Ok(()) +} + +#[rstest] +fn search_workspace_skips_heavy_directories(workspace: TempWorkspace) -> Result<()> { + let heavy = 
workspace.root().join("target"); + fs::create_dir_all(heavy.as_std_path()).context("mkdir target")?; + write_exec(heavy.as_path(), "tool")?; + + let results = search_workspace(workspace.root(), "tool", false)?; + ensure!(results.is_empty(), "expected target/ to be skipped"); + Ok(()) +} + +#[cfg(unix)] +#[rstest] +fn search_workspace_ignores_unreadable_entries(workspace: TempWorkspace) -> Result<()> { + use std::os::unix::fs::PermissionsExt; + let blocked = workspace.root().join("blocked"); + fs::create_dir_all(blocked.as_std_path()).context("mkdir blocked")?; + let mut perms = fs::metadata(blocked.as_std_path()) + .context("stat blocked")? + .permissions(); + perms.set_mode(0o000); + fs::set_permissions(blocked.as_std_path(), perms).context("chmod blocked")?; + + let exec = write_exec(workspace.root(), "tool")?; + let results = search_workspace(workspace.root(), "tool", false)?; + ensure!( + results == vec![exec], + "expected readable executable despite blocked dir" + ); + Ok(()) +} + +#[cfg(windows)] +#[rstest] +fn resolve_direct_appends_pathext(env: TempWorkspace) -> Result<()> { + let base = env.root().join("tools").join("gradlew"); + fs::create_dir_all(base.parent().expect("tools dir").as_std_path()).context("mkdir tools")?; + let exe = base.with_extension("bat"); + fs::write(exe.as_std_path(), b"@echo off\r\n").context("write stub")?; + make_executable(&exe)?; + + let snapshot = EnvSnapshot { + cwd: env.root.clone(), + raw_path: None, + raw_pathext: Some(".bat".into()), + entries: vec![], + pathext: vec![".bat".into()], + }; + + let matches = resolve_direct(".\\tools\\gradlew", &snapshot, &WhichOptions::default())?; + + ensure!( + matches == vec![exe], + "expected PATHEXT to expand direct path; got {matches:?}" + ); + Ok(()) +} diff --git a/src/stdlib/which/mod.rs b/src/stdlib/which/mod.rs index 3556859c..b846d86d 100644 --- a/src/stdlib/which/mod.rs +++ b/src/stdlib/which/mod.rs @@ -24,8 +24,11 @@ pub(crate) use options::WhichOptions; use error::args_error; 
-pub(crate) fn register(env: &mut Environment<'_>, cwd_override: Option>) { - let resolver = Arc::new(WhichResolver::new(cwd_override)); +pub(crate) fn register( + env: &mut Environment<'_>, + cwd_override: Option>, +) -> Result<(), Error> { + let resolver = Arc::new(WhichResolver::new(cwd_override)?); { let filter_resolver = Arc::clone(&resolver); env.add_filter("which", move |value: Value, kwargs: Kwargs| { @@ -44,6 +47,7 @@ pub(crate) fn register(env: &mut Environment<'_>, cwd_override: Option(output: Option<&'a str>, error: Option<&str>) -> Result<&'a str> { - if let Some(out) = output { - return Ok(out); + match (output, error) { + (Some(out), _) => Ok(out), + (None, Some(err)) => bail!("expected stdlib output; stdlib error present: {err}"), + (None, None) => bail!("expected stdlib output"), } - if let Some(err) = error { - bail!("expected stdlib output; stdlib error present: {err}"); - } - bail!("expected stdlib output"); } #[cfg(test)] diff --git a/tests/std_filter_tests/support.rs b/tests/std_filter_tests/support.rs index 2139acef..c83f998f 100644 --- a/tests/std_filter_tests/support.rs +++ b/tests/std_filter_tests/support.rs @@ -39,7 +39,7 @@ pub(crate) mod fallible { config: StdlibConfig, ) -> Result<(Environment<'static>, StdlibState)> { let mut env = Environment::new(); - let state = stdlib::register_with_config(&mut env, config); + let state = stdlib::register_with_config(&mut env, config)?; Ok((env, state)) } diff --git a/tests/steps/stdlib_steps/rendering.rs b/tests/steps/stdlib_steps/rendering.rs index 8b8d04b4..3066e083 100644 --- a/tests/steps/stdlib_steps/rendering.rs +++ b/tests/steps/stdlib_steps/rendering.rs @@ -37,7 +37,7 @@ pub(crate) fn render_template_with_context( .with_command_max_stream_bytes(limit) .context("configure stdlib command stream limit")?; } - let state = stdlib::register_with_config(&mut env, config); + let state = stdlib::register_with_config(&mut env, config)?; state.reset_impure(); world.stdlib_state = Some(state); let 
render = env.render_str(template.as_str(), ctx); From b712bbdf52d33160f0fa377532ab20c865e5db3a Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 14:04:50 +0000 Subject: [PATCH 14/23] feat(stdlib/which): enhance workspace command lookup with PATHEXT and case-insensitivity - Added environment snapshot parameter to workspace search to use PATHEXT on Windows. - Implemented workspace_entry_matches to compare command names case-insensitively and account for PATHEXT extensions on Windows. - Updated tests to capture EnvSnapshot for accurate workspace searches. - Improved docs for register function error cases including non-UTF-8 current directory paths. This improves command resolution consistency, especially on Windows where PATHEXT and case-insensitivity are important. Co-authored-by: terragon-labs[bot] --- src/stdlib/mod.rs | 5 +++-- src/stdlib/which/lookup/mod.rs | 34 ++++++++++++++++++++++++++++++-- src/stdlib/which/lookup/tests.rs | 19 ++++++++++++++---- 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index 89f2bff1..b36dde3e 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -353,8 +353,9 @@ impl StdlibState { /// # Errors /// /// Returns an error when the current working directory cannot be opened using -/// capability-based I/O. This occurs when the process lacks permission to read -/// the directory or if it no longer exists. +/// capability-based I/O (for example, when permissions are insufficient or the +/// directory no longer exists) or when the current directory path contains +/// non-UTF-8 components and cannot be converted into a UTF-8 workspace root. 
pub fn register(env: &mut Environment<'_>) -> anyhow::Result { let root = Dir::open_ambient_dir(".", ambient_authority()) .context("open current directory for stdlib registration")?; diff --git a/src/stdlib/which/lookup/mod.rs b/src/stdlib/which/lookup/mod.rs index 9c0f113a..a12306e9 100644 --- a/src/stdlib/which/lookup/mod.rs +++ b/src/stdlib/which/lookup/mod.rs @@ -169,7 +169,7 @@ fn handle_miss( let path_empty = env.raw_path.as_ref().is_none_or(|path| path.is_empty()); if path_empty && !matches!(options.cwd_mode, CwdMode::Never) { - let discovered = search_workspace(&env.cwd, command, options.all)?; + let discovered = search_workspace(&env.cwd, command, options.all, env)?; if !discovered.is_empty() { return if options.canonical { canonicalise(discovered) @@ -186,6 +186,7 @@ fn search_workspace( cwd: &Utf8Path, command: &str, collect_all: bool, + env: &EnvSnapshot, ) -> Result, Error> { const SKIP_DIRS: &[&str] = &[".git", "target"]; let mut matches = Vec::new(); @@ -218,7 +219,7 @@ fn search_workspace( if !entry.file_type().is_file() { continue; } - if entry.file_name() != command { + if !workspace_entry_matches(&entry, command, env) { continue; } let path = entry.into_path(); @@ -242,6 +243,35 @@ fn search_workspace( Ok(matches) } +fn workspace_entry_matches(entry: &walkdir::DirEntry, command: &str, env: &EnvSnapshot) -> bool { + #[cfg(not(windows))] + let _ = env; + #[cfg(windows)] + { + let file_name = entry.file_name().to_string_lossy().to_ascii_lowercase(); + let command_lower = command.to_ascii_lowercase(); + + if file_name == command_lower { + return true; + } + + if command_lower.contains('.') { + return false; + } + + let candidates = env::candidate_paths(Utf8Path::new(""), &command_lower, env.pathext()); + candidates + .iter() + .filter_map(|candidate| Utf8Path::new(candidate.as_str()).file_name()) + .any(|name| name.to_ascii_lowercase() == file_name) + } + #[cfg(not(windows))] + { + let file_name = entry.file_name().to_string_lossy(); + file_name == 
command + } +} + #[cfg(unix)] fn has_execute_permission(metadata: &fs::Metadata) -> bool { use std::os::unix::fs::PermissionsExt; diff --git a/src/stdlib/which/lookup/tests.rs b/src/stdlib/which/lookup/tests.rs index f7d01aa5..235aaa08 100644 --- a/src/stdlib/which/lookup/tests.rs +++ b/src/stdlib/which/lookup/tests.rs @@ -1,3 +1,6 @@ +//! Tests for the which lookup helpers, covering PATH search, workspace +//! fallback, canonicalisation, and platform-specific PATHEXT behaviour. + use super::*; use anyhow::{Context, Result, anyhow, ensure}; use rstest::{fixture, rstest}; @@ -58,7 +61,9 @@ fn search_workspace_returns_executable_and_skips_non_exec(workspace: TempWorkspa let non_exec = workspace.root().join("tool2"); fs::write(non_exec.as_std_path(), b"not exec").context("write non exec")?; - let results = search_workspace(workspace.root(), "tool", false)?; + let snapshot = + EnvSnapshot::capture(Some(workspace.root())).expect("capture env for workspace search"); + let results = search_workspace(workspace.root(), "tool", false, &snapshot)?; ensure!( results == vec![exec], "expected executable to be discovered" @@ -73,7 +78,9 @@ fn search_workspace_collects_all_matches(workspace: TempWorkspace) -> Result<()> fs::create_dir_all(subdir.as_std_path()).context("mkdir bin")?; let second = write_exec(subdir.as_path(), "tool")?; - let mut results = search_workspace(workspace.root(), "tool", true)?; + let snapshot = + EnvSnapshot::capture(Some(workspace.root())).expect("capture env for workspace search"); + let mut results = search_workspace(workspace.root(), "tool", true, &snapshot)?; results.sort(); let mut expected = vec![first, second]; expected.sort(); @@ -90,7 +97,9 @@ fn search_workspace_skips_heavy_directories(workspace: TempWorkspace) -> Result< fs::create_dir_all(heavy.as_std_path()).context("mkdir target")?; write_exec(heavy.as_path(), "tool")?; - let results = search_workspace(workspace.root(), "tool", false)?; + let snapshot = + 
EnvSnapshot::capture(Some(workspace.root())).expect("capture env for workspace search"); + let results = search_workspace(workspace.root(), "tool", false, &snapshot)?; ensure!(results.is_empty(), "expected target/ to be skipped"); Ok(()) } @@ -108,7 +117,9 @@ fn search_workspace_ignores_unreadable_entries(workspace: TempWorkspace) -> Resu fs::set_permissions(blocked.as_std_path(), perms).context("chmod blocked")?; let exec = write_exec(workspace.root(), "tool")?; - let results = search_workspace(workspace.root(), "tool", false)?; + let snapshot = + EnvSnapshot::capture(Some(workspace.root())).expect("capture env for workspace search"); + let results = search_workspace(workspace.root(), "tool", false, &snapshot)?; ensure!( results == vec![exec], "expected readable executable despite blocked dir" From aed5a18f3763c269ff7765233a65547df7a646aa Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 14:29:03 +0000 Subject: [PATCH 15/23] feat(stdlib): extract registration and config modules Split the standard library registration and configuration logic into separate modules `register.rs` and `config.rs` respectively. This refactors `src/stdlib/mod.rs` by moving the registration functions (`register`, `register_with_config`, `value_from_bytes`) to `register.rs`, and the configuration struct `StdlibConfig` along with related constants and `NetworkConfig` to `config.rs`. This improves code organization by separating concerns, making the stdlib codebase more modular and maintainable. The public API is re-exported from `mod.rs` for backwards compatibility. Additional cleanup and simplification of `mod.rs` were performed to remove clutter and centralize related code. Also, minor changes were made in the `which/lookup/mod.rs` to fix Windows path matching logic by caching candidate basenames for more efficient matching. 
Co-authored-by: terragon-labs[bot] --- src/stdlib/config.rs | 319 +++++++++++++++++++ src/stdlib/mod.rs | 565 +-------------------------------- src/stdlib/register.rs | 165 ++++++++++ src/stdlib/which/lookup/mod.rs | 66 +++- 4 files changed, 545 insertions(+), 570 deletions(-) create mode 100644 src/stdlib/config.rs create mode 100644 src/stdlib/register.rs diff --git a/src/stdlib/config.rs b/src/stdlib/config.rs new file mode 100644 index 00000000..a97dba2a --- /dev/null +++ b/src/stdlib/config.rs @@ -0,0 +1,319 @@ +//! Configuration types and defaults for wiring the stdlib into `MiniJinja`. + +use super::{command, network::NetworkPolicy}; +use anyhow::bail; +use camino::{Utf8Component, Utf8Path, Utf8PathBuf}; +use cap_std::{ambient_authority, fs_utf8::Dir}; +use std::{env, sync::Arc}; + +/// Default relative path for the fetch cache within the workspace. +pub const DEFAULT_FETCH_CACHE_DIR: &str = ".netsuke/fetch"; +/// Default upper bound for network helper responses (8 MiB). +pub const DEFAULT_FETCH_MAX_RESPONSE_BYTES: u64 = 8 * 1024 * 1024; +/// Default upper bound for captured command output (1 MiB). +pub const DEFAULT_COMMAND_MAX_OUTPUT_BYTES: u64 = 1024 * 1024; +/// Default upper bound for streamed command output files (64 MiB). +pub const DEFAULT_COMMAND_MAX_STREAM_BYTES: u64 = 64 * 1024 * 1024; +/// Relative directory for command helper tempfiles. +pub const DEFAULT_COMMAND_TEMP_DIR: &str = ".netsuke/tmp"; + +/// Configuration for registering Netsuke's standard library helpers. +#[derive(Debug, Clone)] +pub struct StdlibConfig { + workspace_root: Arc, + workspace_root_path: Option, + fetch_cache_relative: Utf8PathBuf, + network_policy: NetworkPolicy, + fetch_max_response_bytes: u64, + command_max_output_bytes: u64, + command_max_stream_bytes: u64, +} + +impl StdlibConfig { + /// Create a configuration bound to `workspace_root`. + /// + /// # Panics + /// + /// Panics if Netsuke's built-in fetch cache directory constant fails + /// validation. 
This indicates a programming error and should never occur in + /// production builds. + #[must_use] + pub fn new(workspace_root: Dir) -> Self { + let default = Utf8PathBuf::from(DEFAULT_FETCH_CACHE_DIR); + // Rationale: the constant is static and validated for defence in depth. + if let Err(err) = Self::validate_cache_relative(&default) { + panic!("default fetch cache path should be valid: {err}"); + } + Self { + workspace_root: Arc::new(workspace_root), + workspace_root_path: None, + fetch_cache_relative: default, + network_policy: NetworkPolicy::default(), + fetch_max_response_bytes: DEFAULT_FETCH_MAX_RESPONSE_BYTES, + command_max_output_bytes: DEFAULT_COMMAND_MAX_OUTPUT_BYTES, + command_max_stream_bytes: DEFAULT_COMMAND_MAX_STREAM_BYTES, + } + } + + /// Record the absolute workspace root path for capability-scoped helpers. + /// + /// # Panics + /// + /// Panics if `path` is not absolute. + #[must_use] + pub fn with_workspace_root_path(mut self, path: impl Into) -> Self { + let absolute = path.into(); + assert!(absolute.is_absolute(), "workspace root must be absolute"); + self.workspace_root_path = Some(absolute); + self + } + + /// Override the network cache location relative to the workspace root. + /// + /// # Errors + /// + /// Returns an error when the path is empty, absolute, or escapes the + /// workspace via parent components. + pub fn with_fetch_cache_relative(mut self, relative_path: Utf8PathBuf) -> anyhow::Result { + Self::validate_cache_relative(&relative_path)?; + self.fetch_cache_relative = relative_path; + Ok(self) + } + + /// Override the network policy used by stdlib helpers. + #[must_use] + pub fn with_network_policy(mut self, policy: NetworkPolicy) -> Self { + self.network_policy = policy; + self + } + + /// Override the maximum size for HTTP responses fetched via stdlib helpers. + /// + /// # Errors + /// + /// Returns an error when `max_bytes` is zero. 
+ pub fn with_fetch_max_response_bytes(mut self, max_bytes: u64) -> anyhow::Result { + anyhow::ensure!(max_bytes > 0, "fetch response limit must be positive"); + self.fetch_max_response_bytes = max_bytes; + Ok(self) + } + + /// Override the maximum captured stdout size for stdlib command helpers. + /// + /// # Errors + /// + /// Returns an error when `max_bytes` is zero. + pub fn with_command_max_output_bytes(mut self, max_bytes: u64) -> anyhow::Result { + anyhow::ensure!(max_bytes > 0, "command output limit must be positive"); + self.command_max_output_bytes = max_bytes; + Ok(self) + } + + /// Override the maximum streamed stdout size for stdlib command helpers. + /// + /// # Errors + /// + /// Returns an error when `max_bytes` is zero. + pub fn with_command_max_stream_bytes(mut self, max_bytes: u64) -> anyhow::Result { + anyhow::ensure!(max_bytes > 0, "command stream limit must be positive"); + self.command_max_stream_bytes = max_bytes; + Ok(self) + } + + /// The configured fetch cache directory relative to the workspace root. + #[must_use] + pub fn fetch_cache_relative(&self) -> &Utf8Path { + &self.fetch_cache_relative + } + + /// Consume the configuration and expose component modules with owned state. 
+ pub(crate) fn into_components(self) -> (NetworkConfig, command::CommandConfig) { + let Self { + workspace_root, + workspace_root_path, + fetch_cache_relative, + network_policy, + fetch_max_response_bytes, + command_max_output_bytes, + command_max_stream_bytes, + } = self; + + let command_root = Arc::clone(&workspace_root); + let network = NetworkConfig { + cache_root: workspace_root, + cache_relative: fetch_cache_relative, + policy: network_policy, + max_response_bytes: fetch_max_response_bytes, + }; + + let command = command::CommandConfig::new( + command_max_output_bytes, + command_max_stream_bytes, + command_root, + workspace_root_path.map(Arc::new), + ); + + (network, command) + } + + pub(crate) fn validate_cache_relative(relative: &Utf8Path) -> anyhow::Result<()> { + if relative.as_str().is_empty() { + bail!("fetch cache path must not be empty"); + } + + if relative.is_absolute() { + bail!( + "fetch cache path '{}' must be relative to the workspace", + relative + ); + } + + for component in relative.components() { + if matches!( + component, + Utf8Component::ParentDir | Utf8Component::Prefix(_) + ) { + bail!( + "fetch cache path '{}' must stay within the workspace", + relative + ); + } + } + + Ok(()) + } + + pub(crate) fn workspace_root_path(&self) -> Option<&Utf8Path> { + self.workspace_root_path.as_deref() + } +} + +impl Default for StdlibConfig { + fn default() -> Self { + let root = Dir::open_ambient_dir(".", ambient_authority()) + .unwrap_or_else(|err| panic!("open stdlib workspace root: {err}")); + let cwd = + env::current_dir().unwrap_or_else(|err| panic!("resolve current directory: {err}")); + let path = Utf8PathBuf::from_path_buf(cwd) + .unwrap_or_else(|path| panic!("cwd contains non-UTF-8 components: {}", path.display())); + Self::new(root).with_workspace_root_path(path) + } +} + +/// Internal configuration passed to the network module for fetch cache initialisation. 
+#[derive(Clone)] +pub struct NetworkConfig { + /// Capability-scoped workspace root for network caches. + pub cache_root: Arc, + /// Relative cache directory within the workspace. + pub cache_relative: Utf8PathBuf, + /// Network policy applied to fetch helpers. + pub policy: NetworkPolicy, + /// Maximum allowed size for HTTP responses. + pub max_response_bytes: u64, +} + +#[cfg(test)] +mod tests { + use super::{DEFAULT_COMMAND_MAX_OUTPUT_BYTES, DEFAULT_COMMAND_MAX_STREAM_BYTES, StdlibConfig}; + use camino::{Utf8Path, Utf8PathBuf}; + use cap_std::{ambient_authority, fs_utf8::Dir}; + use std::env; + + #[test] + fn validate_cache_relative_rejects_empty() { + let err = StdlibConfig::validate_cache_relative(Utf8Path::new("")) + .expect_err("empty path should fail"); + assert_eq!(err.to_string(), "fetch cache path must not be empty"); + } + + #[test] + fn validate_cache_relative_rejects_absolute_paths() { + let err = StdlibConfig::validate_cache_relative(Utf8Path::new("/cache")) + .expect_err("absolute path should fail"); + assert_eq!( + err.to_string(), + "fetch cache path '/cache' must be relative to the workspace" + ); + } + + #[test] + fn validate_cache_relative_rejects_parent_components() { + let err = StdlibConfig::validate_cache_relative(Utf8Path::new("../escape")) + .expect_err("parent components should fail"); + assert_eq!( + err.to_string(), + "fetch cache path '../escape' must stay within the workspace" + ); + } + + #[test] + fn validate_cache_relative_accepts_workspace_relative_paths() { + StdlibConfig::validate_cache_relative(Utf8Path::new("nested/cache")) + .expect("relative path should be accepted"); + } + + #[test] + fn command_limits_default_to_constants() { + let config = StdlibConfig::default(); + assert_eq!( + config.command_max_output_bytes, + DEFAULT_COMMAND_MAX_OUTPUT_BYTES + ); + assert_eq!( + config.command_max_stream_bytes, + DEFAULT_COMMAND_MAX_STREAM_BYTES + ); + } + + #[test] + fn command_output_limit_builder_updates_value() { + let config = 
StdlibConfig::default() + .with_command_max_output_bytes(2_048) + .expect("positive limits should succeed"); + assert_eq!(config.command_max_output_bytes, 2_048); + } + + #[test] + fn command_output_limit_builder_rejects_zero() { + let err = StdlibConfig::default() + .with_command_max_output_bytes(0) + .expect_err("zero-byte limits must be rejected"); + assert_eq!(err.to_string(), "command output limit must be positive"); + } + + #[test] + fn command_stream_limit_builder_updates_value() { + let config = StdlibConfig::default() + .with_command_max_stream_bytes(65_536) + .expect("positive limits should succeed"); + assert_eq!(config.command_max_stream_bytes, 65_536); + } + + #[test] + fn command_stream_limit_builder_rejects_zero() { + let err = StdlibConfig::default() + .with_command_max_stream_bytes(0) + .expect_err("zero-byte limits must be rejected"); + assert_eq!(err.to_string(), "command stream limit must be positive"); + } + + #[test] + fn command_limits_propagate_into_components() { + let dir = Dir::open_ambient_dir(".", ambient_authority()) + .expect("open workspace root for config tests"); + let path = Utf8PathBuf::from_path_buf( + env::current_dir().expect("resolve cwd for command config test"), + ) + .expect("cwd should be valid UTF-8"); + let config = StdlibConfig::new(dir) + .with_workspace_root_path(path) + .with_command_max_output_bytes(4_096) + .expect("set capture limit") + .with_command_max_stream_bytes(131_072) + .expect("set streaming limit"); + let (_network, command) = config.into_components(); + assert_eq!(command.max_capture_bytes, 4_096); + assert_eq!(command.max_stream_bytes, 131_072); + } +} diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index b36dde3e..28db0fec 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -1,315 +1,32 @@ //! Standard library registration for `MiniJinja` templates. //! -//! The module wires the platform-aware file tests, the path manipulation -//! 
filters, the collection helpers, the network utilities, and the command -//! wrappers into a single entrypoint so template authors can rely on -//! consistent behaviour across projects. Tests such as `dir`, `file`, and -//! `symlink` inspect metadata without following symlinks, while filters -//! expose conveniences like `basename`, `with_suffix`, `realpath`, content -//! hashing, collection utilities including `flatten`, `group_by`, and `uniq`, -//! HTTP helpers like `fetch`, and shell bridges such as `shell` and `grep`. +//! Wires file tests, path helpers, collection utilities, network helpers, and +//! command wrappers into a single entrypoint so templates behave consistently +//! across projects. mod collections; mod command; +mod config; mod network; mod path; +mod register; mod time; mod which; +pub use config::{ + DEFAULT_COMMAND_MAX_OUTPUT_BYTES, DEFAULT_COMMAND_MAX_STREAM_BYTES, DEFAULT_COMMAND_TEMP_DIR, + DEFAULT_FETCH_CACHE_DIR, DEFAULT_FETCH_MAX_RESPONSE_BYTES, NetworkConfig, StdlibConfig, +}; pub use network::{ HostPatternError, NetworkPolicy, NetworkPolicyConfigError, NetworkPolicyViolation, }; +pub use register::{register, register_with_config, value_from_bytes}; -use anyhow::{Context, bail}; -use camino::{Utf8Component, Utf8Path, Utf8PathBuf}; -#[cfg(unix)] -use cap_std::fs::FileTypeExt; -use cap_std::{ambient_authority, fs, fs_utf8::Dir}; -use minijinja::{Environment, Error, value::Value}; use std::{ - env, sync::Arc, sync::atomic::{AtomicBool, Ordering}, }; -type FileTest = (&'static str, fn(fs::FileType) -> bool); - -/// Default relative path for the fetch cache within the workspace. -pub(crate) const DEFAULT_FETCH_CACHE_DIR: &str = ".netsuke/fetch"; -/// Default upper bound for network helper responses (8 MiB). -pub(crate) const DEFAULT_FETCH_MAX_RESPONSE_BYTES: u64 = 8 * 1024 * 1024; -/// Default upper bound for captured command output (1 MiB). 
-pub(crate) const DEFAULT_COMMAND_MAX_OUTPUT_BYTES: u64 = 1024 * 1024; -/// Default upper bound for streamed command output files (64 MiB). -pub(crate) const DEFAULT_COMMAND_MAX_STREAM_BYTES: u64 = 64 * 1024 * 1024; -/// Relative directory for command helper tempfiles. -pub(crate) const DEFAULT_COMMAND_TEMP_DIR: &str = ".netsuke/tmp"; - -/// Configuration for registering Netsuke's standard library helpers. -/// -/// The configuration records the capability-scoped workspace directory used to -/// sandbox helper I/O and the relative path where network caches are stored. -/// -/// # Examples -/// -/// ```rust,no_run -/// use cap_std::{ambient_authority, fs_utf8::Dir}; -/// use minijinja::Environment; -/// use netsuke::stdlib::{self, StdlibConfig}; -/// -/// let root = Dir::open_ambient_dir(".", ambient_authority()) -/// .expect("open workspace"); -/// let mut env = Environment::new(); -/// let _state = stdlib::register_with_config(&mut env, StdlibConfig::new(root)); -/// ``` -#[derive(Debug, Clone)] -pub struct StdlibConfig { - workspace_root: Arc, - workspace_root_path: Option, - fetch_cache_relative: Utf8PathBuf, - network_policy: NetworkPolicy, - fetch_max_response_bytes: u64, - command_max_output_bytes: u64, - command_max_stream_bytes: u64, -} - -impl StdlibConfig { - /// Create a configuration bound to `workspace_root`. - /// - /// # Examples - /// - /// ```rust,no_run - /// use cap_std::{ambient_authority, fs_utf8::Dir}; - /// use netsuke::stdlib::StdlibConfig; - /// - /// let dir = Dir::open_ambient_dir(".", ambient_authority()) - /// .expect("open workspace"); - /// let config = StdlibConfig::new(dir); - /// assert_eq!(config.fetch_cache_relative().as_str(), ".netsuke/fetch"); - /// ``` - /// - /// # Panics - /// - /// Panics if Netsuke's built-in fetch cache directory constant fails - /// validation. This indicates a programming error and should never occur in - /// production builds. 
- #[must_use] - pub fn new(workspace_root: Dir) -> Self { - let default = Utf8PathBuf::from(DEFAULT_FETCH_CACHE_DIR); - // Rationale: the constant is static and validated for defence in depth. - if let Err(err) = Self::validate_cache_relative(&default) { - panic!("default fetch cache path should be valid: {err}"); - } - Self { - workspace_root: Arc::new(workspace_root), - workspace_root_path: None, - fetch_cache_relative: default, - network_policy: NetworkPolicy::default(), - fetch_max_response_bytes: DEFAULT_FETCH_MAX_RESPONSE_BYTES, - command_max_output_bytes: DEFAULT_COMMAND_MAX_OUTPUT_BYTES, - command_max_stream_bytes: DEFAULT_COMMAND_MAX_STREAM_BYTES, - } - } - - /// Record the absolute workspace root path for capability-scoped helpers. - /// - /// # Panics - /// - /// Panics if `path` is not absolute. - #[must_use] - pub fn with_workspace_root_path(mut self, path: impl Into) -> Self { - let workspace_path = path.into(); - assert!( - workspace_path.is_absolute(), - "with_workspace_root_path requires an absolute path, got: {workspace_path}" - ); - self.workspace_root_path = Some(workspace_path); - self - } - - /// Return the recorded absolute workspace root path, when available. - /// - /// # Examples - /// - /// ```rust,no_run - /// # use cap_std::{ambient_authority, fs_utf8::Dir}; - /// # use netsuke::stdlib::StdlibConfig; - /// let root = Dir::open_ambient_dir(".", ambient_authority()).expect("open workspace"); - /// let config = StdlibConfig::new(root).with_workspace_root_path("/tmp/example".into()); - /// assert_eq!( - /// config.workspace_root_path().unwrap().as_str(), - /// "/tmp/example" - /// ); - /// ``` - #[must_use] - pub fn workspace_root_path(&self) -> Option<&Utf8Path> { - self.workspace_root_path.as_deref() - } - - /// Override the relative cache directory within the workspace. - /// - /// # Errors - /// - /// Returns an error when the provided path is empty, absolute, or attempts - /// to escape the workspace via parent components. 
- pub fn with_fetch_cache_relative( - mut self, - relative: impl Into, - ) -> anyhow::Result { - let relative_path = relative.into(); - Self::validate_cache_relative(&relative_path)?; - self.fetch_cache_relative = relative_path; - Ok(self) - } - - /// Replace the default network policy with a custom configuration. - /// - /// # Examples - /// - /// ```rust,no_run - /// use cap_std::{ambient_authority, fs_utf8::Dir}; - /// use netsuke::stdlib::{NetworkPolicy, StdlibConfig}; - /// - /// let dir = Dir::open_ambient_dir(".", ambient_authority()) - /// .expect("open workspace"); - /// let policy = NetworkPolicy::default() - /// .allow_scheme("http") - /// .expect("allow http"); - /// let config = StdlibConfig::new(dir).with_network_policy(policy); - /// assert_eq!(config.fetch_cache_relative().as_str(), ".netsuke/fetch"); - /// ``` - #[must_use] - pub fn with_network_policy(mut self, policy: NetworkPolicy) -> Self { - self.network_policy = policy; - self - } - - /// Override the maximum fetch response size in bytes. - /// - /// The limit protects renderers from unbounded downloads. Values must be - /// strictly positive; zero-byte responses remain permitted because they do - /// not consume the budget. - /// - /// # Errors - /// - /// Returns an error when `max_bytes` is zero. - pub fn with_fetch_max_response_bytes(mut self, max_bytes: u64) -> anyhow::Result { - anyhow::ensure!(max_bytes > 0, "fetch response limit must be positive"); - self.fetch_max_response_bytes = max_bytes; - Ok(self) - } - - /// Override the maximum captured command output size in bytes. - /// - /// # Errors - /// - /// Returns an error when `max_bytes` is zero. - pub fn with_command_max_output_bytes(mut self, max_bytes: u64) -> anyhow::Result { - anyhow::ensure!(max_bytes > 0, "command output limit must be positive"); - self.command_max_output_bytes = max_bytes; - Ok(self) - } - - /// Override the maximum streamed command output size in bytes. 
- /// - /// Streaming still enforces a ceiling to prevent helpers from exhausting - /// disk space. Configure the limit according to the largest expected - /// helper output. - /// - /// # Errors - /// - /// Returns an error when `max_bytes` is zero. - pub fn with_command_max_stream_bytes(mut self, max_bytes: u64) -> anyhow::Result { - anyhow::ensure!(max_bytes > 0, "command stream limit must be positive"); - self.command_max_stream_bytes = max_bytes; - Ok(self) - } - - /// The configured fetch cache directory relative to the workspace root. - #[must_use] - pub fn fetch_cache_relative(&self) -> &Utf8Path { - &self.fetch_cache_relative - } - - /// Consume the configuration and expose component modules with owned state. - pub(crate) fn into_components(self) -> (NetworkConfig, command::CommandConfig) { - let Self { - workspace_root, - workspace_root_path, - fetch_cache_relative, - network_policy, - fetch_max_response_bytes, - command_max_output_bytes, - command_max_stream_bytes, - } = self; - - let command_root = Arc::clone(&workspace_root); - let network = NetworkConfig { - cache_root: workspace_root, - cache_relative: fetch_cache_relative, - policy: network_policy, - max_response_bytes: fetch_max_response_bytes, - }; - - let command = command::CommandConfig::new( - command_max_output_bytes, - command_max_stream_bytes, - command_root, - workspace_root_path.map(Arc::new), - ); - - (network, command) - } - - pub(crate) fn validate_cache_relative(relative: &Utf8Path) -> anyhow::Result<()> { - if relative.as_str().is_empty() { - bail!("fetch cache path must not be empty"); - } - - if relative.is_absolute() { - bail!( - "fetch cache path '{}' must be relative to the workspace", - relative - ); - } - - for component in relative.components() { - if matches!( - component, - Utf8Component::ParentDir | Utf8Component::Prefix(_) - ) { - bail!( - "fetch cache path '{}' must stay within the workspace", - relative - ); - } - } - - Ok(()) - } -} - -impl Default for StdlibConfig { - 
fn default() -> Self { - let root = Dir::open_ambient_dir(".", ambient_authority()) - .unwrap_or_else(|err| panic!("open stdlib workspace root: {err}")); - let cwd = - env::current_dir().unwrap_or_else(|err| panic!("resolve current directory: {err}")); - let path = Utf8PathBuf::from_path_buf(cwd) - .unwrap_or_else(|path| panic!("cwd contains non-UTF-8 components: {}", path.display())); - Self::new(root).with_workspace_root_path(path) - } -} - -/// Internal configuration passed to the network module for fetch cache initialisation. -#[derive(Clone)] -pub(crate) struct NetworkConfig { - pub(crate) cache_root: Arc, - pub(crate) cache_relative: Utf8PathBuf, - pub(crate) policy: NetworkPolicy, - pub(crate) max_response_bytes: u64, -} - /// Captures mutable state shared between stdlib helpers. #[derive(Clone, Default, Debug)] pub struct StdlibState { @@ -332,265 +49,3 @@ impl StdlibState { Arc::clone(&self.impure) } } - -/// Register standard library helpers with the `MiniJinja` environment. -/// -/// # Examples -/// ``` -/// use minijinja::{context, Environment}; -/// use netsuke::stdlib; -/// -/// let mut env = Environment::new(); -/// let _state = stdlib::register(&mut env).expect("register stdlib"); -/// env.add_template("t", "{{ path | basename }}").expect("add template"); -/// let tmpl = env.get_template("t").expect("get template"); -/// let rendered = tmpl -/// .render(context!(path => "foo/bar.txt")) -/// .expect("render"); -/// assert_eq!(rendered, "bar.txt"); -/// ``` -/// -/// # Errors -/// -/// Returns an error when the current working directory cannot be opened using -/// capability-based I/O (for example, when permissions are insufficient or the -/// directory no longer exists) or when the current directory path contains -/// non-UTF-8 components and cannot be converted into a UTF-8 workspace root. 
-pub fn register(env: &mut Environment<'_>) -> anyhow::Result { - let root = Dir::open_ambient_dir(".", ambient_authority()) - .context("open current directory for stdlib registration")?; - let cwd = env::current_dir().context("resolve current directory for stdlib registration")?; - let path = Utf8PathBuf::from_path_buf(cwd).map_err(|path| { - anyhow::anyhow!("current directory contains non-UTF-8 components: {path:?}") - })?; - register_with_config(env, StdlibConfig::new(root).with_workspace_root_path(path)) -} - -/// Register stdlib helpers using an explicit configuration. -/// -/// This is intended for callers that have already derived a capability-scoped -/// workspace directory and need to wire the stdlib into a `MiniJinja` -/// environment. -/// -/// # Examples -/// -/// ```rust,no_run -/// use cap_std::{ambient_authority, fs_utf8::Dir}; -/// use minijinja::Environment; -/// use netsuke::stdlib::{self, StdlibConfig}; -/// -/// let dir = Dir::open_ambient_dir(".", ambient_authority()) -/// .expect("open workspace"); -/// let mut env = Environment::new(); -/// let _state = stdlib::register_with_config(&mut env, StdlibConfig::new(dir)); -/// ``` -/// -/// # Errors -/// -/// Returns an error if stdlib components cannot be registered (for example, -/// when the which resolver cache configuration is invalid). 
-pub fn register_with_config( - env: &mut Environment<'_>, - config: StdlibConfig, -) -> anyhow::Result { - let state = StdlibState::default(); - register_file_tests(env); - path::register_filters(env); - collections::register_filters(env); - let which_cwd = config - .workspace_root_path() - .map(|path| Arc::new(path.to_path_buf())); - which::register(env, which_cwd)?; - let impure = state.impure_flag(); - let (network_config, command_config) = config.into_components(); - network::register_functions(env, Arc::clone(&impure), network_config); - command::register(env, impure, command_config); - time::register_functions(env); - Ok(state) -} - -pub(crate) fn value_from_bytes(bytes: Vec) -> Value { - match String::from_utf8(bytes) { - Ok(text) => Value::from(text), - Err(err) => Value::from_bytes(err.into_bytes()), - } -} - -fn register_file_tests(env: &mut Environment<'_>) { - const TESTS: &[FileTest] = &[ - ("dir", is_dir), - ("file", is_file), - ("symlink", is_symlink), - ("pipe", is_fifo), - ("block_device", is_block_device), - ("char_device", is_char_device), - ("device", is_device), - ]; - - for &(name, pred) in TESTS { - env.add_test(name, move |val: Value| -> Result { - if let Some(s) = val.as_str() { - return path::file_type_matches(Utf8Path::new(s), pred); - } - Ok(false) - }); - } -} - -fn is_dir(ft: fs::FileType) -> bool { - ft.is_dir() -} - -fn is_file(ft: fs::FileType) -> bool { - ft.is_file() -} - -fn is_symlink(ft: fs::FileType) -> bool { - ft.is_symlink() -} - -#[cfg(unix)] -fn is_fifo(ft: fs::FileType) -> bool { - ft.is_fifo() -} - -#[cfg(not(unix))] -fn is_fifo(_ft: fs::FileType) -> bool { - false -} - -#[cfg(unix)] -fn is_block_device(ft: fs::FileType) -> bool { - ft.is_block_device() -} - -#[cfg(not(unix))] -fn is_block_device(_ft: fs::FileType) -> bool { - false -} - -#[cfg(unix)] -fn is_char_device(ft: fs::FileType) -> bool { - ft.is_char_device() -} - -#[cfg(not(unix))] -fn is_char_device(_ft: fs::FileType) -> bool { - false -} - -#[cfg(unix)] 
-fn is_device(ft: fs::FileType) -> bool { - is_block_device(ft) || is_char_device(ft) -} - -#[cfg(not(unix))] -fn is_device(_ft: fs::FileType) -> bool { - false -} - -#[cfg(test)] -mod tests { - use super::{DEFAULT_COMMAND_MAX_OUTPUT_BYTES, DEFAULT_COMMAND_MAX_STREAM_BYTES, StdlibConfig}; - - use camino::{Utf8Path, Utf8PathBuf}; - use cap_std::{ambient_authority, fs_utf8::Dir}; - use std::env; - - #[test] - fn validate_cache_relative_rejects_empty() { - let err = StdlibConfig::validate_cache_relative(Utf8Path::new("")) - .expect_err("empty path should fail"); - assert_eq!(err.to_string(), "fetch cache path must not be empty"); - } - - #[test] - fn validate_cache_relative_rejects_absolute_paths() { - let err = StdlibConfig::validate_cache_relative(Utf8Path::new("/cache")) - .expect_err("absolute path should fail"); - assert_eq!( - err.to_string(), - "fetch cache path '/cache' must be relative to the workspace" - ); - } - - #[test] - fn validate_cache_relative_rejects_parent_components() { - let err = StdlibConfig::validate_cache_relative(Utf8Path::new("../escape")) - .expect_err("parent components should fail"); - assert_eq!( - err.to_string(), - "fetch cache path '../escape' must stay within the workspace" - ); - } - - #[test] - fn validate_cache_relative_accepts_workspace_relative_paths() { - StdlibConfig::validate_cache_relative(Utf8Path::new("nested/cache")) - .expect("relative path should be accepted"); - } - - #[test] - fn command_limits_default_to_constants() { - let config = StdlibConfig::default(); - assert_eq!( - config.command_max_output_bytes, - DEFAULT_COMMAND_MAX_OUTPUT_BYTES - ); - assert_eq!( - config.command_max_stream_bytes, - DEFAULT_COMMAND_MAX_STREAM_BYTES - ); - } - - #[test] - fn command_output_limit_builder_updates_value() { - let config = StdlibConfig::default() - .with_command_max_output_bytes(2_048) - .expect("positive limits should succeed"); - assert_eq!(config.command_max_output_bytes, 2_048); - } - - #[test] - fn 
command_output_limit_builder_rejects_zero() { - let err = StdlibConfig::default() - .with_command_max_output_bytes(0) - .expect_err("zero-byte limits must be rejected"); - assert_eq!(err.to_string(), "command output limit must be positive"); - } - - #[test] - fn command_stream_limit_builder_updates_value() { - let config = StdlibConfig::default() - .with_command_max_stream_bytes(65_536) - .expect("positive limits should succeed"); - assert_eq!(config.command_max_stream_bytes, 65_536); - } - - #[test] - fn command_stream_limit_builder_rejects_zero() { - let err = StdlibConfig::default() - .with_command_max_stream_bytes(0) - .expect_err("zero-byte limits must be rejected"); - assert_eq!(err.to_string(), "command stream limit must be positive"); - } - - #[test] - fn command_limits_propagate_into_components() { - let dir = Dir::open_ambient_dir(".", ambient_authority()) - .expect("open workspace root for config tests"); - let path = Utf8PathBuf::from_path_buf( - env::current_dir().expect("resolve cwd for command config test"), - ) - .expect("cwd should be valid UTF-8"); - let config = StdlibConfig::new(dir) - .with_workspace_root_path(path) - .with_command_max_output_bytes(4_096) - .expect("set capture limit") - .with_command_max_stream_bytes(131_072) - .expect("set streaming limit"); - let (_network, command) = config.into_components(); - assert_eq!(command.max_capture_bytes, 4_096); - assert_eq!(command.max_stream_bytes, 131_072); - } -} diff --git a/src/stdlib/register.rs b/src/stdlib/register.rs new file mode 100644 index 00000000..e6c3952d --- /dev/null +++ b/src/stdlib/register.rs @@ -0,0 +1,165 @@ +//! Registration entrypoints for wiring stdlib helpers into `MiniJinja`. 
+ +use super::{StdlibConfig, StdlibState, collections, command, network, path, time, which}; +use anyhow::Context; +use camino::Utf8Path; +#[cfg(unix)] +use cap_std::fs::FileTypeExt; +use cap_std::{ambient_authority, fs, fs_utf8::Dir}; +use minijinja::{Environment, Error, value::Value}; +use std::sync::Arc; + +type FileTest = (&'static str, fn(fs::FileType) -> bool); + +/// Register standard library helpers with the `MiniJinja` environment. +/// +/// # Examples +/// ``` +/// use minijinja::{context, Environment}; +/// use netsuke::stdlib; +/// +/// let mut env = Environment::new(); +/// let _state = stdlib::register(&mut env).expect("register stdlib"); +/// env.add_template("t", "{{ path | basename }}").expect("add template"); +/// let tmpl = env.get_template("t").expect("get template"); +/// let rendered = tmpl +/// .render(context!(path => "foo/bar.txt")) +/// .expect("render"); +/// assert_eq!(rendered, "bar.txt"); +/// ``` +/// +/// # Errors +/// +/// Returns an error when the current working directory cannot be opened using +/// capability-based I/O (for example, when permissions are insufficient or the +/// directory no longer exists) or when the current directory path contains +/// non-UTF-8 components and cannot be converted into a UTF-8 workspace root. +pub fn register(env: &mut Environment<'_>) -> anyhow::Result { + let root = Dir::open_ambient_dir(".", ambient_authority()) + .context("open current directory for stdlib registration")?; + let cwd = + std::env::current_dir().context("resolve current directory for stdlib registration")?; + let path = camino::Utf8PathBuf::from_path_buf(cwd).map_err(|path| { + anyhow::anyhow!("current directory contains non-UTF-8 components: {path:?}") + })?; + register_with_config(env, StdlibConfig::new(root).with_workspace_root_path(path)) +} + +/// Register stdlib helpers using an explicit configuration. 
+/// +/// This is intended for callers that have already derived a capability-scoped +/// workspace directory and need to wire the stdlib into a `MiniJinja` +/// environment. +/// +/// # Examples +/// +/// ```rust,no_run +/// use cap_std::{ambient_authority, fs_utf8::Dir}; +/// use minijinja::Environment; +/// use netsuke::stdlib::{self, StdlibConfig}; +/// +/// let dir = Dir::open_ambient_dir(".", ambient_authority()) +/// .expect("open workspace"); +/// let mut env = Environment::new(); +/// let _state = stdlib::register_with_config(&mut env, StdlibConfig::new(dir)); +/// ``` +/// +/// # Errors +/// +/// Returns an error if stdlib components cannot be registered (for example, +/// when the which resolver cache configuration is invalid). +pub fn register_with_config( + env: &mut Environment<'_>, + config: StdlibConfig, +) -> anyhow::Result { + let state = StdlibState::default(); + register_file_tests(env); + path::register_filters(env); + collections::register_filters(env); + let which_cwd = config + .workspace_root_path() + .map(|path| Arc::new(path.to_path_buf())); + which::register(env, which_cwd)?; + let impure = state.impure_flag(); + let (network_config, command_config) = config.into_components(); + network::register_functions(env, Arc::clone(&impure), network_config); + command::register(env, impure, command_config); + time::register_functions(env); + Ok(state) +} + +/// Convert UTF-8 or fall back to bytes for byte-oriented network helpers. 
+#[must_use] +pub fn value_from_bytes(bytes: Vec) -> Value { + match String::from_utf8(bytes) { + Ok(text) => Value::from(text), + Err(err) => Value::from_bytes(err.into_bytes()), + } +} + +fn register_file_tests(env: &mut Environment<'_>) { + const TESTS: &[FileTest] = &[ + ("dir", is_dir), + ("file", is_file), + ("symlink", is_symlink), + ("pipe", is_fifo), + ("block_device", is_block_device), + ("char_device", is_char_device), + ("device", is_device), + ]; + + for &(name, pred) in TESTS { + env.add_test(name, move |val: Value| -> Result { + if let Some(s) = val.as_str() { + return path::file_type_matches(Utf8Path::new(s), pred); + } + Ok(false) + }); + } +} + +fn is_dir(ft: fs::FileType) -> bool { + ft.is_dir() +} +fn is_file(ft: fs::FileType) -> bool { + ft.is_file() +} +fn is_symlink(ft: fs::FileType) -> bool { + ft.is_symlink() +} + +#[cfg(unix)] +fn is_fifo(ft: fs::FileType) -> bool { + ft.is_fifo() +} +#[cfg(not(unix))] +fn is_fifo(_ft: fs::FileType) -> bool { + false +} + +#[cfg(unix)] +fn is_block_device(ft: fs::FileType) -> bool { + ft.is_block_device() +} +#[cfg(not(unix))] +fn is_block_device(_ft: fs::FileType) -> bool { + false +} + +#[cfg(unix)] +fn is_char_device(ft: fs::FileType) -> bool { + ft.is_char_device() +} +#[cfg(not(unix))] +fn is_char_device(_ft: fs::FileType) -> bool { + false +} + +#[cfg(unix)] +fn is_device(ft: fs::FileType) -> bool { + is_block_device(ft) || is_char_device(ft) +} +#[cfg(not(unix))] +fn is_device(_ft: fs::FileType) -> bool { + false +} diff --git a/src/stdlib/which/lookup/mod.rs b/src/stdlib/which/lookup/mod.rs index a12306e9..edfbe6f5 100644 --- a/src/stdlib/which/lookup/mod.rs +++ b/src/stdlib/which/lookup/mod.rs @@ -5,6 +5,8 @@ use std::fs; use camino::{Utf8Path, Utf8PathBuf}; use indexmap::IndexSet; use minijinja::{Error, ErrorKind}; +#[cfg(windows)] +use std::collections::HashSet; use walkdir::WalkDir; use super::options::CwdMode; @@ -190,6 +192,12 @@ fn search_workspace( ) -> Result, Error> { const SKIP_DIRS: 
&[&str] = &[".git", "target"]; let mut matches = Vec::new(); + #[cfg(not(windows))] + let _ = env; + #[cfg(windows)] + let match_ctx = prepare_workspace_match(command, env); + #[cfg(not(windows))] + let match_ctx = (); let walker = WalkDir::new(cwd) .follow_links(false) .sort_by_file_name() @@ -219,7 +227,11 @@ fn search_workspace( if !entry.file_type().is_file() { continue; } - if !workspace_entry_matches(&entry, command, env) { + #[cfg(windows)] + let matches_entry = workspace_entry_matches(&entry, command, &match_ctx); + #[cfg(not(windows))] + let matches_entry = workspace_entry_matches(&entry, command, match_ctx); + if !matches_entry { continue; } let path = entry.into_path(); @@ -243,27 +255,22 @@ fn search_workspace( Ok(matches) } -fn workspace_entry_matches(entry: &walkdir::DirEntry, command: &str, env: &EnvSnapshot) -> bool { - #[cfg(not(windows))] - let _ = env; +fn workspace_entry_matches( + entry: &walkdir::DirEntry, + command: &str, + #[cfg(windows)] ctx: &WorkspaceMatchContext, + #[cfg(not(windows))] _ctx: (), +) -> bool { #[cfg(windows)] { let file_name = entry.file_name().to_string_lossy().to_ascii_lowercase(); - let command_lower = command.to_ascii_lowercase(); - - if file_name == command_lower { + if file_name == ctx.command_lower { return true; } - - if command_lower.contains('.') { + if ctx.command_has_ext { return false; } - - let candidates = env::candidate_paths(Utf8Path::new(""), &command_lower, env.pathext()); - candidates - .iter() - .filter_map(|candidate| Utf8Path::new(candidate.as_str()).file_name()) - .any(|name| name.to_ascii_lowercase() == file_name) + ctx.basenames.contains(&file_name) } #[cfg(not(windows))] { @@ -272,6 +279,35 @@ fn workspace_entry_matches(entry: &walkdir::DirEntry, command: &str, env: &EnvSn } } +#[cfg(windows)] +struct WorkspaceMatchContext { + command_lower: String, + command_has_ext: bool, + basenames: HashSet, +} + +#[cfg(windows)] +fn prepare_workspace_match(command: &str, env: &EnvSnapshot) -> 
WorkspaceMatchContext { + let command_lower = command.to_ascii_lowercase(); + let command_has_ext = command_lower.contains('.'); + let mut basenames = HashSet::new(); + + if !command_has_ext { + let candidates = env::candidate_paths(Utf8Path::new(""), &command_lower, env.pathext()); + for candidate in candidates { + if let Some(name) = Utf8Path::new(candidate.as_str()).file_name() { + basenames.insert(name.to_ascii_lowercase()); + } + } + } + + WorkspaceMatchContext { + command_lower, + command_has_ext, + basenames, + } +} + #[cfg(unix)] fn has_execute_permission(metadata: &fs::Metadata) -> bool { use std::os::unix::fs::PermissionsExt; From 02897e8e1978e3ac859cdd8097f0bd228ceea74a Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 16:40:31 +0000 Subject: [PATCH 16/23] feat(stdlib/config): update StdlibConfig builders to return Result Replace panics in StdlibConfig::new and builder methods like with_workspace_root_path with error-returning APIs using anyhow::Result. This enhances robustness by enabling callers to handle validation errors rather than panicking. Added detailed error descriptions and updated usages across stdlib and tests accordingly. Co-authored-by: terragon-labs[bot] --- src/manifest/mod.rs | 4 +- src/stdlib/config.rs | 213 ++++++++++--------- src/stdlib/mod.rs | 9 +- src/stdlib/register.rs | 59 ++--- src/stdlib/which/lookup/mod.rs | 124 ++++++++--- tests/std_filter_tests/network_functions.rs | 4 +- tests/std_filter_tests/which_filter_tests.rs | 4 +- tests/steps/stdlib_steps/rendering.rs | 2 +- 8 files changed, 246 insertions(+), 173 deletions(-) diff --git a/src/manifest/mod.rs b/src/manifest/mod.rs index 670af304..138ab651 100644 --- a/src/manifest/mod.rs +++ b/src/manifest/mod.rs @@ -233,7 +233,7 @@ fn stdlib_config_for_manifest(path: &Path, policy: NetworkPolicy) -> Result Self { + /// Returns an error if the default fetch cache path fails validation. 
This + /// indicates a programming error in the baked-in constant rather than a + /// runtime condition; callers should treat failures as impossible in + /// normal operation. The constructor itself never panics. + pub fn new(workspace_root: Dir) -> anyhow::Result { let default = Utf8PathBuf::from(DEFAULT_FETCH_CACHE_DIR); // Rationale: the constant is static and validated for defence in depth. - if let Err(err) = Self::validate_cache_relative(&default) { - panic!("default fetch cache path should be valid: {err}"); - } - Self { + Self::validate_cache_relative(&default) + .map_err(|err| anyhow!("default fetch cache path should be valid: {err}"))?; + Ok(Self { workspace_root: Arc::new(workspace_root), workspace_root_path: None, fetch_cache_relative: default, @@ -52,20 +51,21 @@ impl StdlibConfig { fetch_max_response_bytes: DEFAULT_FETCH_MAX_RESPONSE_BYTES, command_max_output_bytes: DEFAULT_COMMAND_MAX_OUTPUT_BYTES, command_max_stream_bytes: DEFAULT_COMMAND_MAX_STREAM_BYTES, - } + }) } /// Record the absolute workspace root path for capability-scoped helpers. /// - /// # Panics + /// # Errors /// - /// Panics if `path` is not absolute. - #[must_use] - pub fn with_workspace_root_path(mut self, path: impl Into) -> Self { - let absolute = path.into(); - assert!(absolute.is_absolute(), "workspace root must be absolute"); - self.workspace_root_path = Some(absolute); - self + /// Returns an error if `path` is not absolute. This protects call sites + /// that derive the workspace from user input rather than assuming only + /// programmer-provided paths reach this builder. + pub fn with_workspace_root_path(mut self, path: impl AsRef) -> anyhow::Result { + let absolute = path.as_ref(); + ensure!(absolute.is_absolute(), "workspace root must be absolute"); + self.workspace_root_path = Some(absolute.to_owned()); + Ok(self) } /// Override the network cache location relative to the workspace root. 
@@ -74,9 +74,13 @@ impl StdlibConfig { /// /// Returns an error when the path is empty, absolute, or escapes the /// workspace via parent components. - pub fn with_fetch_cache_relative(mut self, relative_path: Utf8PathBuf) -> anyhow::Result { - Self::validate_cache_relative(&relative_path)?; - self.fetch_cache_relative = relative_path; + pub fn with_fetch_cache_relative( + mut self, + relative_path: impl AsRef, + ) -> anyhow::Result { + let relative = relative_path.as_ref(); + Self::validate_cache_relative(relative)?; + self.fetch_cache_relative = relative.to_owned(); Ok(self) } @@ -93,7 +97,7 @@ impl StdlibConfig { /// /// Returns an error when `max_bytes` is zero. pub fn with_fetch_max_response_bytes(mut self, max_bytes: u64) -> anyhow::Result { - anyhow::ensure!(max_bytes > 0, "fetch response limit must be positive"); + ensure!(max_bytes > 0, "fetch response limit must be positive"); self.fetch_max_response_bytes = max_bytes; Ok(self) } @@ -104,7 +108,7 @@ impl StdlibConfig { /// /// Returns an error when `max_bytes` is zero. pub fn with_command_max_output_bytes(mut self, max_bytes: u64) -> anyhow::Result { - anyhow::ensure!(max_bytes > 0, "command output limit must be positive"); + ensure!(max_bytes > 0, "command output limit must be positive"); self.command_max_output_bytes = max_bytes; Ok(self) } @@ -115,7 +119,7 @@ impl StdlibConfig { /// /// Returns an error when `max_bytes` is zero. pub fn with_command_max_stream_bytes(mut self, max_bytes: u64) -> anyhow::Result { - anyhow::ensure!(max_bytes > 0, "command stream limit must be positive"); + ensure!(max_bytes > 0, "command stream limit must be positive"); self.command_max_stream_bytes = max_bytes; Ok(self) } @@ -189,6 +193,14 @@ impl StdlibConfig { } impl Default for StdlibConfig { + /// Construct a configuration rooted at the ambient current directory. 
+ /// + /// # Panics + /// + /// Panics when the workspace root cannot be opened with capability-based + /// I/O, when the current directory cannot be resolved, or when the current + /// directory contains non-UTF-8 components. Call [`StdlibConfig::new`] + /// instead when you need an error-returning API. fn default() -> Self { let root = Dir::open_ambient_dir(".", ambient_authority()) .unwrap_or_else(|err| panic!("open stdlib workspace root: {err}")); @@ -196,7 +208,10 @@ impl Default for StdlibConfig { env::current_dir().unwrap_or_else(|err| panic!("resolve current directory: {err}")); let path = Utf8PathBuf::from_path_buf(cwd) .unwrap_or_else(|path| panic!("cwd contains non-UTF-8 components: {}", path.display())); - Self::new(root).with_workspace_root_path(path) + Self::new(root) + .unwrap_or_else(|err| panic!("default fetch cache path should be valid: {err}")) + .with_workspace_root_path(path) + .unwrap_or_else(|err| panic!("workspace root must be absolute: {err}")) } } @@ -218,96 +233,94 @@ mod tests { use super::{DEFAULT_COMMAND_MAX_OUTPUT_BYTES, DEFAULT_COMMAND_MAX_STREAM_BYTES, StdlibConfig}; use camino::{Utf8Path, Utf8PathBuf}; use cap_std::{ambient_authority, fs_utf8::Dir}; + use rstest::{fixture, rstest}; use std::env; - #[test] - fn validate_cache_relative_rejects_empty() { - let err = StdlibConfig::validate_cache_relative(Utf8Path::new("")) - .expect_err("empty path should fail"); - assert_eq!(err.to_string(), "fetch cache path must not be empty"); + #[fixture] + fn workspace() -> (Dir, Utf8PathBuf) { + let dir = + Dir::open_ambient_dir(".", ambient_authority()).expect("open workspace root fixture"); + let path = Utf8PathBuf::from_path_buf( + env::current_dir().expect("resolve cwd for workspace fixture"), + ) + .expect("cwd should be valid UTF-8"); + (dir, path) } - #[test] - fn validate_cache_relative_rejects_absolute_paths() { - let err = StdlibConfig::validate_cache_relative(Utf8Path::new("/cache")) - .expect_err("absolute path should fail"); - 
assert_eq!( - err.to_string(), - "fetch cache path '/cache' must be relative to the workspace" - ); + #[fixture] + fn base_config(#[from(workspace)] workspace: (Dir, Utf8PathBuf)) -> StdlibConfig { + let (dir, path) = workspace; + StdlibConfig::new(dir) + .expect("construct stdlib config") + .with_workspace_root_path(path) + .expect("record workspace root") } - #[test] - fn validate_cache_relative_rejects_parent_components() { - let err = StdlibConfig::validate_cache_relative(Utf8Path::new("../escape")) - .expect_err("parent components should fail"); - assert_eq!( - err.to_string(), - "fetch cache path '../escape' must stay within the workspace" - ); + #[rstest] + #[case(Utf8Path::new(""), "fetch cache path must not be empty")] + #[case( + Utf8Path::new("/cache"), + "fetch cache path '/cache' must be relative to the workspace" + )] + #[case( + Utf8Path::new("../escape"), + "fetch cache path '../escape' must stay within the workspace" + )] + fn validate_cache_relative_rejects_invalid_inputs( + #[case] path: &Utf8Path, + #[case] message: &str, + ) { + let err = + StdlibConfig::validate_cache_relative(path).expect_err("invalid paths should fail"); + assert_eq!(err.to_string(), message); } - #[test] + #[rstest] fn validate_cache_relative_accepts_workspace_relative_paths() { StdlibConfig::validate_cache_relative(Utf8Path::new("nested/cache")) .expect("relative path should be accepted"); } - #[test] - fn command_limits_default_to_constants() { - let config = StdlibConfig::default(); - assert_eq!( - config.command_max_output_bytes, - DEFAULT_COMMAND_MAX_OUTPUT_BYTES - ); - assert_eq!( - config.command_max_stream_bytes, - DEFAULT_COMMAND_MAX_STREAM_BYTES - ); - } - - #[test] - fn command_output_limit_builder_updates_value() { - let config = StdlibConfig::default() - .with_command_max_output_bytes(2_048) - .expect("positive limits should succeed"); - assert_eq!(config.command_max_output_bytes, 2_048); + #[rstest] + #[case::output(CommandLimitCase { + builder: 
StdlibConfig::with_command_max_output_bytes, + accessor: |cfg: &StdlibConfig| cfg.command_max_output_bytes, + default_value: DEFAULT_COMMAND_MAX_OUTPUT_BYTES, + updated: 2_048, + zero_err: "command output limit must be positive", + })] + #[case::stream(CommandLimitCase { + builder: StdlibConfig::with_command_max_stream_bytes, + accessor: |cfg: &StdlibConfig| cfg.command_max_stream_bytes, + default_value: DEFAULT_COMMAND_MAX_STREAM_BYTES, + updated: 65_536, + zero_err: "command stream limit must be positive", + })] + fn command_limit_builders_validate_and_update( + base_config: StdlibConfig, + #[case] case: CommandLimitCase, + ) { + assert_eq!((case.accessor)(&base_config), case.default_value); + + let updated_config = + (case.builder)(base_config.clone(), case.updated).expect("positive limit"); + assert_eq!((case.accessor)(&updated_config), case.updated); + + let err = (case.builder)(base_config, 0).expect_err("zero-byte limits must be rejected"); + assert_eq!(err.to_string(), case.zero_err); } - #[test] - fn command_output_limit_builder_rejects_zero() { - let err = StdlibConfig::default() - .with_command_max_output_bytes(0) - .expect_err("zero-byte limits must be rejected"); - assert_eq!(err.to_string(), "command output limit must be positive"); + struct CommandLimitCase { + builder: fn(StdlibConfig, u64) -> anyhow::Result, + accessor: fn(&StdlibConfig) -> u64, + default_value: u64, + updated: u64, + zero_err: &'static str, } - #[test] - fn command_stream_limit_builder_updates_value() { - let config = StdlibConfig::default() - .with_command_max_stream_bytes(65_536) - .expect("positive limits should succeed"); - assert_eq!(config.command_max_stream_bytes, 65_536); - } - - #[test] - fn command_stream_limit_builder_rejects_zero() { - let err = StdlibConfig::default() - .with_command_max_stream_bytes(0) - .expect_err("zero-byte limits must be rejected"); - assert_eq!(err.to_string(), "command stream limit must be positive"); - } - - #[test] - fn 
command_limits_propagate_into_components() { - let dir = Dir::open_ambient_dir(".", ambient_authority()) - .expect("open workspace root for config tests"); - let path = Utf8PathBuf::from_path_buf( - env::current_dir().expect("resolve cwd for command config test"), - ) - .expect("cwd should be valid UTF-8"); - let config = StdlibConfig::new(dir) - .with_workspace_root_path(path) + #[rstest] + fn command_limits_propagate_into_components(base_config: StdlibConfig) { + let config = base_config .with_command_max_output_bytes(4_096) .expect("set capture limit") .with_command_max_stream_bytes(131_072) diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index 28db0fec..f05ab48e 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -1,8 +1,11 @@ //! Standard library registration for `MiniJinja` templates. //! -//! Wires file tests, path helpers, collection utilities, network helpers, and -//! command wrappers into a single entrypoint so templates behave consistently -//! across projects. +//! Wires file tests, path helpers (including the `which` filter/function), +//! collection utilities, time helpers, network helpers, and command wrappers +//! into a single entrypoint so templates behave consistently across projects. +//! Configuration and registration helpers are re-exported to keep integration +//! points discoverable: `StdlibConfig`, `NetworkConfig`, `register`, +//! `register_with_config`, and `value_from_bytes`. mod collections; mod command; diff --git a/src/stdlib/register.rs b/src/stdlib/register.rs index e6c3952d..7b503a2d 100644 --- a/src/stdlib/register.rs +++ b/src/stdlib/register.rs @@ -1,4 +1,10 @@ //! Registration entrypoints for wiring stdlib helpers into `MiniJinja`. +//! +//! Hooks file tests, path helpers, collection utilities, time functions, +//! network fetch helpers, command wrappers, and the `which` filter/function +//! into a single environment. The public `register` and +//! 
`register_with_config` entrypoints are re-exported from `netsuke::stdlib` +//! alongside `StdlibConfig` and `NetworkConfig`. use super::{StdlibConfig, StdlibState, collections, command, network, path, time, which}; use anyhow::Context; @@ -42,7 +48,10 @@ pub fn register(env: &mut Environment<'_>) -> anyhow::Result { let path = camino::Utf8PathBuf::from_path_buf(cwd).map_err(|path| { anyhow::anyhow!("current directory contains non-UTF-8 components: {path:?}") })?; - register_with_config(env, StdlibConfig::new(root).with_workspace_root_path(path)) + register_with_config( + env, + StdlibConfig::new(root)?.with_workspace_root_path(path)?, + ) } /// Register stdlib helpers using an explicit configuration. @@ -61,7 +70,8 @@ pub fn register(env: &mut Environment<'_>) -> anyhow::Result { /// let dir = Dir::open_ambient_dir(".", ambient_authority()) /// .expect("open workspace"); /// let mut env = Environment::new(); -/// let _state = stdlib::register_with_config(&mut env, StdlibConfig::new(dir)); +/// let config = StdlibConfig::new(dir).expect("configure stdlib workspace"); +/// let _state = stdlib::register_with_config(&mut env, config); /// ``` /// /// # Errors @@ -97,22 +107,29 @@ pub fn value_from_bytes(bytes: Vec) -> Value { } } -fn register_file_tests(env: &mut Environment<'_>) { - const TESTS: &[FileTest] = &[ - ("dir", is_dir), - ("file", is_file), - ("symlink", is_symlink), - ("pipe", is_fifo), - ("block_device", is_block_device), - ("char_device", is_char_device), - ("device", is_device), - ]; +#[cfg(unix)] +const FILE_TESTS: &[FileTest] = &[ + ("dir", is_dir), + ("file", is_file), + ("symlink", is_symlink), + ("pipe", is_fifo), + ("block_device", is_block_device), + ("char_device", is_char_device), + ("device", is_device), +]; + +#[cfg(not(unix))] +const FILE_TESTS: &[FileTest] = &[("dir", is_dir), ("file", is_file), ("symlink", is_symlink)]; - for &(name, pred) in TESTS { +fn register_file_tests(env: &mut Environment<'_>) { + for &(name, pred) in FILE_TESTS { 
env.add_test(name, move |val: Value| -> Result { if let Some(s) = val.as_str() { return path::file_type_matches(Utf8Path::new(s), pred); } + // Treat non-string inputs as a negative match to mirror MiniJinja's + // permissive truthiness semantics (for example `42 is odd` yields + // `false` rather than raising a type error). Ok(false) }); } @@ -132,34 +149,18 @@ fn is_symlink(ft: fs::FileType) -> bool { fn is_fifo(ft: fs::FileType) -> bool { ft.is_fifo() } -#[cfg(not(unix))] -fn is_fifo(_ft: fs::FileType) -> bool { - false -} #[cfg(unix)] fn is_block_device(ft: fs::FileType) -> bool { ft.is_block_device() } -#[cfg(not(unix))] -fn is_block_device(_ft: fs::FileType) -> bool { - false -} #[cfg(unix)] fn is_char_device(ft: fs::FileType) -> bool { ft.is_char_device() } -#[cfg(not(unix))] -fn is_char_device(_ft: fs::FileType) -> bool { - false -} #[cfg(unix)] fn is_device(ft: fs::FileType) -> bool { is_block_device(ft) || is_char_device(ft) } -#[cfg(not(unix))] -fn is_device(_ft: fs::FileType) -> bool { - false -} diff --git a/src/stdlib/which/lookup/mod.rs b/src/stdlib/which/lookup/mod.rs index edfbe6f5..667c7134 100644 --- a/src/stdlib/which/lookup/mod.rs +++ b/src/stdlib/which/lookup/mod.rs @@ -19,6 +19,19 @@ use super::{ options::WhichOptions, }; +/// Resolve `command` either as a direct path or by searching the environment's +/// PATH, optionally canonicalising or collecting all matches. +/// +/// When `options.all` is `true`, every executable candidate is returned; +/// otherwise resolution stops at the first match. The current working directory +/// is injected according to `options.cwd_mode`. Results are canonicalised when +/// requested, and cache-friendly options (such as `fresh`) are respected +/// upstream by the resolver. +/// +/// # Errors +/// +/// Propagates filesystem errors during lookup and canonicalisation, and +/// returns `netsuke::jinja::which::not_found` when no candidate is discovered. 
pub(super) fn lookup( command: &str, env: &EnvSnapshot, @@ -56,6 +69,17 @@ pub(super) fn lookup( } } +/// Resolve a command that already looks like a path (absolute or relative). +/// +/// On Windows this honours `PATHEXT` when the path is missing an extension so +/// callers can pass `.\gradlew` and still resolve `gradlew.bat`. On POSIX the +/// candidate must already be executable. Canonicalisation is applied when +/// requested in `options`. +/// +/// # Errors +/// +/// Returns `netsuke::jinja::which::not_found` when no matching executable is +/// discovered. pub(super) fn resolve_direct( command: &str, env: &EnvSnapshot, @@ -129,6 +153,12 @@ fn resolve_direct_posix( } } +/// Push executable candidates into `matches`, optionally short-circuiting when +/// only the first hit is required. +/// +/// Returns `true` when at least one candidate was added and `collect_all` is +/// `false`, signalling to callers that the search can stop; returns `false` +/// otherwise. pub(super) fn push_matches( matches: &mut Vec, candidates: Vec, @@ -146,6 +176,8 @@ pub(super) fn push_matches( false } +/// Return `true` when the command string already includes path separators or, +/// on Windows, a drive letter, meaning PATH traversal should be skipped. pub(super) fn is_direct_path(command: &str) -> bool { #[cfg(windows)] { @@ -157,6 +189,10 @@ pub(super) fn is_direct_path(command: &str) -> bool { } } +/// Check whether `path` points to an executable file. +/// +/// On Unix this requires at least one execute bit. On other platforms it only +/// verifies that the path exists and is a file. pub(super) fn is_executable(path: &Utf8Path) -> bool { fs::metadata(path.as_std_path()) .is_ok_and(|metadata| metadata.is_file() && has_execute_permission(&metadata)) @@ -184,13 +220,14 @@ fn handle_miss( Err(not_found_error(command, dirs, options.cwd_mode)) } +/// Walk the workspace looking for executables when PATH is empty and the +/// resolver is allowed to consult the current directory. 
fn search_workspace( cwd: &Utf8Path, command: &str, collect_all: bool, env: &EnvSnapshot, ) -> Result, Error> { - const SKIP_DIRS: &[&str] = &[".git", "target"]; let mut matches = Vec::new(); #[cfg(not(windows))] let _ = env; @@ -202,15 +239,7 @@ fn search_workspace( .follow_links(false) .sort_by_file_name() .into_iter() - .filter_entry(|entry| { - let ft = entry.file_type(); - if ft.is_dir() { - let name = entry.file_name().to_string_lossy(); - !SKIP_DIRS.iter().any(|skip| name == *skip) - } else { - true - } - }); + .filter_entry(should_visit_entry); for walk_entry in walker { let entry = match walk_entry { @@ -224,37 +253,60 @@ fn search_workspace( continue; } }; - if !entry.file_type().is_file() { - continue; - } #[cfg(windows)] - let matches_entry = workspace_entry_matches(&entry, command, &match_ctx); + let maybe_match = process_workspace_entry(entry, command, &match_ctx)?; #[cfg(not(windows))] - let matches_entry = workspace_entry_matches(&entry, command, match_ctx); - if !matches_entry { - continue; - } - let path = entry.into_path(); - let utf8 = Utf8PathBuf::from_path_buf(path).map_err(|path_buf| { - let lossy_path = path_buf.to_string_lossy(); - Error::new( - ErrorKind::InvalidOperation, - format!( - "workspace path contains non-UTF-8 components while resolving command '{command}': {lossy_path}" - ), - ) - })?; - if !is_executable(&utf8) { - continue; - } - matches.push(utf8); - if !collect_all { - break; + let maybe_match = process_workspace_entry(entry, command, match_ctx)?; + + if let Some(path) = maybe_match { + matches.push(path); + if !collect_all { + break; + } } } Ok(matches) } +const WORKSPACE_SKIP_DIRS: &[&str] = &[".git", "target"]; + +fn should_visit_entry(entry: &walkdir::DirEntry) -> bool { + if !entry.file_type().is_dir() { + return true; + } + let name = entry.file_name().to_string_lossy(); + !WORKSPACE_SKIP_DIRS.iter().any(|skip| name == *skip) +} + +fn process_workspace_entry( + entry: walkdir::DirEntry, + command: &str, + 
#[cfg(windows)] ctx: &WorkspaceMatchContext, + #[cfg(not(windows))] ctx: (), +) -> Result, Error> { + if !entry.file_type().is_file() { + return Ok(None); + } + #[cfg(windows)] + let matches_entry = workspace_entry_matches(&entry, command, ctx); + #[cfg(not(windows))] + let matches_entry = workspace_entry_matches(&entry, command, ctx); + if !matches_entry { + return Ok(None); + } + let path = entry.into_path(); + let utf8 = Utf8PathBuf::from_path_buf(path).map_err(|path_buf| { + let lossy_path = path_buf.to_string_lossy(); + Error::new( + ErrorKind::InvalidOperation, + format!( + "workspace path contains non-UTF-8 components while resolving command '{command}': {lossy_path}" + ), + ) + })?; + Ok(is_executable(&utf8).then_some(utf8)) +} + fn workspace_entry_matches( entry: &walkdir::DirEntry, command: &str, @@ -319,6 +371,10 @@ fn has_execute_permission(metadata: &fs::Metadata) -> bool { metadata.is_file() } +/// Canonicalise, de-duplicate, and UTF-8 validate discovered paths. +/// +/// Returns an error when canonicalisation fails or when any canonical path +/// cannot be represented as UTF-8. pub(super) fn canonicalise(paths: Vec) -> Result, Error> { let mut unique = IndexSet::new(); let mut resolved = Vec::new(); diff --git a/tests/std_filter_tests/network_functions.rs b/tests/std_filter_tests/network_functions.rs index 778d40d7..2e6496c0 100644 --- a/tests/std_filter_tests/network_functions.rs +++ b/tests/std_filter_tests/network_functions.rs @@ -28,8 +28,8 @@ fn env_with_workspace_policy( policy: NetworkPolicy, ) -> Result<(Environment<'static>, StdlibState)> { fallible::stdlib_env_with_config( - StdlibConfig::new(workspace) - .with_workspace_root_path(workspace_path) + StdlibConfig::new(workspace)? + .with_workspace_root_path(workspace_path)? 
.with_network_policy(policy), ) } diff --git a/tests/std_filter_tests/which_filter_tests.rs b/tests/std_filter_tests/which_filter_tests.rs index 207a1ec8..3b77f7d6 100644 --- a/tests/std_filter_tests/which_filter_tests.rs +++ b/tests/std_filter_tests/which_filter_tests.rs @@ -313,8 +313,8 @@ fn which_resolver_honours_workspace_root_override() -> Result<()> { let config = StdlibConfig::new( Dir::open_ambient_dir(&root, ambient_authority()).context("open workspace")?, - ) - .with_workspace_root_path(root.clone()); + )? + .with_workspace_root_path(root.clone())?; let _path = PathEnv::new(&[])?; let (mut env, _state) = fallible::stdlib_env_with_config(config)?; let render_result = render(&mut env, &Template::from("{{ 'helper' | which }}")); diff --git a/tests/steps/stdlib_steps/rendering.rs b/tests/steps/stdlib_steps/rendering.rs index 3066e083..58eae270 100644 --- a/tests/steps/stdlib_steps/rendering.rs +++ b/tests/steps/stdlib_steps/rendering.rs @@ -18,7 +18,7 @@ pub(crate) fn render_template_with_context( let mut env = Environment::new(); let workspace = Dir::open_ambient_dir(&root, ambient_authority()) .context("open stdlib workspace directory")?; - let mut config = StdlibConfig::new(workspace).with_workspace_root_path(root.clone()); + let mut config = StdlibConfig::new(workspace)?.with_workspace_root_path(root.clone())?; if let Some(policy) = world.stdlib_policy.clone() { config = config.with_network_policy(policy); } From 799798ff42690754473376f67ef2e6c0b8d1c8a2 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 16:52:57 +0000 Subject: [PATCH 17/23] refactor(which): use cfg attributes to adjust workspace_entry_matches params Changed the parameters of the workspace_entry_matches function to conditionally compile the 'command' argument for non-Windows targets and prefix it with underscore on Windows, similarly for 'ctx' argument. This improves platform-specific parameter usage and compilation cleanliness. 
Co-authored-by: terragon-labs[bot]
---
 src/stdlib/which/lookup/mod.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/stdlib/which/lookup/mod.rs b/src/stdlib/which/lookup/mod.rs
index 667c7134..abf03b1e 100644
--- a/src/stdlib/which/lookup/mod.rs
+++ b/src/stdlib/which/lookup/mod.rs
@@ -309,7 +309,8 @@ fn process_workspace_entry(
 
 fn workspace_entry_matches(
     entry: &walkdir::DirEntry,
-    command: &str,
+    #[cfg(windows)] _command: &str,
+    #[cfg(not(windows))] command: &str,
     #[cfg(windows)] ctx: &WorkspaceMatchContext,
     #[cfg(not(windows))] _ctx: (),
 ) -> bool {

From f1a343e1c7fb797544db9895f49b6e791634b89e Mon Sep 17 00:00:00 2001
From: Leynos
Date: Mon, 24 Nov 2025 17:05:04 +0000
Subject: [PATCH 18/23] refactor(which/lookup): move workspace fallback search
 to a new module

Extract the workspace fallback search logic from mod.rs into a dedicated
workspace.rs module. This change separates concerns, improving code
organization, readability, and maintainability without altering
functionality.
Co-authored-by: terragon-labs[bot] --- src/stdlib/which/lookup/mod.rs | 146 +------------------------- src/stdlib/which/lookup/workspace.rs | 147 +++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 144 deletions(-) create mode 100644 src/stdlib/which/lookup/workspace.rs diff --git a/src/stdlib/which/lookup/mod.rs b/src/stdlib/which/lookup/mod.rs index abf03b1e..0e27e7b4 100644 --- a/src/stdlib/which/lookup/mod.rs +++ b/src/stdlib/which/lookup/mod.rs @@ -5,9 +5,6 @@ use std::fs; use camino::{Utf8Path, Utf8PathBuf}; use indexmap::IndexSet; use minijinja::{Error, ErrorKind}; -#[cfg(windows)] -use std::collections::HashSet; -use walkdir::WalkDir; use super::options::CwdMode; @@ -18,6 +15,8 @@ use super::{ error::{direct_not_found, not_found_error}, options::WhichOptions, }; +mod workspace; +use workspace::search_workspace; /// Resolve `command` either as a direct path or by searching the environment's /// PATH, optionally canonicalising or collecting all matches. @@ -220,147 +219,6 @@ fn handle_miss( Err(not_found_error(command, dirs, options.cwd_mode)) } -/// Walk the workspace looking for executables when PATH is empty and the -/// resolver is allowed to consult the current directory. 
-fn search_workspace( - cwd: &Utf8Path, - command: &str, - collect_all: bool, - env: &EnvSnapshot, -) -> Result, Error> { - let mut matches = Vec::new(); - #[cfg(not(windows))] - let _ = env; - #[cfg(windows)] - let match_ctx = prepare_workspace_match(command, env); - #[cfg(not(windows))] - let match_ctx = (); - let walker = WalkDir::new(cwd) - .follow_links(false) - .sort_by_file_name() - .into_iter() - .filter_entry(should_visit_entry); - - for walk_entry in walker { - let entry = match walk_entry { - Ok(value) => value, - Err(err) => { - tracing::debug!( - %command, - error = %err, - "skipping unreadable workspace entry during which fallback" - ); - continue; - } - }; - #[cfg(windows)] - let maybe_match = process_workspace_entry(entry, command, &match_ctx)?; - #[cfg(not(windows))] - let maybe_match = process_workspace_entry(entry, command, match_ctx)?; - - if let Some(path) = maybe_match { - matches.push(path); - if !collect_all { - break; - } - } - } - Ok(matches) -} - -const WORKSPACE_SKIP_DIRS: &[&str] = &[".git", "target"]; - -fn should_visit_entry(entry: &walkdir::DirEntry) -> bool { - if !entry.file_type().is_dir() { - return true; - } - let name = entry.file_name().to_string_lossy(); - !WORKSPACE_SKIP_DIRS.iter().any(|skip| name == *skip) -} - -fn process_workspace_entry( - entry: walkdir::DirEntry, - command: &str, - #[cfg(windows)] ctx: &WorkspaceMatchContext, - #[cfg(not(windows))] ctx: (), -) -> Result, Error> { - if !entry.file_type().is_file() { - return Ok(None); - } - #[cfg(windows)] - let matches_entry = workspace_entry_matches(&entry, command, ctx); - #[cfg(not(windows))] - let matches_entry = workspace_entry_matches(&entry, command, ctx); - if !matches_entry { - return Ok(None); - } - let path = entry.into_path(); - let utf8 = Utf8PathBuf::from_path_buf(path).map_err(|path_buf| { - let lossy_path = path_buf.to_string_lossy(); - Error::new( - ErrorKind::InvalidOperation, - format!( - "workspace path contains non-UTF-8 components while resolving 
command '{command}': {lossy_path}" - ), - ) - })?; - Ok(is_executable(&utf8).then_some(utf8)) -} - -fn workspace_entry_matches( - entry: &walkdir::DirEntry, - #[cfg(windows)] _command: &str, - #[cfg(not(windows))] command: &str, - #[cfg(windows)] ctx: &WorkspaceMatchContext, - #[cfg(not(windows))] _ctx: (), -) -> bool { - #[cfg(windows)] - { - let file_name = entry.file_name().to_string_lossy().to_ascii_lowercase(); - if file_name == ctx.command_lower { - return true; - } - if ctx.command_has_ext { - return false; - } - ctx.basenames.contains(&file_name) - } - #[cfg(not(windows))] - { - let file_name = entry.file_name().to_string_lossy(); - file_name == command - } -} - -#[cfg(windows)] -struct WorkspaceMatchContext { - command_lower: String, - command_has_ext: bool, - basenames: HashSet, -} - -#[cfg(windows)] -fn prepare_workspace_match(command: &str, env: &EnvSnapshot) -> WorkspaceMatchContext { - let command_lower = command.to_ascii_lowercase(); - let command_has_ext = command_lower.contains('.'); - let mut basenames = HashSet::new(); - - if !command_has_ext { - let candidates = env::candidate_paths(Utf8Path::new(""), &command_lower, env.pathext()); - for candidate in candidates { - if let Some(name) = Utf8Path::new(candidate.as_str()).file_name() { - basenames.insert(name.to_ascii_lowercase()); - } - } - } - - WorkspaceMatchContext { - command_lower, - command_has_ext, - basenames, - } -} - #[cfg(unix)] fn has_execute_permission(metadata: &fs::Metadata) -> bool { use std::os::unix::fs::PermissionsExt; diff --git a/src/stdlib/which/lookup/workspace.rs b/src/stdlib/which/lookup/workspace.rs new file mode 100644 index 00000000..f0444a11 --- /dev/null +++ b/src/stdlib/which/lookup/workspace.rs @@ -0,0 +1,147 @@ +//! Workspace fallback search helpers for the `which` resolver. 
+ +#[cfg(windows)] +use std::collections::HashSet; + +use camino::{Utf8Path, Utf8PathBuf}; +use minijinja::{Error, ErrorKind}; +use walkdir::WalkDir; + +#[cfg(windows)] +use super::env; +use super::{EnvSnapshot, is_executable}; + +pub(super) fn search_workspace( + cwd: &Utf8Path, + command: &str, + collect_all: bool, + env: &EnvSnapshot, +) -> Result, Error> { + #[cfg(windows)] + let match_ctx = prepare_workspace_match(command, env); + #[cfg(not(windows))] + let match_ctx = (); + #[cfg(not(windows))] + let _ = env; + let walker = WalkDir::new(cwd) + .follow_links(false) + .sort_by_file_name() + .into_iter() + .filter_entry(should_visit_entry) + .filter_map(|walk_entry| match walk_entry { + Ok(entry) => Some(entry), + Err(err) => { + tracing::debug!( + %command, + error = %err, + "skipping unreadable workspace entry during which fallback" + ); + None + } + }); + + #[cfg(windows)] + let matches_iter = walker.filter_map(|entry| match process_workspace_entry(entry, &match_ctx) { + Ok(Some(path)) => Some(Ok(path)), + Ok(None) => None, + Err(err) => Some(Err(err)), + }); + #[cfg(not(windows))] + let matches_iter = walker.filter_map(|entry| match process_workspace_entry(entry, command, match_ctx) { + Ok(Some(path)) => Some(Ok(path)), + Ok(None) => None, + Err(err) => Some(Err(err)), + }); + + if collect_all { + matches_iter.collect() + } else { + matches_iter.take(1).collect() + } +} + +const WORKSPACE_SKIP_DIRS: &[&str] = &[".git", "target"]; + +fn should_visit_entry(entry: &walkdir::DirEntry) -> bool { + if !entry.file_type().is_dir() { + return true; + } + let name = entry.file_name().to_string_lossy(); + !WORKSPACE_SKIP_DIRS.iter().any(|skip| name == *skip) +} + +fn process_workspace_entry( + entry: walkdir::DirEntry, + #[cfg(windows)] ctx: &WorkspaceMatchContext, + #[cfg(not(windows))] command: &str, + #[cfg(not(windows))] ctx: (), +) -> Result, Error> { + if !entry.file_type().is_file() { + return Ok(None); + } + #[cfg(windows)] + let matches_entry = 
workspace_entry_matches(&entry, ctx); + #[cfg(not(windows))] + let matches_entry = workspace_entry_matches(&entry, command, ctx); + if !matches_entry { + return Ok(None); + } + let path = entry.into_path(); + let utf8 = Utf8PathBuf::from_path_buf(path).map_err(|path_buf| { + let lossy_path = path_buf.to_string_lossy(); + Error::new( + ErrorKind::InvalidOperation, + format!( + "workspace path contains non-UTF-8 components while resolving command '{command}': {lossy_path}" + ), + ) + })?; + Ok(is_executable(&utf8).then_some(utf8)) +} + +#[cfg(windows)] +struct WorkspaceMatchContext { + command_lower: String, + command_has_ext: bool, + basenames: HashSet, +} + +#[cfg(windows)] +fn workspace_entry_matches(entry: &walkdir::DirEntry, ctx: &WorkspaceMatchContext) -> bool { + let file_name = entry.file_name().to_string_lossy().to_ascii_lowercase(); + if file_name == ctx.command_lower { + return true; + } + if ctx.command_has_ext { + return false; + } + ctx.basenames.contains(&file_name) +} + +#[cfg(not(windows))] +fn workspace_entry_matches(entry: &walkdir::DirEntry, command: &str, _ctx: ()) -> bool { + let file_name = entry.file_name().to_string_lossy(); + file_name == command +} + +#[cfg(windows)] +fn prepare_workspace_match(command: &str, env: &EnvSnapshot) -> WorkspaceMatchContext { + let command_lower = command.to_ascii_lowercase(); + let command_has_ext = command_lower.contains('.'); + let mut basenames = HashSet::new(); + + if !command_has_ext { + let candidates = env::candidate_paths(Utf8Path::new(""), &command_lower, env.pathext()); + for candidate in candidates { + if let Some(name) = Utf8Path::new(candidate.as_str()).file_name() { + basenames.insert(name.to_ascii_lowercase()); + } + } + } + + WorkspaceMatchContext { + command_lower, + command_has_ext, + basenames, + } +} From 108c0a736a99a3632363a229a661c52d797738a1 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 17:09:05 +0000 Subject: [PATCH 19/23] refactor(which): Refactor workspace search to 
improve readability and error handling - Extracted workspace match collection into a new function `collect_workspace_matches` for clarity. - Enhanced error handling during directory entry iteration with improved tracing. - Removed platform-dependent conditional compilation from iterator construction, centralizing logic. - Maintained functionality to collect all matches or stop after first match. - Cleaned up code structure to simplify maintenance and readability. Co-authored-by: terragon-labs[bot] --- src/stdlib/which/lookup/workspace.rs | 68 ++++++++++++++++------------ 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/src/stdlib/which/lookup/workspace.rs b/src/stdlib/which/lookup/workspace.rs index f0444a11..298019b4 100644 --- a/src/stdlib/which/lookup/workspace.rs +++ b/src/stdlib/which/lookup/workspace.rs @@ -23,41 +23,52 @@ pub(super) fn search_workspace( let match_ctx = (); #[cfg(not(windows))] let _ = env; - let walker = WalkDir::new(cwd) + + let entries = WalkDir::new(cwd) .follow_links(false) .sort_by_file_name() .into_iter() .filter_entry(should_visit_entry) - .filter_map(|walk_entry| match walk_entry { - Ok(entry) => Some(entry), - Err(err) => { - tracing::debug!( - %command, - error = %err, - "skipping unreadable workspace entry during which fallback" - ); - None - } + .filter_map(|walk_entry| { + walk_entry + .map_err(|err| { + tracing::debug!( + %command, + error = %err, + "skipping unreadable workspace entry during which fallback" + ); + err + }) + .ok() }); - #[cfg(windows)] - let matches_iter = walker.filter_map(|entry| match process_workspace_entry(entry, &match_ctx) { - Ok(Some(path)) => Some(Ok(path)), - Ok(None) => None, - Err(err) => Some(Err(err)), - }); - #[cfg(not(windows))] - let matches_iter = walker.filter_map(|entry| match process_workspace_entry(entry, command, match_ctx) { - Ok(Some(path)) => Some(Ok(path)), - Ok(None) => None, - Err(err) => Some(Err(err)), - }); - - if collect_all { - matches_iter.collect() - } else { - 
matches_iter.take(1).collect() + collect_workspace_matches(entries, command, collect_all, match_ctx) +} + +fn collect_workspace_matches( + entries: impl Iterator<Item = walkdir::DirEntry>, + command: &str, + collect_all: bool, + #[cfg(windows)] match_ctx: WorkspaceMatchContext, + #[cfg(not(windows))] match_ctx: (), +) -> Result<Vec<Utf8PathBuf>, Error> { + let mut matches = Vec::new(); + + for entry in entries { + #[cfg(windows)] + let maybe_match = process_workspace_entry(entry, command, &match_ctx)?; + #[cfg(not(windows))] + let maybe_match = process_workspace_entry(entry, command, match_ctx)?; + + if let Some(path) = maybe_match { + matches.push(path); + if !collect_all { + break; + } + } } + + Ok(matches) } const WORKSPACE_SKIP_DIRS: &[&str] = &[".git", "target"]; @@ -100,6 +111,7 @@ fn process_workspace_entry( } #[cfg(windows)] +#[derive(Clone)] struct WorkspaceMatchContext { command_lower: String, command_has_ext: bool, From 68fd98eea1f3cb4832db0fdcf5abdb4f6055f3a4 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 17:34:39 +0000 Subject: [PATCH 20/23] feat(which): add workspace-based executable search with platform-specific matching - Implement workspace traversal to find executables respecting VCS and build directories. - Introduce platform-specific matching contexts to handle case sensitivity on Windows vs Unix. - Add skip logic for heavy directories like .git and target to keep scans fast. - Enhance tests to cover workspace filtering, PATH resolution, and diagnostic output.
Co-authored-by: terragon-labs[bot] --- src/stdlib/which/lookup/workspace.rs | 13 ++++++++++++- tests/std_filter_tests/which_filter_tests.rs | 3 +++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/stdlib/which/lookup/workspace.rs b/src/stdlib/which/lookup/workspace.rs index 298019b4..0ec83660 100644 --- a/src/stdlib/which/lookup/workspace.rs +++ b/src/stdlib/which/lookup/workspace.rs @@ -45,6 +45,10 @@ pub(super) fn search_workspace( collect_workspace_matches(entries, command, collect_all, match_ctx) } +/// Accumulates executable matches from workspace traversal, stopping early +/// when `collect_all` is `false`. The iterator supplies already-filtered +/// directory entries; platform-specific match contexts ensure consistent +/// filename matching semantics. fn collect_workspace_matches( entries: impl Iterator, command: &str, @@ -73,6 +77,8 @@ fn collect_workspace_matches( const WORKSPACE_SKIP_DIRS: &[&str] = &[".git", "target"]; +/// Allow traversal for all files and directories except heavy/VCS roots to +/// keep workspace scans fast. fn should_visit_entry(entry: &walkdir::DirEntry) -> bool { if !entry.file_type().is_dir() { return true; @@ -83,8 +89,8 @@ fn should_visit_entry(entry: &walkdir::DirEntry) -> bool { fn process_workspace_entry( entry: walkdir::DirEntry, + command: &str, #[cfg(windows)] ctx: &WorkspaceMatchContext, - #[cfg(not(windows))] command: &str, #[cfg(not(windows))] ctx: (), ) -> Result, Error> { if !entry.file_type().is_file() { @@ -111,6 +117,7 @@ fn process_workspace_entry( } #[cfg(windows)] +/// Windows-only match context carrying normalised command state. #[derive(Clone)] struct WorkspaceMatchContext { command_lower: String, @@ -119,6 +126,7 @@ struct WorkspaceMatchContext { } #[cfg(windows)] +/// Perform case-insensitive matching or PATHEXT-expanded basename matching. 
fn workspace_entry_matches(entry: &walkdir::DirEntry, ctx: &WorkspaceMatchContext) -> bool { let file_name = entry.file_name().to_string_lossy().to_ascii_lowercase(); if file_name == ctx.command_lower { @@ -131,12 +139,15 @@ fn workspace_entry_matches(entry: &walkdir::DirEntry, ctx: &WorkspaceMatchContex } #[cfg(not(windows))] +/// Perform exact case-sensitive filename matching on non-Windows platforms. fn workspace_entry_matches(entry: &walkdir::DirEntry, command: &str, _ctx: ()) -> bool { let file_name = entry.file_name().to_string_lossy(); file_name == command } #[cfg(windows)] +/// Build the Windows match context by lowercasing the command and expanding +/// PATHEXT suffixes when the command lacks an explicit extension. fn prepare_workspace_match(command: &str, env: &EnvSnapshot) -> WorkspaceMatchContext { let command_lower = command.to_ascii_lowercase(); let command_has_ext = command_lower.contains('.'); diff --git a/tests/std_filter_tests/which_filter_tests.rs b/tests/std_filter_tests/which_filter_tests.rs index 3b77f7d6..1a46f498 100644 --- a/tests/std_filter_tests/which_filter_tests.rs +++ b/tests/std_filter_tests/which_filter_tests.rs @@ -1,3 +1,6 @@ +//! Integration tests for the `which` filter/function covering PATH resolution, +//! canonicalisation, cwd behaviour, workspace fallback, and diagnostic output. + use anyhow::{Context, Result, anyhow}; use camino::{Utf8Path, Utf8PathBuf}; use minijinja::{context, Environment}; From 76d428e2afb59c0f1e78eae3fc1a81c1c73e4cf6 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 17:56:44 +0000 Subject: [PATCH 21/23] feat(which/lookup): add cross-platform workspace search for commands - Introduce conditional compilation to enable workspace search on non-Windows platforms by passing unit () instead of EnvSnapshot. - Refactor workspace search to support exact case-sensitive matching on Unix-like systems and case-insensitive PATHEXT-aware matching on Windows. 
- Enhance tests to run workspace search on both Windows and non-Windows environments. - Document and encapsulate platform-specific filename matching logic into separate functions to unify and clarify behavior. - This enables improved command lookup support across platforms by searching the workspace with appropriate matching logic. Co-authored-by: terragon-labs[bot] --- src/stdlib/which/lookup/mod.rs | 3 ++ src/stdlib/which/lookup/tests.rs | 16 +++++++++++ src/stdlib/which/lookup/workspace.rs | 41 ++++++++++++++++++++++------ 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/src/stdlib/which/lookup/mod.rs b/src/stdlib/which/lookup/mod.rs index 0e27e7b4..2a9c0eee 100644 --- a/src/stdlib/which/lookup/mod.rs +++ b/src/stdlib/which/lookup/mod.rs @@ -206,7 +206,10 @@ fn handle_miss( let path_empty = env.raw_path.as_ref().is_none_or(|path| path.is_empty()); if path_empty && !matches!(options.cwd_mode, CwdMode::Never) { + #[cfg(windows)] let discovered = search_workspace(&env.cwd, command, options.all, env)?; + #[cfg(not(windows))] + let discovered = search_workspace(&env.cwd, command, options.all, ())?; if !discovered.is_empty() { return if options.canonical { canonicalise(discovered) diff --git a/src/stdlib/which/lookup/tests.rs b/src/stdlib/which/lookup/tests.rs index 235aaa08..7f1d54ee 100644 --- a/src/stdlib/which/lookup/tests.rs +++ b/src/stdlib/which/lookup/tests.rs @@ -61,9 +61,13 @@ fn search_workspace_returns_executable_and_skips_non_exec(workspace: TempWorkspa let non_exec = workspace.root().join("tool2"); fs::write(non_exec.as_std_path(), b"not exec").context("write non exec")?; + #[cfg(windows)] let snapshot = EnvSnapshot::capture(Some(workspace.root())).expect("capture env for workspace search"); + #[cfg(windows)] let results = search_workspace(workspace.root(), "tool", false, &snapshot)?; + #[cfg(not(windows))] + let results = search_workspace(workspace.root(), "tool", false, ())?; ensure!( results == vec![exec], "expected executable to be 
discovered" @@ -78,9 +82,13 @@ fn search_workspace_collects_all_matches(workspace: TempWorkspace) -> Result<()> fs::create_dir_all(subdir.as_std_path()).context("mkdir bin")?; let second = write_exec(subdir.as_path(), "tool")?; + #[cfg(windows)] let snapshot = EnvSnapshot::capture(Some(workspace.root())).expect("capture env for workspace search"); + #[cfg(windows)] let mut results = search_workspace(workspace.root(), "tool", true, &snapshot)?; + #[cfg(not(windows))] + let mut results = search_workspace(workspace.root(), "tool", true, ())?; results.sort(); let mut expected = vec![first, second]; expected.sort(); @@ -97,9 +105,13 @@ fn search_workspace_skips_heavy_directories(workspace: TempWorkspace) -> Result< fs::create_dir_all(heavy.as_std_path()).context("mkdir target")?; write_exec(heavy.as_path(), "tool")?; + #[cfg(windows)] let snapshot = EnvSnapshot::capture(Some(workspace.root())).expect("capture env for workspace search"); + #[cfg(windows)] let results = search_workspace(workspace.root(), "tool", false, &snapshot)?; + #[cfg(not(windows))] + let results = search_workspace(workspace.root(), "tool", false, ())?; ensure!(results.is_empty(), "expected target/ to be skipped"); Ok(()) } @@ -117,9 +129,13 @@ fn search_workspace_ignores_unreadable_entries(workspace: TempWorkspace) -> Resu fs::set_permissions(blocked.as_std_path(), perms).context("chmod blocked")?; let exec = write_exec(workspace.root(), "tool")?; + #[cfg(windows)] let snapshot = EnvSnapshot::capture(Some(workspace.root())).expect("capture env for workspace search"); + #[cfg(windows)] let results = search_workspace(workspace.root(), "tool", false, &snapshot)?; + #[cfg(not(windows))] + let results = search_workspace(workspace.root(), "tool", false, ())?; ensure!( results == vec![exec], "expected readable executable despite blocked dir" diff --git a/src/stdlib/which/lookup/workspace.rs b/src/stdlib/which/lookup/workspace.rs index 0ec83660..6ae04cef 100644 --- a/src/stdlib/which/lookup/workspace.rs +++ 
b/src/stdlib/which/lookup/workspace.rs @@ -9,20 +9,21 @@ use walkdir::WalkDir; #[cfg(windows)] use super::env; -use super::{EnvSnapshot, is_executable}; +#[cfg(windows)] +use super::EnvSnapshot; +use super::is_executable; pub(super) fn search_workspace( cwd: &Utf8Path, command: &str, collect_all: bool, - env: &EnvSnapshot, + #[cfg(windows)] env: &EnvSnapshot, + #[cfg(not(windows))] _env: (), ) -> Result<Vec<Utf8PathBuf>, Error> { #[cfg(windows)] let match_ctx = prepare_workspace_match(command, env); #[cfg(not(windows))] let match_ctx = (); - #[cfg(not(windows))] - let _ = env; let entries = WalkDir::new(cwd) .follow_links(false) @@ -87,6 +88,10 @@ fn should_visit_entry(entry: &walkdir::DirEntry) -> bool { !WORKSPACE_SKIP_DIRS.iter().any(|skip| name == *skip) } +/// Process a single `walkdir::DirEntry`: ensure it is a file, apply the +/// platform-specific filename match, convert the path to UTF-8 (erroring on +/// non-UTF-8 components), and return `Some(path)` only when the entry is +/// executable; otherwise return `None`. fn process_workspace_entry( entry: walkdir::DirEntry, command: &str, @@ -117,7 +122,14 @@ fn process_workspace_entry( } #[cfg(windows)] -/// Windows-only match context carrying normalised command state. +/// Windows-specific match context for case-insensitive filename matching. +/// +/// Encapsulates normalised command state for workspace traversal: +/// - `command_lower`: lowercased command name. +/// - `command_has_ext`: whether the supplied command already includes a file +/// extension. +/// - `basenames`: PATHEXT-expanded candidate filenames for extension-less +/// commands, stored in lowercase for case-insensitive comparisons. #[derive(Clone)] struct WorkspaceMatchContext { command_lower: String, @@ -126,7 +138,11 @@ struct WorkspaceMatchContext { } #[cfg(windows)] -/// Perform case-insensitive matching or PATHEXT-expanded basename matching. +/// Perform case-insensitive filename matching with PATHEXT expansion.
+/// +/// Returns `true` when the entry's lowercased filename matches the command +/// directly or—when the command lacks an extension—any PATHEXT-expanded +/// basename candidate. fn workspace_entry_matches(entry: &walkdir::DirEntry, ctx: &WorkspaceMatchContext) -> bool { let file_name = entry.file_name().to_string_lossy().to_ascii_lowercase(); if file_name == ctx.command_lower { @@ -139,15 +155,22 @@ fn workspace_entry_matches(entry: &walkdir::DirEntry, ctx: &WorkspaceMatchContex } #[cfg(not(windows))] -/// Perform exact case-sensitive filename matching on non-Windows platforms. +/// Perform exact case-sensitive filename matching. +/// +/// Returns `true` when the entry's filename matches the command string. fn workspace_entry_matches(entry: &walkdir::DirEntry, command: &str, _ctx: ()) -> bool { let file_name = entry.file_name().to_string_lossy(); file_name == command } #[cfg(windows)] -/// Build the Windows match context by lowercasing the command and expanding -/// PATHEXT suffixes when the command lacks an explicit extension. +/// Initialise Windows match context by normalising the command and expanding +/// PATHEXT. +/// +/// Lowercases the command, records whether it already contains an extension, +/// and—when extension-less—derives candidate basenames by applying PATHEXT +/// suffixes via `env::candidate_paths`. All basenames are stored in lowercase +/// to enable case-insensitive comparisons during workspace traversal. fn prepare_workspace_match(command: &str, env: &EnvSnapshot) -> WorkspaceMatchContext { let command_lower = command.to_ascii_lowercase(); let command_has_ext = command_lower.contains('.'); From ba24621f73aa07086fd3729586ee932e6dd7055f Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 18:08:34 +0000 Subject: [PATCH 22/23] style(which): fix imports ordering under Windows cfg Reordered the imports of env and EnvSnapshot under the Windows cfg to maintain consistent style and grouping. 
Co-authored-by: terragon-labs[bot] --- src/stdlib/which/lookup/workspace.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stdlib/which/lookup/workspace.rs b/src/stdlib/which/lookup/workspace.rs index 6ae04cef..c83614ec 100644 --- a/src/stdlib/which/lookup/workspace.rs +++ b/src/stdlib/which/lookup/workspace.rs @@ -7,10 +7,10 @@ use camino::{Utf8Path, Utf8PathBuf}; use minijinja::{Error, ErrorKind}; use walkdir::WalkDir; -#[cfg(windows)] -use super::env; #[cfg(windows)] use super::EnvSnapshot; +#[cfg(windows)] +use super::env; use super::is_executable; pub(super) fn search_workspace( From e02cbacd9231161cf77164fc31ba2557d4c7ba17 Mon Sep 17 00:00:00 2001 From: Leynos Date: Mon, 24 Nov 2025 18:58:59 +0000 Subject: [PATCH 23/23] docs(which): add detailed docs to workspace executable search functions - Documented the `search_workspace` function detailing parameters and behavior. - Added doc comments to `collect_workspace_matches` explaining its parameters and return values. - Improved clarity on platform-specific matching and error handling in comments. Co-authored-by: terragon-labs[bot] --- src/stdlib/which/lookup/workspace.rs | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/stdlib/which/lookup/workspace.rs b/src/stdlib/which/lookup/workspace.rs index c83614ec..abbd3713 100644 --- a/src/stdlib/which/lookup/workspace.rs +++ b/src/stdlib/which/lookup/workspace.rs @@ -13,6 +13,19 @@ use super::EnvSnapshot; use super::env; use super::is_executable; +/// Recursively search the workspace rooted at `cwd` for executables matching +/// `command`. +/// +/// - `cwd`: workspace root to traverse (symlinks are not followed). +/// - `command`: name to match (Windows: case-insensitive with `PATHEXT` +/// expansion; other platforms: exact case-sensitive filename match). +/// - `collect_all`: when `true`, return every match; otherwise stop after the +/// first executable. 
+/// - `env`: provided only on Windows to supply `PATHEXT` for matching. +/// +/// Skips unreadable entries, ignores heavy/VCS directories via +/// `should_visit_entry`, and returns `Ok(Vec<Utf8PathBuf>)` containing the +/// discovered executables or an `Error` if UTF-8 conversion fails. pub(super) fn search_workspace( cwd: &Utf8Path, command: &str, collect_all: bool, @@ -46,10 +59,17 @@ collect_workspace_matches(entries, command, collect_all, match_ctx) } -/// Accumulates executable matches from workspace traversal, stopping early -/// when `collect_all` is `false`. The iterator supplies already-filtered -/// directory entries; platform-specific match contexts ensure consistent -/// filename matching semantics. +/// Collect executable matches from workspace traversal. +/// +/// Parameters: +/// - `entries`: iterator of `walkdir::DirEntry` values to inspect. +/// - `command`: command name used for platform-specific filename matching. +/// - `collect_all`: when `false`, stops after the first executable match. +/// - `match_ctx`: on Windows, a `WorkspaceMatchContext`; on other platforms, +/// the unit type to align signatures. +/// +/// Returns a `Result<Vec<Utf8PathBuf>, Error>` containing matched executable +/// paths or an error when UTF-8 conversion fails. fn collect_workspace_matches( entries: impl Iterator<Item = walkdir::DirEntry>, command: &str,