From 6255aba316bb40d3d03c78cc5c27e835fba52e3c Mon Sep 17 00:00:00 2001 From: Leynos Date: Tue, 4 Nov 2025 19:33:25 +0000 Subject: [PATCH 1/4] Enforce bounded command output and stream large results --- docs/netsuke-design.md | 16 + docs/security-network-command-audit.md | 17 +- docs/users-guide.md | 22 +- src/stdlib/command.rs | 634 ++++++++++++++++++---- src/stdlib/mod.rs | 71 ++- test_support/src/command_helper.rs | 20 + tests/cucumber.rs | 7 + tests/features/stdlib.feature | 25 + tests/std_filter_tests/command_filters.rs | 280 +++++++++- tests/steps/stdlib_steps/assertions.rs | 66 ++- tests/steps/stdlib_steps/config.rs | 22 + tests/steps/stdlib_steps/rendering.rs | 26 + tests/steps/stdlib_steps/workspace.rs | 15 +- 13 files changed, 1101 insertions(+), 120 deletions(-) diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 5f948d56..d4eda21d 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -1045,6 +1045,22 @@ Implementation details: shared `StdlibState` that flips an `impure` flag whenever these helpers execute so callers can detect templates that interacted with the outside world. +- `shell` and `grep` enforce a configurable stdout capture limit (default + 1 MiB) via `StdlibConfig::with_command_max_output_bytes`. Exceeding the limit + raises an error that quotes the configured budget so manifests can adjust. + Templates can request streaming by passing `{'mode': 'tempfile'}` as the + second filter argument. Streaming writes stdout to a temporary file guarded + by `StdlibConfig::with_command_max_stream_bytes`, which defaults to 64 MiB to + prevent runaway disk usage while still tolerating deliberate large outputs. +- The command helpers manage pipe budgets using a `PipeSpec`/`PipeLimit` + tracker. Each pipe spawns a dedicated reader thread that records how many + bytes were drained and aborts once the configured limit is exceeded, + surfacing an `OutputLimit` diagnostic that names the stream and mode. When + streaming is requested the reader persists data to a temporary file, keeping + the limit in place so exceptionally large outputs are rejected before the + filesystem fills up. The `StdlibConfig::into_components` helper consumes the + builder and hands owned network/command configurations to the registration + routines, avoiding needless cloning of the capability handles. Custom external commands can be registered as additional filters. Those should be marked `pure` if safe for caching or `impure` otherwise. diff --git a/docs/security-network-command-audit.md b/docs/security-network-command-audit.md index 901f7c5a..3392f753 100644 --- a/docs/security-network-command-audit.md +++ b/docs/security-network-command-audit.md @@ -63,12 +63,21 @@ introduces, and concrete remediation tasks that would harden the helpers. - Provide an allowlist-based command runner (e.g. declarative mapping of helper names to binaries) so manifests can reference vetted utilities without shell access. -- [ ] **Helpers buffer stdout/stderr without limits.** Both filters capture the - entire command output into memory before returning it. A command that writes - an unbounded stream will lead to memory exhaustion or at least prolonged - blocking. *Remediation tasks:* +- [x] **Helpers buffer stdout/stderr without limits.** *(Status: done.)* Both + filters capture the entire command output into memory before returning it. A + command that writes an unbounded stream will lead to memory exhaustion or at + least prolonged blocking. *Remediation tasks:* - Enforce maximum output sizes with clear errors when exceeded. - Stream results to temporary files when callers opt in to large outputs. + - **Remediation:** `StdlibConfig` now exposes + `with_command_max_output_bytes` and `with_command_max_stream_bytes` so + integrators can tailor command limits. The `shell` and `grep` filters + honour those limits, reporting the configured byte budget when commands + exceed it. Templates can request streaming by passing + `{'mode': 'tempfile'}` as an options argument, which spools stdout to a + temporary file guarded by the streaming limit. Pipe readers enforce their + budgets incrementally so long-running commands fail fast once the + configured allowance is exceeded. ## Next steps diff --git a/docs/users-guide.md b/docs/users-guide.md index 1f24f18d..3b849237 100644 --- a/docs/users-guide.md +++ b/docs/users-guide.md @@ -249,8 +249,8 @@ dynamic capabilities to your manifest. - Expressions: `{{ 1 + 1 }}`, `{{ sources | map('basename') }}` - Control Structures (within specific keys like `foreach`, `when`, or inside - `macros`): `{% if enable %}…{% endif %}`, `{% for item in list %}…{% - endfor %}` + `macros`): `{% if enable %}…{% endif %}`, `{% for item in list %}…{% endfor + %}` **Important:** Structural Jinja (`{% %}`) is generally **not** allowed directly within the YAML structure outside of `macros`. Logic should primarily be within @@ -337,8 +337,8 @@ templates. Enforces a configurable maximum response size (default 8 MiB); requests abort with an error quoting the configured threshold when the limit is exceeded. Cached downloads stream directly to disk and remove partial files on error. - Configure the limit with `StdlibConfig::with_fetch_max_response_bytes`. - Marks template as impure. + Configure the limit with `StdlibConfig::with_fetch_max_response_bytes`. Marks + template as impure. - `now(offset=None)`: Returns the current time as a timezone-aware object (defaults to UTC). `offset` can be '+HH:MM' or 'Z'. Exposes `.iso8601`, @@ -408,12 +408,20 @@ Apply filters using the pipe `|` operator: `{{ value | filter_name(args...) }}` - `shell(command_string)`: Pipes the input value (string or bytes) as stdin to `command_string` executed via the system shell (`sh -c` or `cmd /C`). - Returns stdout. **Marks the template as impure.** Example: `{{ user_list | - shell('grep admin') }}` + Returns stdout. **Marks the template as impure.** Example: + `{{ user_list | shell('grep admin') }}`. The captured stdout is limited to 1 + MiB by default; configure a different budget with + `StdlibConfig::with_command_max_output_bytes`. Exceeding the limit raises an + `InvalidOperation` error that quotes the configured threshold. Templates can + pass an options mapping such as `{'mode': 'tempfile'}` to stream stdout into + a temporary file instead. The file path is returned to the template and + remains bounded by `StdlibConfig::with_command_max_stream_bytes` (default 64 + MiB). - `grep(pattern, flags=None)`: Filters input lines matching `pattern`. `flags` can be a string (e.g., `'-i'`) or list of strings. Implemented via - `shell`. Marks template as impure. + `shell`. Marks template as impure. The same output and streaming limits apply + when `grep` emits large result sets. **Impurity:** Filters like `shell` and functions like `fetch` interact with the outside world. Netsuke tracks this "impurity". Impure templates might affect diff --git a/src/stdlib/command.rs b/src/stdlib/command.rs index ac5604ea..4003b94a 100644 --- a/src/stdlib/command.rs +++ b/src/stdlib/command.rs @@ -38,6 +38,7 @@ use std::{ fmt::{self}, io::{self, Read, Write}, + path::{Path, PathBuf}, process::{Child, Command, ExitStatus, Stdio}, sync::{ Arc, @@ -48,12 +49,14 @@ use std::{ }; use super::value_from_bytes; +use camino::Utf8PathBuf; use minijinja::{ Error, ErrorKind, State, value::{Value, ValueKind}, }; #[cfg(not(windows))] use shell_quote::{QuoteRefExt, Sh}; +use tempfile::NamedTempFile; use wait_timeout::ChildExt; #[cfg(windows)] @@ -75,6 +78,262 @@ const SHELL_ARGS: &[&str] = &["-c"]; // while still surfacing timeouts for misbehaving commands. const COMMAND_TIMEOUT: Duration = Duration::from_secs(5); +#[derive(Clone)] +pub(crate) struct CommandConfig { + pub(crate) max_capture_bytes: u64, + pub(crate) max_stream_bytes: u64, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum OutputMode { + Capture, + Tempfile, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum OutputStream { + Stdout, + Stderr, +} + +impl OutputStream { + const fn describe(self) -> &'static str { + match self { + Self::Stdout => "stdout", + Self::Stderr => "stderr", + } + } +} + +impl OutputMode { + const fn describe(self) -> &'static str { + match self { + Self::Capture => "capture", + Self::Tempfile => "streaming", + } + } +} + +#[derive(Clone, Copy)] +struct PipeSpec { + stream: OutputStream, + mode: OutputMode, + limit: u64, +} + +impl PipeSpec { + const fn new(stream: OutputStream, mode: OutputMode, limit: u64) -> Self { + Self { + stream, + mode, + limit, + } + } + + const fn stream(self) -> OutputStream { + self.stream + } + + const fn mode(self) -> OutputMode { + self.mode + } + + const fn limit(self) -> u64 { + self.limit + } + + const fn into_limit(self) -> PipeLimit { + PipeLimit { + spec: self, + consumed: 0, + } + } +} + +struct PipeLimit { + spec: PipeSpec, + consumed: u64, +} + +impl PipeLimit { + fn record(&mut self, read: usize) -> Result<(), CommandFailure> { + let bytes = u64::try_from(read) + .map_err(|_| CommandFailure::Io(io::Error::other("pipe read size overflow")))?; + let new_total = self + .consumed + .checked_add(bytes) + .ok_or_else(|| CommandFailure::Io(io::Error::other("pipe output size overflow")))?; + if new_total > self.spec.limit() { + return Err(CommandFailure::OutputLimit { + stream: self.spec.stream(), + mode: self.spec.mode(), + limit: self.spec.limit(), + }); + } + self.consumed = new_total; + Ok(()) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +struct CommandOptions { + stdout_mode: OutputMode, +} + +impl CommandOptions { + fn from_value(options: Option) -> Result { + let Some(raw) = options else { + return Ok(Self::default()); + }; + + if raw.is_undefined() { + return Ok(Self::default()); + } + + match raw.kind() { + ValueKind::String => { + let Some(text) = raw.as_str() else { + return Err(Error::new( + ErrorKind::InvalidOperation, + "command options string must be valid UTF-8", + )); + }; + Self::from_mode_str(text) + } + ValueKind::Map | ValueKind::Plain => { + let mode_value = raw.get_attr("mode")?; + if mode_value.is_undefined() { + return Ok(Self::default()); + } + let Some(mode) = mode_value.as_str() else { + return Err(Error::new( + ErrorKind::InvalidOperation, + "command option 'mode' must be a string", + )); + }; + Self::from_mode_str(mode) + } + _ => Err(Error::new( + ErrorKind::InvalidOperation, + "command options must be a string or mapping", + )), + } + } + + fn from_mode_str(mode: &str) -> Result { + match mode { + "capture" => Ok(Self { + stdout_mode: OutputMode::Capture, + }), + "tempfile" | "stream" | "streaming" => Ok(Self { + stdout_mode: OutputMode::Tempfile, + }), + other => Err(Error::new( + ErrorKind::InvalidOperation, + format!("unsupported command output mode '{other}'"), + )), + } + } + + const fn stdout_mode(self) -> OutputMode { + self.stdout_mode + } +} + +impl Default for CommandOptions { + fn default() -> Self { + Self { + stdout_mode: OutputMode::Capture, + } + } +} + +enum StdoutResult { + Bytes(Vec), + Tempfile(Utf8PathBuf), +} + +enum PipeOutcome { + Bytes(Vec), + Tempfile(Utf8PathBuf), +} + +#[derive(Clone, Copy)] +struct CommandContext<'a> { + config: &'a CommandConfig, + options: CommandOptions, +} + +impl<'a> CommandContext<'a> { + const fn new(config: &'a CommandConfig, options: CommandOptions) -> Self { + Self { config, options } + } + + const fn stdout_mode(self) -> OutputMode { + self.options.stdout_mode() + } + + const fn config(self) -> &'a CommandConfig { + self.config + } +} + +struct GrepCall<'a> { + pattern: &'a str, + flags: Option, +} + +impl<'a> GrepCall<'a> { + const fn new(pattern: &'a str, flags: Option) -> Self { + Self { pattern, flags } + } +} + +#[derive(Clone, Copy)] +struct CommandLocation<'a> { + template: &'a str, + command: &'a str, +} + +impl<'a> CommandLocation<'a> { + const fn new(template: &'a str, command: &'a str) -> Self { + Self { template, command } + } + + fn describe(self) -> String { + format!("command '{}' in template '{}'", self.command, self.template) + } +} + +#[derive(Clone, Copy)] +struct ExitDetails<'a> { + status: Option, + stderr: &'a [u8], +} + +impl<'a> ExitDetails<'a> { + const fn new(status: Option, stderr: &'a [u8]) -> Self { + Self { status, stderr } + } +} + +#[derive(Clone, Copy)] +struct LimitExceeded { + stream: OutputStream, + mode: OutputMode, + limit: u64, +} + +impl LimitExceeded { + const fn new(stream: OutputStream, mode: OutputMode, limit: u64) -> Self { + Self { + stream, + mode, + limit, + } + } +} + /// Registers shell-oriented filters in the `MiniJinja` environment. /// /// The `shell` filter executes arbitrary shell commands and returns their @@ -86,27 +345,48 @@ const COMMAND_TIMEOUT: Duration = Duration::from_secs(5); /// /// Only use these filters with trusted templates. See the module-level /// documentation for further details about the associated risks. -pub(crate) fn register(env: &mut minijinja::Environment<'_>, impure: Arc) { +pub(crate) fn register( + env: &mut minijinja::Environment<'_>, + impure: Arc, + config: CommandConfig, +) { + let shared_config = Arc::new(config); let shell_flag = Arc::clone(&impure); + let shell_config = Arc::clone(&shared_config); env.add_filter( "shell", - move |state: &State, value: Value, command: String| { + move |state: &State, value: Value, command: String, options: Option| { shell_flag.store(true, Ordering::Relaxed); - execute_shell(state, &value, &command) + let parsed = CommandOptions::from_value(options)?; + let context = CommandContext::new(shell_config.as_ref(), parsed); + execute_shell(state, &value, &command, context) }, ); let grep_flag = impure; + let grep_config = Arc::clone(&shared_config); env.add_filter( "grep", - move |state: &State, value: Value, pattern: String, flags: Option| { + move |state: &State, + value: Value, + pattern: String, + flags: Option, + options: Option| { grep_flag.store(true, Ordering::Relaxed); - execute_grep(state, &value, &pattern, flags) + let parsed = CommandOptions::from_value(options)?; + let context = CommandContext::new(grep_config.as_ref(), parsed); + let call = GrepCall::new(&pattern, flags); + execute_grep(state, &value, call, context) }, ); } -fn execute_shell(state: &State, value: &Value, command: &str) -> Result { +fn execute_shell( + state: &State, + value: &Value, + command: &str, + context: CommandContext<'_>, +) -> Result { let cmd = command.trim(); if cmd.is_empty() { return Err(Error::new( @@ -116,16 +396,21 @@ fn execute_shell(state: &State, value: &Value, command: &str) -> Result Ok(value_from_bytes(bytes)), + StdoutResult::Tempfile(path) => Ok(Value::from(path.as_str())), + } } fn execute_grep( state: &State, value: &Value, - pattern: &str, - flags: Option, + call: GrepCall<'_>, + context: CommandContext<'_>, ) -> Result { + let GrepCall { pattern, flags } = call; if pattern.is_empty() { return Err(Error::new( ErrorKind::InvalidOperation, @@ -139,14 +424,17 @@ fn execute_grep( let input = to_bytes(value)?; #[cfg(windows)] - let output = run_program("grep", &args, &input) + let output = run_program("grep", &args, &input, context) .map_err(|err| command_error(err, state.name(), &command))?; #[cfg(not(windows))] - let output = - run_command(&command, &input).map_err(|err| command_error(err, state.name(), &command))?; + let output = run_command(&command, &input, context) + .map_err(|err| command_error(err, state.name(), &command))?; - Ok(value_from_bytes(output)) + match output { + StdoutResult::Bytes(bytes) => Ok(value_from_bytes(bytes)), + StdoutResult::Tempfile(path) => Ok(Value::from(path.as_str())), + } } fn collect_flag_args(flags: Option) -> Result, Error> { @@ -289,7 +577,11 @@ fn to_bytes(value: &Value) -> Result, Error> { Ok(value.to_string().into_bytes()) } -fn run_command(command: &str, input: &[u8]) -> Result, CommandFailure> { +fn run_command( + command: &str, + input: &[u8], + context: CommandContext<'_>, +) -> Result { let mut cmd = Command::new(SHELL); cmd.args(SHELL_ARGS) .arg(command) @@ -297,29 +589,47 @@ fn run_command(command: &str, input: &[u8]) -> Result, CommandFailure> { .stdout(Stdio::piped()) .stderr(Stdio::piped()); - run_child(cmd, input) + run_child(cmd, input, context) } #[cfg(windows)] -fn run_program(program: &str, args: &[String], input: &[u8]) -> Result, CommandFailure> { +fn run_program( + program: &str, + args: &[String], + input: &[u8], + context: CommandContext<'_>, +) -> Result { let mut cmd = Command::new(program); cmd.args(args) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .stderr(Stdio::piped()); - run_child(cmd, input) + run_child(cmd, input, context) } -fn run_child(mut command: Command, input: &[u8]) -> Result, CommandFailure> { +fn run_child( + mut command: Command, + input: &[u8], + context: CommandContext<'_>, +) -> Result { let mut child = command.spawn().map_err(CommandFailure::Spawn)?; let mut stdin_handle = child.stdin.take().map(|mut stdin| { let buffer = input.to_vec(); thread::spawn(move || stdin.write_all(&buffer)) }); - let mut stdout_reader = spawn_pipe_reader(child.stdout.take()); - let mut stderr_reader = spawn_pipe_reader(child.stderr.take()); + let stdout_limit = match context.stdout_mode() { + OutputMode::Capture => context.config().max_capture_bytes, + OutputMode::Tempfile => context.config().max_stream_bytes, + }; + let stderr_limit = context.config().max_capture_bytes; + + let stdout_spec = PipeSpec::new(OutputStream::Stdout, context.stdout_mode(), stdout_limit); + let stderr_spec = PipeSpec::new(OutputStream::Stderr, OutputMode::Capture, stderr_limit); + + let mut stdout_reader = spawn_pipe_reader(child.stdout.take(), stdout_spec); + let mut stderr_reader = spawn_pipe_reader(child.stderr.take(), stderr_spec); let status = match wait_for_exit(&mut child, COMMAND_TIMEOUT) { Ok(status) => status, @@ -329,13 +639,24 @@ fn run_child(mut command: Command, input: &[u8]) -> Result, CommandFailu } }; - let stdout = join_reader(stdout_reader.take()).map_err(CommandFailure::Io)?; - let stderr = join_reader(stderr_reader.take()).map_err(CommandFailure::Io)?; + let stdout = join_reader(stdout_reader.take(), stdout_spec)?; + let stderr_outcome = join_reader(stderr_reader.take(), stderr_spec)?; + + let stderr = match stderr_outcome { + PipeOutcome::Bytes(bytes) => bytes, + PipeOutcome::Tempfile(path) => { + tracing::warn!(?path, "stderr reader returned a temp file; discarding path"); + Vec::new() + } + }; handle_stdin_result(stdin_handle.take(), status.code(), &stderr)?; if status.success() { - Ok(stdout) + Ok(match stdout { + PipeOutcome::Bytes(bytes) => StdoutResult::Bytes(bytes), + PipeOutcome::Tempfile(path) => StdoutResult::Tempfile(path), + }) } else { Err(CommandFailure::Exit { status: status.code(), @@ -345,16 +666,12 @@ fn run_child(mut command: Command, input: &[u8]) -> Result, CommandFailu } fn cleanup_readers( - stdout_reader: &mut Option>>>, - stderr_reader: &mut Option>>>, + stdout_reader: &mut Option>>, + stderr_reader: &mut Option>>, stdin_handle: &mut Option>>, ) { - if let Err(err) = join_reader(stdout_reader.take()) { - tracing::warn!("failed to join stdout reader: {err}"); - } - if let Err(err) = join_reader(stderr_reader.take()) { - tracing::warn!("failed to join stderr reader: {err}"); - } + join_pipe_for_cleanup("stdout", stdout_reader); + join_pipe_for_cleanup("stderr", stderr_reader); if let Some(handle) = stdin_handle.take() && let Err(join_err) = handle.join() { @@ -362,6 +679,23 @@ fn cleanup_readers( } } +fn join_pipe_for_cleanup( + label: &str, + reader_handle: &mut Option>>, +) { + if let Some(join_handle) = reader_handle.take() { + match join_handle.join() { + Ok(Ok(_)) => {} + Ok(Err(err)) => { + tracing::warn!(stream = label, ?err, "pipe reader failed during cleanup"); + } + Err(join_err) => { + tracing::warn!(stream = label, ?join_err, "pipe reader thread panicked"); + } + } + } +} + fn handle_stdin_result( stdin_handle: Option>>, status: Option, @@ -393,62 +727,112 @@ fn handle_stdin_result( } fn command_error(err: CommandFailure, template: &str, command: &str) -> Error { + let location = CommandLocation::new(template, command); match err { - CommandFailure::Spawn(spawn) => Error::new( - ErrorKind::InvalidOperation, - format!("failed to spawn command '{command}' in template '{template}': {spawn}"), - ), - CommandFailure::Io(io_err) => { - let pipe_msg = if io_err.kind() == io::ErrorKind::BrokenPipe { - " (command closed input early)" - } else { - "" - }; - Error::new( - ErrorKind::InvalidOperation, - format!("command '{command}' in template '{template}' failed: {io_err}{pipe_msg}"), - ) - } + CommandFailure::Spawn(spawn) => spawn_error(location, &spawn), + CommandFailure::Io(io_err) => io_error(location, &io_err), CommandFailure::BrokenPipe { source, status, stderr, - } => { - let mut msg = format!( - "command '{command}' in template '{template}' failed: {source} (command closed input early)" - ); - if let Some(code) = status { - msg.push_str("; exited with status "); - let code_text = code.to_string(); - msg.push_str(&code_text); - } else { - msg.push_str("; terminated by signal"); - } - append_stderr(&mut msg, &stderr); - Error::new(ErrorKind::InvalidOperation, msg) - } + } => broken_pipe_error(location, &source, ExitDetails::new(status, &stderr)), CommandFailure::Exit { status, stderr } => { - let mut msg = status.map_or_else( - || format!("command '{command}' in template '{template}' terminated by signal"), - |code| { - format!( - "command '{command}' in template '{template}' exited with status {code}" - ) - }, - ); - append_stderr(&mut msg, &stderr); - Error::new(ErrorKind::InvalidOperation, msg) + exit_error(location, ExitDetails::new(status, &stderr)) } - CommandFailure::Timeout(duration) => Error::new( - ErrorKind::InvalidOperation, - format!( - "command '{command}' in template '{template}' timed out after {}s", - duration.as_secs() - ), + CommandFailure::OutputLimit { + stream, + mode, + limit, + } => output_limit_error(location, LimitExceeded::new(stream, mode, limit)), + CommandFailure::StreamPathNotUtf8(path) => stream_path_error(location, &path), + CommandFailure::Timeout(duration) => timeout_error(location, duration), + } +} + +fn spawn_error(location: CommandLocation<'_>, err: &io::Error) -> Error { + Error::new( + ErrorKind::InvalidOperation, + format!("failed to spawn {}: {err}", location.describe()), + ) +} + +fn io_error(location: CommandLocation<'_>, err: &io::Error) -> Error { + let mut message = format!("{} failed: {err}", location.describe()); + if err.kind() == io::ErrorKind::BrokenPipe { + message.push_str(" (command closed input early)"); + } + Error::new(ErrorKind::InvalidOperation, message) +} + +fn broken_pipe_error( + location: CommandLocation<'_>, + err: &io::Error, + details: ExitDetails<'_>, +) -> Error { + let mut message = format!( + "{} failed: {err} (command closed input early)", + location.describe() + ); + append_exit_status(&mut message, details.status); + append_stderr(&mut message, details.stderr); + Error::new(ErrorKind::InvalidOperation, message) +} + +fn exit_error(location: CommandLocation<'_>, details: ExitDetails<'_>) -> Error { + let mut message = details.status.map_or_else( + || format!("{} terminated by signal", location.describe()), + |code| format!("{} exited with status {code}", location.describe()), + ); + append_stderr(&mut message, details.stderr); + Error::new(ErrorKind::InvalidOperation, message) +} + +fn output_limit_error(location: CommandLocation<'_>, exceeded: LimitExceeded) -> Error { + Error::new( + ErrorKind::InvalidOperation, + format!( + "{} exceeded {stream} {mode} limit of {limit} bytes", + location.describe(), + stream = exceeded.stream.describe(), + mode = exceeded.mode.describe(), + limit = exceeded.limit, + ), + ) +} + +fn stream_path_error(location: CommandLocation<'_>, path: &Path) -> Error { + Error::new( + ErrorKind::InvalidOperation, + format!( + "{} produced a temporary output path that is not valid UTF-8: {}", + location.describe(), + path.display() ), + ) +} + +fn timeout_error(location: CommandLocation<'_>, duration: Duration) -> Error { + Error::new( + ErrorKind::InvalidOperation, + format!( + "{} timed out after {}s", + location.describe(), + duration.as_secs() + ), + ) +} + +fn append_exit_status(message: &mut String, status: Option) { + if let Some(code) = status { + message.push_str("; exited with status "); + let code_text = code.to_string(); + message.push_str(&code_text); + } else { + message.push_str("; terminated by signal"); } } +#[derive(Debug)] enum CommandFailure { Spawn(io::Error), Io(io::Error), @@ -461,6 +845,12 @@ enum CommandFailure { status: Option, stderr: Vec, }, + OutputLimit { + stream: OutputStream, + mode: OutputMode, + limit: u64, + }, + StreamPathNotUtf8(PathBuf), Timeout(Duration), } @@ -486,28 +876,86 @@ fn wait_for_exit(child: &mut Child, timeout: Duration) -> Result(pipe: Option) -> Option>>> +fn spawn_pipe_reader( + pipe: Option, + spec: PipeSpec, +) -> Option>> where R: Read + Send + 'static, { - pipe.map(|mut reader| { - thread::spawn(move || { + pipe.map(|reader| thread::spawn(move || read_pipe(reader, spec))) +} + +fn join_reader( + reader_handle: Option>>, + spec: PipeSpec, +) -> Result { + match reader_handle { + Some(join_handle) => join_handle + .join() + .map_err(|_| CommandFailure::Io(io::Error::other("pipe reader panicked")))?, + None => { + if matches!(spec.mode(), OutputMode::Tempfile) { + create_empty_tempfile().map(PipeOutcome::Tempfile) + } else { + Ok(PipeOutcome::Bytes(Vec::new())) + } + } + } +} + +fn read_pipe(mut reader: R, spec: PipeSpec) -> Result +where + R: Read, +{ + let mut limit = spec.into_limit(); + match spec.mode() { + OutputMode::Capture => { let mut buf = Vec::new(); - reader.read_to_end(&mut buf)?; - Ok(buf) - }) - }) + let mut chunk = [0_u8; 8192]; + loop { + let read = reader.read(&mut chunk).map_err(CommandFailure::Io)?; + if read == 0 { + break; + } + limit.record(read)?; + buf.extend(chunk.iter().take(read).copied()); + } + Ok(PipeOutcome::Bytes(buf)) + } + OutputMode::Tempfile => { + let mut file = NamedTempFile::new().map_err(CommandFailure::Io)?; + let mut chunk = [0_u8; 8192]; + loop { + let read = reader.read(&mut chunk).map_err(CommandFailure::Io)?; + if read == 0 { + break; + } + limit.record(read)?; + let slice = chunk.get(..read).ok_or_else(|| { + CommandFailure::Io(io::Error::other("pipe read out of range")) + })?; + file.write_all(slice).map_err(CommandFailure::Io)?; + } + file.flush().map_err(CommandFailure::Io)?; + let temp_path = file.into_temp_path(); + let path = temp_path + .keep() + .map_err(|err| CommandFailure::Io(err.error))?; + let utf8 = + Utf8PathBuf::from_path_buf(path).map_err(CommandFailure::StreamPathNotUtf8)?; + Ok(PipeOutcome::Tempfile(utf8)) + } + } } -fn join_reader(handle: Option>>>) -> io::Result> { - handle.map_or_else( - || Ok(Vec::new()), - |join_handle| { - join_handle - .join() - .map_err(|_| io::Error::other("pipe reader panicked"))? - }, - ) +fn create_empty_tempfile() -> Result { + let file = NamedTempFile::new().map_err(CommandFailure::Io)?; + let path = file + .into_temp_path() + .keep() + .map_err(|err| CommandFailure::Io(err.error))?; + Utf8PathBuf::from_path_buf(path).map_err(CommandFailure::StreamPathNotUtf8) } fn append_stderr(message: &mut String, stderr: &[u8]) { diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index c7e6223c..0260bd1f 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -36,6 +36,10 @@ type FileTest = (&'static str, fn(fs::FileType) -> bool); pub(crate) const DEFAULT_FETCH_CACHE_DIR: &str = ".netsuke/fetch"; /// Default upper bound for network helper responses (8 MiB). pub(crate) const DEFAULT_FETCH_MAX_RESPONSE_BYTES: u64 = 8 * 1024 * 1024; +/// Default upper bound for captured command output (1 MiB). +pub(crate) const DEFAULT_COMMAND_MAX_OUTPUT_BYTES: u64 = 1024 * 1024; +/// Default upper bound for streamed command output files (64 MiB). +pub(crate) const DEFAULT_COMMAND_MAX_STREAM_BYTES: u64 = 64 * 1024 * 1024; /// Configuration for registering Netsuke's standard library helpers. /// @@ -60,6 +64,8 @@ pub struct StdlibConfig { fetch_cache_relative: Utf8PathBuf, network_policy: NetworkPolicy, fetch_max_response_bytes: u64, + command_max_output_bytes: u64, + command_max_stream_bytes: u64, } impl StdlibConfig { @@ -94,6 +100,8 @@ impl StdlibConfig { fetch_cache_relative: default, network_policy: NetworkPolicy::default(), fetch_max_response_bytes: DEFAULT_FETCH_MAX_RESPONSE_BYTES, + command_max_output_bytes: DEFAULT_COMMAND_MAX_OUTPUT_BYTES, + command_max_stream_bytes: DEFAULT_COMMAND_MAX_STREAM_BYTES, } } @@ -150,20 +158,62 @@ impl StdlibConfig { Ok(self) } + /// Override the maximum captured command output size in bytes. + /// + /// # Errors + /// + /// Returns an error when `max_bytes` is zero. + pub fn with_command_max_output_bytes(mut self, max_bytes: u64) -> anyhow::Result { + anyhow::ensure!(max_bytes > 0, "command output limit must be positive"); + self.command_max_output_bytes = max_bytes; + Ok(self) + } + + /// Override the maximum streamed command output size in bytes. + /// + /// Streaming still enforces a ceiling to prevent helpers from exhausting + /// disk space. Configure the limit according to the largest expected + /// helper output. + /// + /// # Errors + /// + /// Returns an error when `max_bytes` is zero. + pub fn with_command_max_stream_bytes(mut self, max_bytes: u64) -> anyhow::Result { + anyhow::ensure!(max_bytes > 0, "command stream limit must be positive"); + self.command_max_stream_bytes = max_bytes; + Ok(self) + } + /// The configured fetch cache directory relative to the workspace root. #[must_use] pub fn fetch_cache_relative(&self) -> &Utf8Path { &self.fetch_cache_relative } - /// Consume the configuration and produce a `NetworkConfig` for the network module. - pub(crate) fn network_config(self) -> NetworkConfig { - NetworkConfig { - cache_root: self.workspace_root, - cache_relative: self.fetch_cache_relative, - policy: self.network_policy, - max_response_bytes: self.fetch_max_response_bytes, - } + /// Consume the configuration and expose component modules with owned state. + pub(crate) fn into_components(self) -> (NetworkConfig, command::CommandConfig) { + let Self { + workspace_root, + fetch_cache_relative, + network_policy, + fetch_max_response_bytes, + command_max_output_bytes, + command_max_stream_bytes, + } = self; + + let network = NetworkConfig { + cache_root: workspace_root, + cache_relative: fetch_cache_relative, + policy: network_policy, + max_response_bytes: fetch_max_response_bytes, + }; + + let command = command::CommandConfig { + max_capture_bytes: command_max_output_bytes, + max_stream_bytes: command_max_stream_bytes, + }; + + (network, command) } pub(crate) fn validate_cache_relative(relative: &Utf8Path) -> anyhow::Result<()> { @@ -286,8 +336,9 @@ pub fn register_with_config(env: &mut Environment<'_>, config: StdlibConfig) -> path::register_filters(env); collections::register_filters(env); let impure = state.impure_flag(); - network::register_functions(env, Arc::clone(&impure), config.network_config()); - command::register(env, impure); + let (network_config, command_config) = config.into_components(); + network::register_functions(env, Arc::clone(&impure), network_config); + command::register(env, impure, command_config); time::register_functions(env); state } diff --git a/test_support/src/command_helper.rs b/test_support/src/command_helper.rs index 3daa0d88..0f03bb12 100644 --- a/test_support/src/command_helper.rs +++ b/test_support/src/command_helper.rs @@ -26,6 +26,17 @@ const FAILURE_SOURCE: &str = concat!( "}\n", ); +const LARGE_OUTPUT_SOURCE: &str = concat!( + "use std::io::{self, Write};\n", + "fn main() {\n", + " let mut out = io::stdout();\n", + " let chunk = [b'x'; 1024];\n", + " for _ in 0..128 {\n", + " out.write_all(&chunk).expect(\"stdout\");\n", + " }\n", + "}\n", +); + /// Compile a helper binary that converts stdin to upper case and return the /// executable path. /// @@ -71,6 +82,15 @@ pub fn compile_failure_helper(dir: &Dir, root: &Utf8PathBuf, name: &str) -> Resu compile_rust_helper(dir, root, name, FAILURE_SOURCE) } +/// Compile a helper that writes a sizeable payload to stdout. +pub fn compile_large_output_helper( + dir: &Dir, + root: &Utf8PathBuf, + name: &str, +) -> Result { + compile_rust_helper(dir, root, name, LARGE_OUTPUT_SOURCE) +} + /// Compile an arbitrary Rust helper source to an executable. /// /// Writes `{name}.rs` into `dir`, invokes the toolchain, and returns the diff --git a/tests/cucumber.rs b/tests/cucumber.rs index a017e8a2..3e260139 100644 --- a/tests/cucumber.rs +++ b/tests/cucumber.rs @@ -51,6 +51,12 @@ pub struct CliWorld { pub stdlib_policy: Option, /// Maximum fetch response size configured for the active scenario. pub stdlib_fetch_max_bytes: Option, + /// Maximum captured command output size configured for the scenario. + pub stdlib_command_max_output_bytes: Option, + /// Maximum streamed command output size configured for the scenario. + pub stdlib_command_stream_max_bytes: Option, + /// Text payload injected into stdlib templates for streaming scenarios. + pub stdlib_text: Option, /// Last HTTP server fixture started by stdlib steps. pub http_server: Option, /// URL exposed by the active HTTP server fixture. @@ -176,6 +182,7 @@ impl Drop for CliWorld { fn drop(&mut self) { self.shutdown_http_server(); self.restore_environment(); + self.stdlib_text = None; } } diff --git a/tests/features/stdlib.feature b/tests/features/stdlib.feature index cf7ebfde..c29c12d3 100644 --- a/tests/features/stdlib.feature +++ b/tests/features/stdlib.feature @@ -84,6 +84,31 @@ Feature: Template stdlib filters Then the stdlib error contains "exited" And the stdlib template is impure + Scenario: shell filter enforces command output limits + Given a large-output stdlib command helper + And the stdlib command output limit is 512 bytes + When I render the stdlib template "{{ '' | shell(cmd) }}" using the stdlib command helper + Then the stdlib error contains "stdout capture limit of 512 bytes" + And the stdlib template is impure + + Scenario: shell filter streams large output to a temporary file + Given a large-output stdlib command helper + And the stdlib command output limit is 512 bytes + And the stdlib command stream limit is 200000 bytes + When I render the stdlib template "{{ '' | shell(cmd, {'mode': 'tempfile'}) }}" using the stdlib command helper + Then the stdlib output file has at least 65000 bytes + And the stdlib output file contains only "x" + And the stdlib template is impure + + Scenario: grep filter streams large output to a temporary file + Given the stdlib command output limit is 512 bytes + And the stdlib command stream limit is 200000 bytes + And the stdlib template text contains 32768 lines of "match" + When I render the stdlib template "{{ text | grep('match', none, {'mode': 'tempfile'}) }}" using the stdlib text + Then the stdlib output file has at least 190000 bytes + And the stdlib output file equals the stdlib text + And the stdlib template is impure + Scenario: grep filter extracts matching lines When I render the stdlib template "{{ 'alpha\nbeta\n' | grep('beta') | trim }}" Then the stdlib output is "beta" diff --git a/tests/std_filter_tests/command_filters.rs b/tests/std_filter_tests/command_filters.rs index cc446e9b..a90a5de2 100644 --- a/tests/std_filter_tests/command_filters.rs +++ b/tests/std_filter_tests/command_filters.rs @@ -1,15 +1,21 @@ use anyhow::{anyhow, bail, ensure, Context, Result}; -use camino::Utf8PathBuf; +use camino::{Utf8Path, Utf8PathBuf}; use cap_std::{ambient_authority, fs_utf8::Dir}; use minijinja::{context, value::Value, Environment, ErrorKind}; use rstest::rstest; use tempfile::tempdir; -use test_support::command_helper::{compile_failure_helper, compile_uppercase_helper}; +use test_support::command_helper::{ + compile_failure_helper, + compile_large_output_helper, + compile_uppercase_helper, +}; +use std::fs; #[cfg(windows)] use test_support::command_helper::compile_rust_helper; use super::support::fallible; +use netsuke::stdlib::StdlibConfig; struct CommandFixture { _temp: tempfile::TempDir, @@ -19,9 +25,10 @@ struct CommandFixture { } impl CommandFixture { - fn new( + fn with_config( compiler: CommandCompiler, binary: &str, + config: StdlibConfig, ) -> Result { let temp = tempdir().context("create command fixture tempdir")?; let root = Utf8PathBuf::from_path_buf(temp.path().to_path_buf()) @@ -30,7 +37,7 @@ impl CommandFixture { .context("open command fixture directory")?; let helper = compiler(&dir, &root, binary)?; let command = format!("\"{}\"", helper.as_str()); - let (env, mut state) = fallible::stdlib_env_with_state()?; + let (env, mut state) = fallible::stdlib_env_with_config(config)?; state.reset_impure(); Ok(Self { _temp: temp, @@ -40,6 +47,13 @@ impl CommandFixture { }) } + fn new( + compiler: CommandCompiler, + binary: &str, + ) -> Result { + Self::with_config(compiler, binary, StdlibConfig::default()) + } + fn env(&mut self) -> &mut Environment<'static> { &mut self.env } @@ -60,6 +74,16 @@ enum ShellExpectation { Failure { substrings: &'static [&'static str] }, } +const STREAM_LINE_COUNT: usize = 32_768; + +fn streaming_match_payload() -> String { + let mut text = String::with_capacity(STREAM_LINE_COUNT * 6); + for _ in 0..STREAM_LINE_COUNT { + text.push_str("match\n"); + } + text +} + #[cfg(windows)] use { super::support::{EnvLock, EnvVarGuard}, @@ -192,6 +216,187 @@ fn shell_filter_tolerates_commands_that_close_stdin() -> Result<()> { Ok(()) } +#[rstest] +fn shell_filter_enforces_output_limit() -> Result<()> { + let config = StdlibConfig::default() + .with_command_max_output_bytes(1024)?; + let mut fixture = + CommandFixture::with_config(compile_large_output_helper, "cmd_large", config)?; + { + let env = fixture.env(); + fallible::register_template(env, "shell_large", "{{ '' | shell(cmd) }}")?; + } + let command = fixture.command().to_owned(); + let template = { + let env = fixture.env(); + env.get_template("shell_large") + .context("fetch template 'shell_large'")? + }; + let err = match template.render(context!(cmd => command)) { + Ok(output) => bail!("expected shell output limit error but rendered {output}"), + Err(err) => err, + }; + ensure!( + err.kind() == ErrorKind::InvalidOperation, + "shell output limit should report InvalidOperation but was {:?}", + err.kind() + ); + ensure!( + err.to_string().contains("stdout capture limit of 1024 bytes"), + "limit error should mention configured budget: {err}" + ); + ensure!(fixture.state().is_impure(), "limit error should mark template impure"); + Ok(()) +} + +#[rstest] +fn shell_filter_streams_to_tempfiles() -> Result<()> { + let config = StdlibConfig::default() + .with_command_max_output_bytes(512)? + .with_command_max_stream_bytes(200_000)?; + let mut fixture = + CommandFixture::with_config(compile_large_output_helper, "cmd_stream", config)?; + { + let env = fixture.env(); + fallible::register_template( + env, + "shell_stream", + "{{ '' | shell(cmd, {'mode': 'tempfile'}) }}", + )?; + } + let command = fixture.command().to_owned(); + let template = { + let env = fixture.env(); + env.get_template("shell_stream") + .context("fetch template 'shell_stream'")? + }; + let rendered = template + .render(context!(cmd => command)) + .context("render shell streaming template")?; + ensure!(fixture.state().is_impure(), "streaming should mark template impure"); + let path = Utf8Path::new(&rendered); + let data = fs::read(path.as_std_path()).with_context(|| { + format!("read streamed output from {}", path.as_str()) + })?; + ensure!( + data.len() >= 65_000, + "expected streamed output to contain command data" + ); + ensure!( + data.iter().all(|byte| *byte == b'x'), + "streamed file should contain the helper payload" + ); + Ok(()) +} + +#[rstest] +fn shell_streaming_honours_size_limit() -> Result<()> { + let config = StdlibConfig::default() + .with_command_max_output_bytes(256)? + .with_command_max_stream_bytes(1024)?; + let mut fixture = + CommandFixture::with_config(compile_large_output_helper, "cmd_stream_limit", config)?; + { + let env = fixture.env(); + fallible::register_template( + env, + "shell_stream_limit", + "{{ '' | shell(cmd, {'mode': 'tempfile'}) }}", + )?; + } + let command = fixture.command().to_owned(); + let template = { + let env = fixture.env(); + env.get_template("shell_stream_limit") + .context("fetch template 'shell_stream_limit'")? + }; + let err = match template.render(context!(cmd => command)) { + Ok(output) => bail!("expected streaming limit error but rendered {output}"), + Err(err) => err, + }; + ensure!( + err.kind() == ErrorKind::InvalidOperation, + "streaming limit should report InvalidOperation but was {:?}", + err.kind() + ); + ensure!( + err.to_string().contains("stdout streaming limit of 1024 bytes"), + "streaming limit error should mention configured budget: {err}" + ); + ensure!(fixture.state().is_impure(), "streaming limit should mark template impure"); + Ok(()) +} + +#[cfg(not(windows))] +#[rstest] +fn grep_filter_streams_to_tempfiles() -> Result<()> { + let config = StdlibConfig::default() + .with_command_max_output_bytes(512)? + .with_command_max_stream_bytes(200_000)?; + let (mut env, mut state) = fallible::stdlib_env_with_config(config)?; + state.reset_impure(); + fallible::register_template( + &mut env, + "grep_stream", + "{{ text | grep('match', none, {'mode': 'tempfile'}) }}", + )?; + let template = env + .get_template("grep_stream") + .context("fetch template 'grep_stream'")?; + let payload = streaming_match_payload(); + let rendered = template + .render(context!(text => payload.clone())) + .context("render grep streaming template")?; + ensure!(state.is_impure(), "grep streaming should mark template impure"); + let path = Utf8Path::new(rendered.as_str()); + let metadata = fs::metadata(path.as_std_path()) + .with_context(|| format!("stat streamed grep output {}", path))?; + ensure!( + metadata.len() >= payload.len() as u64, + "streamed grep output should retain payload size" + ); + let contents = fs::read_to_string(path.as_std_path()) + .with_context(|| format!("read streamed grep output {}", path))?; + ensure!( + contents == payload, + "streamed grep file should contain the helper payload" + ); + Ok(()) +} + +#[cfg(not(windows))] +#[rstest] +fn grep_filter_enforces_output_limit() -> Result<()> { + let config = StdlibConfig::default() + .with_command_max_output_bytes(1024)?; + let (mut env, mut state) = fallible::stdlib_env_with_config(config)?; + state.reset_impure(); + let long_text = "x".repeat(2_500); + fallible::register_template( + &mut env, + "grep_limit", + "{{ text | grep('x') }}", + )?; + let template = env + .get_template("grep_limit") + .context("fetch template 'grep_limit'")?; + let err = match template.render(context!(text => long_text)) { + Ok(output) => bail!("expected grep output limit error but rendered {output}"), + Err(err) => err, + }; + ensure!( + err.kind() == ErrorKind::InvalidOperation, + "grep limit should report InvalidOperation but was {:?}", + err.kind() + ); + ensure!( + err.to_string().contains("stdout capture limit of 1024 bytes"), + "grep error should mention configured limit: {err}" + ); + ensure!(state.is_impure(), "grep limit should mark template impure"); + Ok(()) +} + #[rstest] fn grep_filter_filters_lines() -> Result<()> { let (mut env, mut state) = fallible::stdlib_env_with_state()?; @@ -308,6 +513,22 @@ const GREP_STUB: &str = concat!( "}\n", ); +#[cfg(windows)] +const GREP_STREAM_STUB: &str = concat!( + "use std::io::{self, Write};\n", + "fn main() {\n", + " let pattern = std::env::args().last().expect(\"pattern\");\n", + " if pattern != \"match\" {\n", + " std::process::exit(1);\n", + " }\n", + " let mut out = io::stdout();\n", + " let line = b\"match\\n\";\n", + " for _ in 0..32_768 {\n", + " out.write_all(line).expect(\"stdout\");\n", + " }\n", + "}\n", +); + #[cfg(windows)] const ARGS_STUB: &str = concat!( "fn main() {\n", @@ -355,6 +576,57 @@ line2 Ok(()) } +#[cfg(windows)] +#[rstest] +fn grep_streams_large_output_on_windows(env_lock: EnvLock) -> Result<()> { + let _lock = env_lock; + let temp = tempdir().context("create windows grep stream tempdir")?; + let root = Utf8PathBuf::from_path_buf(temp.path().to_path_buf()) + .map_err(|path| anyhow!("windows grep root is not valid UTF-8: {path:?}"))?; + let dir = Dir::open_ambient_dir(&root, ambient_authority()) + .context("open windows grep stream temp dir")?; + compile_rust_helper(&dir, &root, "grep", GREP_STREAM_STUB) + .context("compile streaming grep helper for windows tests")?; + + let mut path_value = OsString::from(root.as_str()); + path_value.push(";"); + path_value.push(std::env::var_os("PATH").unwrap_or_default()); + let _path = EnvVarGuard::set("PATH", &path_value); + + let config = StdlibConfig::default() + .with_command_max_output_bytes(512)? + .with_command_max_stream_bytes(200_000)?; + let (mut env, mut state) = fallible::stdlib_env_with_config(config)?; + state.reset_impure(); + fallible::register_template( + &mut env, + "grep_win_stream", + "{{ text | grep('match', none, {'mode': 'tempfile'}) }}", + )?; + let template = env + .get_template("grep_win_stream") + .context("fetch template 'grep_win_stream'")?; + let payload = streaming_match_payload(); + let rendered = template + .render(context!(text => payload.clone())) + .context("render windows grep streaming template")?; + ensure!(state.is_impure(), "grep streaming should mark template impure"); + let path = Utf8Path::new(rendered.as_str()); + let metadata = fs::metadata(path.as_std_path()) + .with_context(|| format!("stat streamed windows grep output {}", path))?; + ensure!( + metadata.len() >= payload.len() as u64, + "streamed grep output should retain payload size" + ); + let contents = fs::read_to_string(path.as_std_path()) + .with_context(|| format!("read streamed windows grep output {}", path))?; + ensure!( + contents == payload, + "streamed grep file should contain the helper payload" + ); + Ok(()) +} + #[cfg(windows)] #[rstest] fn shell_preserves_cmd_meta_characters(env_lock: EnvLock) -> Result<()> { diff --git a/tests/steps/stdlib_steps/assertions.rs b/tests/steps/stdlib_steps/assertions.rs index cd9428a6..a6e05600 100644 --- a/tests/steps/stdlib_steps/assertions.rs +++ b/tests/steps/stdlib_steps/assertions.rs @@ -1,10 +1,11 @@ //! Assertions shared by stdlib-related Cucumber steps, providing //! reusable checks for rendered output, errors, and cache behaviour. use crate::CliWorld; -use anyhow::{Context, Result, ensure}; +use anyhow::{Context, Result, bail, ensure}; use camino::Utf8Path; use cap_std::{ambient_authority, fs_utf8::Dir}; use cucumber::then; +use std::fs; use test_support::hash; use time::{Duration, OffsetDateTime, UtcOffset}; use url::Url; @@ -49,6 +50,10 @@ fn stdlib_output(world: &CliWorld) -> Result<&str> { .context("expected stdlib output") } +fn stdlib_output_path(world: &CliWorld) -> Result<&Utf8Path> { + stdlib_output(world).map(Utf8Path::new) +} + #[then(regex = r#"^the stdlib error contains "(.+)"$"#)] pub(crate) fn assert_stdlib_error( world: &mut CliWorld, @@ -172,3 +177,62 @@ pub(crate) fn assert_stdlib_output_offset( ); Ok(()) } + +#[then(regex = r"^the stdlib output file has at least (\d+) bytes$")] +pub(crate) fn assert_stdlib_output_file_min_size(world: &mut CliWorld, minimum: u64) -> Result<()> { + let path = stdlib_output_path(world)?; + let metadata = fs::metadata(path.as_std_path()) + .with_context(|| format!("stat stdlib output file {}", path.as_str()))?; + ensure!( + metadata.len() >= minimum, + "expected {} to contain at least {minimum} bytes but found {}", + path, + metadata.len(), + ); + Ok(()) +} + +#[then(regex = r#"^the stdlib output file contains only "(.+)"$"#)] +pub(crate) fn assert_stdlib_output_file_uniform( + world: &mut CliWorld, + expected: ExpectedOutput, +) -> Result<()> { + let pattern = expected.into_inner(); + ensure!( + pattern.chars().count() == 1, + "expected a single-character pattern but received '{pattern}'", + ); + let bytes = pattern.as_bytes(); + ensure!( + bytes.len() == 1, + "expected a single-byte pattern but received '{pattern}'", + ); + let Some(target) = bytes.first().copied() else { + bail!("pattern should contain a single byte"); + }; + let path = stdlib_output_path(world)?; + let data = fs::read(path.as_std_path()) + .with_context(|| format!("read stdlib output file {}", path.as_str()))?; + ensure!( + data.iter().all(|byte| *byte == target), + "expected {} to contain only '{pattern}'", + path, + ); + Ok(()) +} + +#[then("the stdlib output file equals the stdlib text")] +pub(crate) fn assert_stdlib_output_equals_text(world: &mut CliWorld) -> Result<()> { + let expected = world + .stdlib_text + .clone() + .context("expected stdlib template text to be configured")?; + let path = stdlib_output_path(world)?; + let data = fs::read_to_string(path.as_std_path()) + .with_context(|| format!("read stdlib output file {}", path.as_str()))?; + ensure!( + data == expected, + "expected streamed output to match configured text" + ); + Ok(()) +} diff --git a/tests/steps/stdlib_steps/config.rs b/tests/steps/stdlib_steps/config.rs index 91b545ff..265a6508 100644 --- a/tests/steps/stdlib_steps/config.rs +++ b/tests/steps/stdlib_steps/config.rs @@ -1,5 +1,6 @@ //! Configuration-related Cucumber steps for stdlib scenarios. +use super::types::ExpectedOutput; use crate::CliWorld; use cucumber::given; @@ -7,3 +8,24 @@ use cucumber::given; pub(crate) const fn configure_fetch_limit(world: &mut CliWorld, limit: u64) { world.stdlib_fetch_max_bytes = Some(limit); } + +#[given(regex = r"^the stdlib command output limit is (\d+) bytes$")] +pub(crate) const fn configure_command_output_limit(world: &mut CliWorld, limit: u64) { + world.stdlib_command_max_output_bytes = Some(limit); +} + +#[given(regex = r"^the stdlib command stream limit is (\d+) bytes$")] +pub(crate) const fn configure_command_stream_limit(world: &mut CliWorld, limit: u64) { + world.stdlib_command_stream_max_bytes = Some(limit); +} + +#[given(regex = r#"^the stdlib template text contains (\d+) lines of "(.+)"$"#)] +pub(crate) fn configure_stdlib_text(world: &mut CliWorld, lines: usize, line: ExpectedOutput) { + let line = line.into_inner(); + let mut text = String::with_capacity(line.len().saturating_mul(lines + 1)); + for _ in 0..lines { + text.push_str(&line); + text.push('\n'); + } + world.stdlib_text = Some(text); +} diff --git a/tests/steps/stdlib_steps/rendering.rs b/tests/steps/stdlib_steps/rendering.rs index 9829f494..15b9ef47 100644 --- a/tests/steps/stdlib_steps/rendering.rs +++ b/tests/steps/stdlib_steps/rendering.rs @@ -27,6 +27,16 @@ pub(crate) fn render_template_with_context( .with_fetch_max_response_bytes(limit) .context("configure stdlib fetch response limit")?; } + if let Some(limit) = world.stdlib_command_max_output_bytes { + config = config + .with_command_max_output_bytes(limit) + .context("configure stdlib command output limit")?; + } + if let Some(limit) = world.stdlib_command_stream_max_bytes { + config = config + .with_command_max_stream_bytes(limit) + .context("configure stdlib command stream limit")?; + } let state = stdlib::register_with_config(&mut env, config); state.reset_impure(); world.stdlib_state = Some(state); @@ -111,3 +121,19 @@ pub(crate) fn render_stdlib_template_with_command( .context("expected stdlib command helper to be compiled")?; render_template_with_context(world, &template_content, context!(cmd => command)) } + +#[expect( + clippy::needless_pass_by_value, + reason = "Cucumber requires owned capture arguments" +)] +#[when(regex = r#"^I render the stdlib template "(.+)" using the stdlib text$"#)] +pub(crate) fn render_stdlib_template_with_text( + world: &mut CliWorld, + template_content: TemplateContent, +) -> Result<()> { + let text = world + .stdlib_text + .clone() + .context("expected stdlib template text to be configured")?; + render_template_with_context(world, &template_content, context!(text => text)) +} diff --git a/tests/steps/stdlib_steps/workspace.rs b/tests/steps/stdlib_steps/workspace.rs index 3fa2e303..64193c9b 100644 --- a/tests/steps/stdlib_steps/workspace.rs +++ b/tests/steps/stdlib_steps/workspace.rs @@ -7,7 +7,9 @@ use cap_std::{ambient_authority, fs_utf8::Dir}; use cucumber::given; use std::ffi::OsStr; use test_support::{ - command_helper::{compile_failure_helper, compile_uppercase_helper}, + command_helper::{ + compile_failure_helper, compile_large_output_helper, compile_uppercase_helper, + }, env::set_var, }; @@ -72,6 +74,17 @@ pub(crate) fn failing_stdlib_command_helper(world: &mut CliWorld) -> Result<()> Ok(()) } +#[given("a large-output stdlib command helper")] +pub(crate) fn large_output_stdlib_command_helper(world: &mut CliWorld) -> Result<()> { + let root = ensure_workspace(world)?; + let handle = Dir::open_ambient_dir(&root, ambient_authority()) + .context("open stdlib workspace directory")?; + let helper = compile_large_output_helper(&handle, &root, "cmd_large") + .context("compile large-output helper")?; + world.stdlib_command = Some(format!("\"{}\"", helper.as_str())); + Ok(()) +} + #[given(regex = r#"^an HTTP server returning "(.+)"$"#)] pub(crate) fn http_server_returning(world: &mut CliWorld, body: ServerBody) -> Result<()> { let body = body.into_inner(); From 66b3f80a528bb2be6732f4d0d5da45109d8ba900 Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 6 Nov 2025 19:12:35 +0000 Subject: [PATCH 2/4] Refactor read_pipe and workspace-scoped tempfile plumbing (#232) * refactor(stdlib): split read_pipe into separate capture and tempfile functions Refactored the read_pipe function in src/stdlib/command.rs by splitting its logic based on OutputMode into two distinct functions, read_pipe_capture and read_pipe_tempfile. This improves code clarity and separation of concerns while maintaining existing behavior. Additionally, refactored template rendering test helpers in tests/steps/stdlib_steps/rendering.rs to consolidate context construction into a single helper function render_with_single_context, reducing code duplication in tests. Co-authored-by: terragon-labs[bot] * feat(stdlib/command): enforce byte limits and use workspace-rooted tempfiles for command output - Introduce distinct configurable byte limits for stdout capture and streaming in CommandConfig. - Implement workspace-rooted temporary file creation for streaming command outputs, replacing system temp directory usage. - Modify child process pipe readers to enforce configured limits and produce bytes or workspace-based tempfiles accordingly. - Propagate workspace root path into stdlib configuration, enabling temp file placement inside project directories. - Add tests validating capturing and streaming output within limits and error handling on limit exceedance. - Enhance documentation with a Mermaid class diagram illustrating pipe reading abstractions. - Update stdlib configuration defaults to include workspace root path for consistent temp file handling. - Adjust tests and feature files to verify new output limit enforcement and streaming behavior. This improves resource control and workspace hygiene for command output handling in Netsuke standard library. Co-authored-by: terragon-labs[bot] * refactor(stdlib/command): use references for CommandContext to avoid cloning Changed functions to accept `&CommandContext` instead of consuming `CommandContext` by value to improve efficiency and avoid unnecessary cloning. Made `CommandContext::new` a `const fn`. Updated related pipe reader functions to accept config by reference rather than Arc cloning. Fixed a slice indexing panic by validating bounds before write. Co-authored-by: terragon-labs[bot] --------- Co-authored-by: terragon-labs[bot] --- docs/netsuke-design.md | 44 +++ src/manifest/mod.rs | 4 +- src/stdlib/command.rs | 369 ++++++++++++++++---- src/stdlib/mod.rs | 109 +++++- tests/features/stdlib.feature | 9 +- tests/std_filter_tests/network_functions.rs | 9 +- tests/steps/stdlib_steps/rendering.rs | 25 +- 7 files changed, 486 insertions(+), 83 deletions(-) diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index d4eda21d..c2a3008f 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -1062,6 +1062,50 @@ Implementation details: builder and hands owned network/command configurations to the registration routines, avoiding needless cloning of the capability handles. +```mermaid +classDiagram + class read_pipe { + +read_pipe(reader: R, spec: PipeSpec): Result + } + class read_pipe_capture { + +read_pipe_capture(reader: R, limit: PipeLimit): Result + } + class read_pipe_tempfile { + +read_pipe_tempfile(reader: R, limit: PipeLimit): Result + } + read_pipe --> read_pipe_capture : calls + read_pipe --> read_pipe_tempfile : calls + class PipeSpec { + +into_limit(): PipeLimit + +mode(): OutputMode + } + class PipeOutcome { + <> + Bytes(Vec) + Tempfile(Utf8PathBuf) + } + class CommandFailure { + <> + Io + StreamPathNotUtf8 + } + class PipeLimit { + +record(read: usize): Result<(), CommandFailure> + } + class OutputMode { + <> + Capture + Tempfile + } + read_pipe ..> PipeSpec : uses + read_pipe_capture ..> PipeLimit : uses + read_pipe_tempfile ..> PipeLimit : uses + read_pipe_capture ..> PipeOutcome : returns + read_pipe_tempfile ..> PipeOutcome : returns + read_pipe_capture ..> CommandFailure : error + read_pipe_tempfile ..> CommandFailure : error +``` + Custom external commands can be registered as additional filters. Those should be marked `pure` if safe for caching or `impure` otherwise. diff --git a/src/manifest/mod.rs b/src/manifest/mod.rs index bbf91500..f1cb8237 100644 --- a/src/manifest/mod.rs +++ b/src/manifest/mod.rs @@ -212,5 +212,7 @@ fn stdlib_config_for_manifest(path: &Path, policy: NetworkPolicy) -> Result, + workspace_root_path: Option>, + ) -> Self { + Self { + max_capture_bytes, + max_stream_bytes, + temp_dir: CommandTempDir::new(workspace_root, workspace_root_path), + } + } + + fn create_tempfile(&self, label: &str) -> io::Result { + self.temp_dir.create(label) + } +} + +#[derive(Clone)] +struct CommandTempDir { + workspace_root: Arc, + workspace_root_path: Option>, + relative: Utf8PathBuf, +} + +impl CommandTempDir { + fn new(workspace_root: Arc, workspace_root_path: Option>) -> Self { + Self { + workspace_root, + workspace_root_path, + relative: Utf8PathBuf::from(DEFAULT_COMMAND_TEMP_DIR), + } + } + + fn create(&self, label: &str) -> io::Result { + self.workspace_root.create_dir_all(&self.relative)?; + let mut builder = Builder::new(); + builder.prefix(label); + let file = if let Some(root_path) = &self.workspace_root_path { + let dir_path = root_path.join(&self.relative); + fs::create_dir_all(dir_path.as_std_path())?; + builder.tempfile_in(dir_path.as_std_path())? + } else { + builder.tempfile()? + }; + Ok(CommandTempFile { file }) + } +} + +struct CommandTempFile { + file: NamedTempFile, +} + +impl CommandTempFile { + fn into_file(self) -> NamedTempFile { + self.file + } } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -103,6 +177,20 @@ impl OutputStream { Self::Stderr => "stderr", } } + + const fn tempfile_label(self) -> &'static str { + match self { + Self::Stdout => "stdout", + Self::Stderr => "stderr", + } + } + + const fn empty_tempfile_label(self) -> &'static str { + match self { + Self::Stdout => "stdout-empty", + Self::Stderr => "stderr-empty", + } + } } impl OutputMode { @@ -248,33 +336,39 @@ impl Default for CommandOptions { } } +#[derive(Debug)] enum StdoutResult { Bytes(Vec), Tempfile(Utf8PathBuf), } +#[derive(Debug)] enum PipeOutcome { Bytes(Vec), Tempfile(Utf8PathBuf), } -#[derive(Clone, Copy)] -struct CommandContext<'a> { - config: &'a CommandConfig, +#[derive(Clone)] +struct CommandContext { + config: Arc, options: CommandOptions, } -impl<'a> CommandContext<'a> { - const fn new(config: &'a CommandConfig, options: CommandOptions) -> Self { +impl CommandContext { + const fn new(config: Arc, options: CommandOptions) -> Self { Self { config, options } } - const fn stdout_mode(self) -> OutputMode { + const fn stdout_mode(&self) -> OutputMode { self.options.stdout_mode() } - const fn config(self) -> &'a CommandConfig { - self.config + fn config(&self) -> &CommandConfig { + &self.config + } + + fn config_handle(&self) -> Arc { + Arc::clone(&self.config) } } @@ -358,8 +452,8 @@ pub(crate) fn register( move |state: &State, value: Value, command: String, options: Option| { shell_flag.store(true, Ordering::Relaxed); let parsed = CommandOptions::from_value(options)?; - let context = CommandContext::new(shell_config.as_ref(), parsed); - execute_shell(state, &value, &command, context) + let context = CommandContext::new(Arc::clone(&shell_config), parsed); + execute_shell(state, &value, &command, &context) }, ); @@ -374,9 +468,9 @@ pub(crate) fn register( options: Option| { grep_flag.store(true, Ordering::Relaxed); let parsed = CommandOptions::from_value(options)?; - let context = CommandContext::new(grep_config.as_ref(), parsed); + let context = CommandContext::new(Arc::clone(&grep_config), parsed); let call = GrepCall::new(&pattern, flags); - execute_grep(state, &value, call, context) + execute_grep(state, &value, call, &context) }, ); } @@ -385,7 +479,7 @@ fn execute_shell( state: &State, value: &Value, command: &str, - context: CommandContext<'_>, + context: &CommandContext, ) -> Result { let cmd = command.trim(); if cmd.is_empty() { @@ -408,7 +502,7 @@ fn execute_grep( state: &State, value: &Value, call: GrepCall<'_>, - context: CommandContext<'_>, + context: &CommandContext, ) -> Result { let GrepCall { pattern, flags } = call; if pattern.is_empty() { @@ -580,7 +674,7 @@ fn to_bytes(value: &Value) -> Result, Error> { fn run_command( command: &str, input: &[u8], - context: CommandContext<'_>, + context: &CommandContext, ) -> Result { let mut cmd = Command::new(SHELL); cmd.args(SHELL_ARGS) @@ -597,7 +691,7 @@ fn run_program( program: &str, args: &[String], input: &[u8], - context: CommandContext<'_>, + context: &CommandContext, ) -> Result { let mut cmd = Command::new(program); cmd.args(args) @@ -611,7 +705,7 @@ fn run_program( fn run_child( mut command: Command, input: &[u8], - context: CommandContext<'_>, + context: &CommandContext, ) -> Result { let mut child = command.spawn().map_err(CommandFailure::Spawn)?; let mut stdin_handle = child.stdin.take().map(|mut stdin| { @@ -628,8 +722,13 @@ fn run_child( let stdout_spec = PipeSpec::new(OutputStream::Stdout, context.stdout_mode(), stdout_limit); let stderr_spec = PipeSpec::new(OutputStream::Stderr, OutputMode::Capture, stderr_limit); - let mut stdout_reader = spawn_pipe_reader(child.stdout.take(), stdout_spec); - let mut stderr_reader = spawn_pipe_reader(child.stderr.take(), stderr_spec); + let stdout_config = context.config_handle(); + let stderr_config = context.config_handle(); + + let mut stdout_reader = + spawn_pipe_reader(child.stdout.take(), stdout_spec, Arc::clone(&stdout_config)); + let mut stderr_reader = + spawn_pipe_reader(child.stderr.take(), stderr_spec, Arc::clone(&stderr_config)); let status = match wait_for_exit(&mut child, COMMAND_TIMEOUT) { Ok(status) => status, @@ -639,8 +738,8 @@ fn run_child( } }; - let stdout = join_reader(stdout_reader.take(), stdout_spec)?; - let stderr_outcome = join_reader(stderr_reader.take(), stderr_spec)?; + let stdout = join_reader(stdout_reader.take(), stdout_spec, stdout_config.as_ref())?; + let stderr_outcome = join_reader(stderr_reader.take(), stderr_spec, stderr_config.as_ref())?; let stderr = match stderr_outcome { PipeOutcome::Bytes(bytes) => bytes, @@ -879,16 +978,18 @@ fn wait_for_exit(child: &mut Child, timeout: Duration) -> Result( pipe: Option, spec: PipeSpec, + config: Arc, ) -> Option>> where R: Read + Send + 'static, { - pipe.map(|reader| thread::spawn(move || read_pipe(reader, spec))) + pipe.map(|reader| thread::spawn(move || read_pipe(reader, spec, config.as_ref()))) } fn join_reader( reader_handle: Option>>, spec: PipeSpec, + config: &CommandConfig, ) -> Result { match reader_handle { Some(join_handle) => join_handle @@ -896,7 +997,8 @@ fn join_reader( .map_err(|_| CommandFailure::Io(io::Error::other("pipe reader panicked")))?, None => { if matches!(spec.mode(), OutputMode::Tempfile) { - create_empty_tempfile().map(PipeOutcome::Tempfile) + create_empty_tempfile(config, spec.stream().empty_tempfile_label()) + .map(PipeOutcome::Tempfile) } else { Ok(PipeOutcome::Bytes(Vec::new())) } @@ -904,53 +1006,85 @@ fn join_reader( } } -fn read_pipe(mut reader: R, spec: PipeSpec) -> Result +/// Drains a child process pipe according to the provided `PipeSpec`, enforcing +/// the configured byte limit and producing either in-memory bytes or a +/// tempfile-backed outcome. +fn read_pipe( + reader: R, + spec: PipeSpec, + config: &CommandConfig, +) -> Result where R: Read, { - let mut limit = spec.into_limit(); + let limit = spec.into_limit(); match spec.mode() { - OutputMode::Capture => { - let mut buf = Vec::new(); - let mut chunk = [0_u8; 8192]; - loop { - let read = reader.read(&mut chunk).map_err(CommandFailure::Io)?; - if read == 0 { - break; - } - limit.record(read)?; - buf.extend(chunk.iter().take(read).copied()); - } - Ok(PipeOutcome::Bytes(buf)) - } + OutputMode::Capture => read_pipe_capture(reader, limit), OutputMode::Tempfile => { - let mut file = NamedTempFile::new().map_err(CommandFailure::Io)?; - let mut chunk = [0_u8; 8192]; - loop { - let read = reader.read(&mut chunk).map_err(CommandFailure::Io)?; - if read == 0 { - break; - } - limit.record(read)?; - let slice = chunk.get(..read).ok_or_else(|| { - CommandFailure::Io(io::Error::other("pipe read out of range")) - })?; - file.write_all(slice).map_err(CommandFailure::Io)?; - } - file.flush().map_err(CommandFailure::Io)?; - let temp_path = file.into_temp_path(); - let path = temp_path - .keep() - .map_err(|err| CommandFailure::Io(err.error))?; - let utf8 = - Utf8PathBuf::from_path_buf(path).map_err(CommandFailure::StreamPathNotUtf8)?; - Ok(PipeOutcome::Tempfile(utf8)) + read_pipe_tempfile(reader, limit, spec.stream().tempfile_label(), config) + } + } +} + +/// Reads a pipe into memory while enforcing the capture byte limit recorded in +/// the `PipeLimit` tracker. +fn read_pipe_capture(mut reader: R, mut limit: PipeLimit) -> Result +where + R: Read, +{ + let mut buf = Vec::new(); + let mut chunk = [0_u8; PIPE_CHUNK_SIZE]; + loop { + let read = reader.read(&mut chunk).map_err(CommandFailure::Io)?; + if read == 0 { + break; + } + limit.record(read)?; + buf.extend(chunk.iter().take(read).copied()); + } + Ok(PipeOutcome::Bytes(buf)) +} + +/// Streams a pipe into a tempfile rooted within the workspace, enforcing the +/// streaming byte limit recorded in `PipeLimit` and returning the final path. +fn read_pipe_tempfile( + mut reader: R, + mut limit: PipeLimit, + label: &str, + config: &CommandConfig, +) -> Result +where + R: Read, +{ + let tempfile = config.create_tempfile(label).map_err(CommandFailure::Io)?; + let mut file = tempfile.into_file(); + let mut chunk = [0_u8; PIPE_CHUNK_SIZE]; + loop { + let read = reader.read(&mut chunk).map_err(CommandFailure::Io)?; + if read == 0 { + break; } + limit.record(read)?; + let slice = chunk + .get(..read) + .ok_or_else(|| CommandFailure::Io(io::Error::other("pipe read out of range")))?; + file.write_all(slice).map_err(CommandFailure::Io)?; } + file.flush().map_err(CommandFailure::Io)?; + let temp_path = file.into_temp_path(); + let path = temp_path + .keep() + .map_err(|err| CommandFailure::Io(err.error))?; + let utf8 = Utf8PathBuf::from_path_buf(path).map_err(CommandFailure::StreamPathNotUtf8)?; + Ok(PipeOutcome::Tempfile(utf8)) } -fn create_empty_tempfile() -> Result { - let file = NamedTempFile::new().map_err(CommandFailure::Io)?; +fn create_empty_tempfile( + config: &CommandConfig, + label: &str, +) -> Result { + let tempfile = config.create_tempfile(label).map_err(CommandFailure::Io)?; + let file = tempfile.into_file(); let path = file .into_temp_path() .keep() @@ -969,12 +1103,34 @@ fn append_stderr(message: &mut String, stderr: &[u8]) { #[cfg(test)] mod tests { + use super::*; + use crate::stdlib::{DEFAULT_COMMAND_MAX_OUTPUT_BYTES, DEFAULT_COMMAND_MAX_STREAM_BYTES}; + use camino::Utf8PathBuf; + use cap_std::{ambient_authority, fs_utf8::Dir}; + use std::{fs, io::Cursor}; + use tempfile::tempdir; + #[cfg(windows)] - #[test] - fn quote_escapes_cmd_metacharacters() -> anyhow::Result<()> { - use super::{QuoteError, quote}; - use anyhow::ensure; + use anyhow::{Result, ensure}; + + fn test_command_config() -> (tempfile::TempDir, CommandConfig) { + let temp = tempdir().expect("create command temp workspace"); + let path = Utf8PathBuf::from_path_buf(temp.path().to_path_buf()) + .expect("temp workspace should be valid UTF-8"); + let dir = + Dir::open_ambient_dir(&path, ambient_authority()).expect("open temp workspace dir"); + let config = CommandConfig::new( + DEFAULT_COMMAND_MAX_OUTPUT_BYTES, + DEFAULT_COMMAND_MAX_STREAM_BYTES, + Arc::new(dir), + Some(Arc::new(path)), + ); + (temp, config) + } + #[cfg(windows)] + #[test] + fn quote_escapes_cmd_metacharacters() -> Result<()> { let success_cases = [ ("simple", "simple"), ("", "\"\""), @@ -1019,4 +1175,83 @@ mod tests { } Ok(()) } + + #[test] + fn read_pipe_capture_collects_bytes_within_limit() { + let data = b"payload".to_vec(); + let outcome = read_pipe_capture( + Cursor::new(data.clone()), + PipeSpec::new(OutputStream::Stdout, OutputMode::Capture, 128).into_limit(), + ) + .expect("capture should succeed within the configured limit"); + match outcome { + PipeOutcome::Bytes(buf) => assert_eq!(buf, data), + PipeOutcome::Tempfile(_) => panic!("capture mode should emit bytes"), + } + } + + #[test] + fn read_pipe_capture_reports_limit_exceedance() { + let err = read_pipe_capture( + Cursor::new(vec![0_u8; 16]), + PipeSpec::new(OutputStream::Stdout, OutputMode::Capture, 8).into_limit(), + ) + .expect_err("capture should fail when it exceeds the configured limit"); + match err { + CommandFailure::OutputLimit { + stream, + mode, + limit, + } => { + assert_eq!(stream, OutputStream::Stdout); + assert_eq!(mode, OutputMode::Capture); + assert_eq!(limit, 8); + } + other => panic!("unexpected error: {other:?}"), + } + } + + #[test] + fn read_pipe_tempfile_writes_streamed_data() { + let payload = vec![b'x'; 32]; + let (_temp_dir, config) = test_command_config(); + let outcome = read_pipe_tempfile( + Cursor::new(payload.clone()), + PipeSpec::new(OutputStream::Stdout, OutputMode::Tempfile, 64).into_limit(), + "stdout", + &config, + ) + .expect("streaming should succeed within the configured limit"); + let path = match outcome { + PipeOutcome::Tempfile(path) => path, + PipeOutcome::Bytes(_) => panic!("streaming mode should emit a tempfile path"), + }; + let disk = fs::read(path.as_std_path()).expect("read streamed output"); + assert_eq!(disk, payload); + fs::remove_file(path.as_std_path()).expect("cleanup streamed file"); + } + + #[test] + fn read_pipe_tempfile_respects_stream_limit() { + let (_temp_dir, config) = test_command_config(); + let err = read_pipe_tempfile( + Cursor::new(vec![b'y'; 32]), + PipeSpec::new(OutputStream::Stdout, OutputMode::Tempfile, 8).into_limit(), + "stdout", + &config, + ) + .expect_err("streaming should fail when it exceeds the configured limit"); + match err { + CommandFailure::OutputLimit { + stream, + mode, + limit, + } => { + assert_eq!(stream, OutputStream::Stdout); + assert_eq!(mode, OutputMode::Tempfile); + assert_eq!(limit, 8); + } + other => panic!("unexpected error: {other:?}"), + } + } } diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index 0260bd1f..377505ae 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -26,6 +26,7 @@ use cap_std::fs::FileTypeExt; use cap_std::{ambient_authority, fs, fs_utf8::Dir}; use minijinja::{Environment, Error, value::Value}; use std::{ + env, sync::Arc, sync::atomic::{AtomicBool, Ordering}, }; @@ -40,6 +41,8 @@ pub(crate) const DEFAULT_FETCH_MAX_RESPONSE_BYTES: u64 = 8 * 1024 * 1024; pub(crate) const DEFAULT_COMMAND_MAX_OUTPUT_BYTES: u64 = 1024 * 1024; /// Default upper bound for streamed command output files (64 MiB). pub(crate) const DEFAULT_COMMAND_MAX_STREAM_BYTES: u64 = 64 * 1024 * 1024; +/// Relative directory for command helper tempfiles. +pub(crate) const DEFAULT_COMMAND_TEMP_DIR: &str = ".netsuke/tmp"; /// Configuration for registering Netsuke's standard library helpers. /// @@ -61,6 +64,7 @@ pub(crate) const DEFAULT_COMMAND_MAX_STREAM_BYTES: u64 = 64 * 1024 * 1024; #[derive(Debug, Clone)] pub struct StdlibConfig { workspace_root: Arc, + workspace_root_path: Option, fetch_cache_relative: Utf8PathBuf, network_policy: NetworkPolicy, fetch_max_response_bytes: u64, @@ -97,6 +101,7 @@ impl StdlibConfig { } Self { workspace_root: Arc::new(workspace_root), + workspace_root_path: None, fetch_cache_relative: default, network_policy: NetworkPolicy::default(), fetch_max_response_bytes: DEFAULT_FETCH_MAX_RESPONSE_BYTES, @@ -105,6 +110,13 @@ impl StdlibConfig { } } + /// Record the absolute workspace root path for capability-scoped helpers. + #[must_use] + pub fn with_workspace_root_path(mut self, path: impl Into) -> Self { + self.workspace_root_path = Some(path.into()); + self + } + /// Override the relative cache directory within the workspace. /// /// # Errors @@ -194,6 +206,7 @@ impl StdlibConfig { pub(crate) fn into_components(self) -> (NetworkConfig, command::CommandConfig) { let Self { workspace_root, + workspace_root_path, fetch_cache_relative, network_policy, fetch_max_response_bytes, @@ -201,6 +214,7 @@ impl StdlibConfig { command_max_stream_bytes, } = self; + let command_root = Arc::clone(&workspace_root); let network = NetworkConfig { cache_root: workspace_root, cache_relative: fetch_cache_relative, @@ -208,10 +222,12 @@ impl StdlibConfig { max_response_bytes: fetch_max_response_bytes, }; - let command = command::CommandConfig { - max_capture_bytes: command_max_output_bytes, - max_stream_bytes: command_max_stream_bytes, - }; + let command = command::CommandConfig::new( + command_max_output_bytes, + command_max_stream_bytes, + command_root, + workspace_root_path.map(Arc::new), + ); (network, command) } @@ -248,7 +264,11 @@ impl Default for StdlibConfig { fn default() -> Self { let root = Dir::open_ambient_dir(".", ambient_authority()) .unwrap_or_else(|err| panic!("open stdlib workspace root: {err}")); - Self::new(root) + let cwd = + env::current_dir().unwrap_or_else(|err| panic!("resolve current directory: {err}")); + let path = Utf8PathBuf::from_path_buf(cwd) + .unwrap_or_else(|path| panic!("cwd contains non-UTF-8 components: {}", path.display())); + Self::new(root).with_workspace_root_path(path) } } @@ -309,7 +329,14 @@ impl StdlibState { pub fn register(env: &mut Environment<'_>) -> anyhow::Result { let root = Dir::open_ambient_dir(".", ambient_authority()) .context("open current directory for stdlib registration")?; - Ok(register_with_config(env, StdlibConfig::new(root))) + let cwd = env::current_dir().context("resolve current directory for stdlib registration")?; + let path = Utf8PathBuf::from_path_buf(cwd).map_err(|path| { + anyhow::anyhow!("current directory contains non-UTF-8 components: {path:?}") + })?; + Ok(register_with_config( + env, + StdlibConfig::new(root).with_workspace_root_path(path), + )) } /// Register stdlib helpers using an explicit configuration. @@ -425,9 +452,11 @@ fn is_device(_ft: fs::FileType) -> bool { #[cfg(test)] mod tests { - use super::StdlibConfig; + use super::{DEFAULT_COMMAND_MAX_OUTPUT_BYTES, DEFAULT_COMMAND_MAX_STREAM_BYTES, StdlibConfig}; - use camino::Utf8Path; + use camino::{Utf8Path, Utf8PathBuf}; + use cap_std::{ambient_authority, fs_utf8::Dir}; + use std::env; #[test] fn validate_cache_relative_rejects_empty() { @@ -461,4 +490,68 @@ mod tests { StdlibConfig::validate_cache_relative(Utf8Path::new("nested/cache")) .expect("relative path should be accepted"); } + + #[test] + fn command_limits_default_to_constants() { + let config = StdlibConfig::default(); + assert_eq!( + config.command_max_output_bytes, + DEFAULT_COMMAND_MAX_OUTPUT_BYTES + ); + assert_eq!( + config.command_max_stream_bytes, + DEFAULT_COMMAND_MAX_STREAM_BYTES + ); + } + + #[test] + fn command_output_limit_builder_updates_value() { + let config = StdlibConfig::default() + .with_command_max_output_bytes(2_048) + .expect("positive limits should succeed"); + assert_eq!(config.command_max_output_bytes, 2_048); + } + + #[test] + fn command_output_limit_builder_rejects_zero() { + let err = StdlibConfig::default() + .with_command_max_output_bytes(0) + .expect_err("zero-byte limits must be rejected"); + assert_eq!(err.to_string(), "command output limit must be positive"); + } + + #[test] + fn command_stream_limit_builder_updates_value() { + let config = StdlibConfig::default() + .with_command_max_stream_bytes(65_536) + .expect("positive limits should succeed"); + assert_eq!(config.command_max_stream_bytes, 65_536); + } + + #[test] + fn command_stream_limit_builder_rejects_zero() { + let err = StdlibConfig::default() + .with_command_max_stream_bytes(0) + .expect_err("zero-byte limits must be rejected"); + assert_eq!(err.to_string(), "command stream limit must be positive"); + } + + #[test] + fn command_limits_propagate_into_components() { + let dir = Dir::open_ambient_dir(".", ambient_authority()) + .expect("open workspace root for config tests"); + let path = Utf8PathBuf::from_path_buf( + env::current_dir().expect("resolve cwd for command config test"), + ) + .expect("cwd should be valid UTF-8"); + let config = StdlibConfig::new(dir) + .with_workspace_root_path(path) + .with_command_max_output_bytes(4_096) + .expect("set capture limit") + .with_command_max_stream_bytes(131_072) + .expect("set streaming limit"); + let (_network, command) = config.into_components(); + assert_eq!(command.max_capture_bytes, 4_096); + assert_eq!(command.max_stream_bytes, 131_072); + } } diff --git a/tests/features/stdlib.feature b/tests/features/stdlib.feature index c29c12d3..51b70294 100644 --- a/tests/features/stdlib.feature +++ b/tests/features/stdlib.feature @@ -100,6 +100,14 @@ Feature: Template stdlib filters And the stdlib output file contains only "x" And the stdlib template is impure + Scenario: shell filter enforces command stream limits + Given a large-output stdlib command helper + And the stdlib command output limit is 512 bytes + And the stdlib command stream limit is 1024 bytes + When I render the stdlib template "{{ '' | shell(cmd, {'mode': 'tempfile'}) }}" using the stdlib command helper + Then the stdlib error contains "stdout streaming limit of 1024 bytes" + And the stdlib template is impure + Scenario: grep filter streams large output to a temporary file Given the stdlib command output limit is 512 bytes And the stdlib command stream limit is 200000 bytes @@ -175,4 +183,3 @@ Feature: Template stdlib filters When I render the stdlib template "{{ fetch(url, cache=true, cache_dir='../cache') }}" with stdlib url Then the stdlib error contains "cache_dir" And the stdlib template is pure - diff --git a/tests/std_filter_tests/network_functions.rs b/tests/std_filter_tests/network_functions.rs index e091759d..778d40d7 100644 --- a/tests/std_filter_tests/network_functions.rs +++ b/tests/std_filter_tests/network_functions.rs @@ -24,9 +24,14 @@ fn env_with_policy(policy: NetworkPolicy) -> Result<(Environment<'static>, Stdli fn env_with_workspace_policy( workspace: Dir, + workspace_path: Utf8PathBuf, policy: NetworkPolicy, ) -> Result<(Environment<'static>, StdlibState)> { - fallible::stdlib_env_with_config(StdlibConfig::new(workspace).with_network_policy(policy)) + fallible::stdlib_env_with_config( + StdlibConfig::new(workspace) + .with_workspace_root_path(workspace_path) + .with_network_policy(policy), + ) } struct FetchTestContext<'env> { @@ -220,7 +225,7 @@ fn fetch_function_respects_cache(http_policy: Result) -> Result<( }; let workspace = Dir::open_ambient_dir(&temp_root, ambient_authority()) .context("open fetch cache workspace")?; - let (mut env, mut state) = env_with_workspace_policy(workspace, http_policy?)?; + let (mut env, mut state) = env_with_workspace_policy(workspace, temp_root.clone(), http_policy?)?; state.reset_impure(); fallible::register_template(&mut env, "fetch_cache", "{{ fetch(url, cache=true) }}")?; let tmpl = env diff --git a/tests/steps/stdlib_steps/rendering.rs b/tests/steps/stdlib_steps/rendering.rs index 15b9ef47..ab6fc66a 100644 --- a/tests/steps/stdlib_steps/rendering.rs +++ b/tests/steps/stdlib_steps/rendering.rs @@ -18,7 +18,7 @@ pub(crate) fn render_template_with_context( let mut env = Environment::new(); let workspace = Dir::open_ambient_dir(&root, ambient_authority()) .context("open stdlib workspace directory")?; - let mut config = StdlibConfig::new(workspace); + let mut config = StdlibConfig::new(workspace).with_workspace_root_path(root.clone()); if let Some(policy) = world.stdlib_policy.clone() { config = config.with_network_policy(policy); } @@ -54,6 +54,23 @@ pub(crate) fn render_template_with_context( Ok(()) } +fn render_with_single_context( + world: &mut CliWorld, + template: &TemplateContent, + key: &str, + value: String, +) -> Result<()> { + use minijinja::value::Value; + use std::collections::BTreeMap; + + let ctx = Value::from_serialize( + [(key, value)] + .into_iter() + .collect::>(), + ); + render_template_with_context(world, template, ctx) +} + fn render_template( world: &mut CliWorld, template: &TemplateContent, @@ -103,7 +120,7 @@ pub(crate) fn render_stdlib_template_with_url( .stdlib_url .clone() .context("expected stdlib HTTP server to be initialised")?; - render_template_with_context(world, &template_content, context!(url => url)) + render_with_single_context(world, &template_content, "url", url) } #[expect( @@ -119,7 +136,7 @@ pub(crate) fn render_stdlib_template_with_command( .stdlib_command .clone() .context("expected stdlib command helper to be compiled")?; - render_template_with_context(world, &template_content, context!(cmd => command)) + render_with_single_context(world, &template_content, "cmd", command) } #[expect( @@ -135,5 +152,5 @@ pub(crate) fn render_stdlib_template_with_text( .stdlib_text .clone() .context("expected stdlib template text to be configured")?; - render_template_with_context(world, &template_content, context!(text => text)) + render_with_single_context(world, &template_content, "text", text) } From e4704dabb383854b4b12be90e8ed7d6a01f11ff8 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 7 Nov 2025 02:23:42 +0000 Subject: [PATCH 3/4] Refactor rendering tests with macro and absolute workspace roots (#234) * refactor(stdlib,manifest): improve workspace root handling and render helpers - Resolve workspace root as absolute path in manifest loading, supporting relative paths by joining with current working directory. - Enforce absolute workspace root path in StdlibConfig with panic on invalid input. - Return error if workspace root not configured when creating command temp files. - Replace unsafe slice indexing with safe slice usage in command file writes. - Introduce macro in stdlib_steps for DRY rendering with world string fields, simplifying template rendering functions. Co-authored-by: terragon-labs[bot] * feat(manifest): add workspace root path handling and related tests - Handle resolution of relative and absolute workspace root paths with UTF-8 validation - Expose workspace_root_path() getter in StdlibConfig - Add tests for resolving workspace roots and rejecting non-UTF-8 paths - Add test ensuring command tempdir requires workspace root path - Add manifest subcommand test for relative manifest paths These changes improve manifest workspace handling by enforcing UTF-8 constraints and providing better API visibility and test coverage. Co-authored-by: terragon-labs[bot] * refactor(manifest): extract absolute workspace root resolution to helper Refactored workspace root resolution logic in stdlib_config_for_manifest into a new function resolve_absolute_workspace_root for clarity and reuse. Updated related tests to combine relative and absolute workspace root checks into a single parameterized test for better coverage and reduced duplication. Additionally, introduced assert_output_limit_error helper in stdlib/command tests to consolidate output limit error assertions in multiple test cases. This improves code maintainability and test clarity. Co-authored-by: terragon-labs[bot] --------- Co-authored-by: terragon-labs[bot] --- src/manifest/mod.rs | 37 ++++++++--- src/manifest/tests.rs | 50 +++++++++++++++ src/stdlib/command.rs | 91 ++++++++++++++++----------- src/stdlib/mod.rs | 30 ++++++++- tests/runner_tests.rs | 21 +++++++ tests/steps/stdlib_steps/rendering.rs | 45 ++++++++----- 6 files changed, 215 insertions(+), 59 deletions(-) diff --git a/src/manifest/mod.rs b/src/manifest/mod.rs index f1cb8237..6b7af68c 100644 --- a/src/manifest/mod.rs +++ b/src/manifest/mod.rs @@ -23,11 +23,11 @@ use crate::{ stdlib::{NetworkPolicy, StdlibConfig}, }; use anyhow::{Context, Result, anyhow}; -use camino::Utf8Path; +use camino::{Utf8Path, Utf8PathBuf}; use cap_std::{ambient_authority, fs_utf8::Dir}; use minijinja::{Environment, Error, ErrorKind, UndefinedBehavior, value::Value}; use serde::de::Error as _; -use std::{fs, path::Path}; +use std::{env, fs, path::Path}; mod diagnostics; mod expand; @@ -191,6 +191,23 @@ pub fn from_path_with_policy( #[cfg(test)] mod tests; +/// Resolve a potentially relative manifest parent path to an absolute UTF-8 workspace root. +fn resolve_absolute_workspace_root(utf8_parent: &Utf8Path) -> Result { + let workspace_base = if utf8_parent.is_absolute() { + utf8_parent.to_path_buf().into_std_path_buf() + } else { + env::current_dir() + .context("resolve current directory for manifest workspace root")? + .join(utf8_parent.as_std_path()) + }; + Utf8PathBuf::from_path_buf(workspace_base).map_err(|invalid| { + anyhow!( + "workspace root '{}' contains non-UTF-8 components", + invalid.display() + ) + }) +} + fn stdlib_config_for_manifest(path: &Path, policy: NetworkPolicy) -> Result { let parent = match path.parent() { Some(parent) if !parent.as_os_str().is_empty() => parent, @@ -207,12 +224,16 @@ fn stdlib_config_for_manifest(path: &Path, policy: NetworkPolicy) -> Result AnyResult<()> { + let temp = tempdir().context("create temp workspace")?; + let _guard = if use_relative { + Some(CurrentDirGuard::change_to(temp.path())?) + } else { + None + }; + let manifest_path = if use_relative { + Path::new("Netsukefile").to_path_buf() + } else { + temp.path().join("Netsukefile") + }; + let config = stdlib_config_for_manifest(&manifest_path, NetworkPolicy::default())?; + let recorded = config + .workspace_root_path() + .context("workspace root path should be recorded")?; + let expected = camino::Utf8Path::from_path(temp.path()) + .context("temp workspace path should be valid UTF-8")?; + ensure!( + recorded == expected, + "expected workspace root {expected}, got {recorded}" + ); + Ok(()) +} + +#[cfg(unix)] +#[rstest] +fn stdlib_config_for_manifest_rejects_non_utf_workspace_root() -> AnyResult<()> { + use std::ffi::OsString; + use std::os::unix::ffi::OsStringExt; + + let temp = tempdir().context("create temp workspace")?; + let invalid_component = OsString::from_vec(vec![0xFF]); // invalid standalone byte + let manifest_dir = temp.path().join(&invalid_component); + fs::create_dir_all(&manifest_dir) + .context("create manifest directory with invalid UTF-8 component")?; + let manifest_path = manifest_dir.join("manifest.yml"); + let err = stdlib_config_for_manifest(&manifest_path, NetworkPolicy::default()) + .expect_err("config should fail when workspace root contains non-UTF-8 components"); + ensure!( + err.to_string().contains("contains non-UTF-8 components"), + "error should mention non-UTF-8 components but was {err}" + ); + Ok(()) +} + #[rstest] fn from_path_uses_manifest_directory_for_caches() -> AnyResult<()> { let temp = tempdir()?; diff --git a/src/stdlib/command.rs b/src/stdlib/command.rs index 87da7c4f..6d319538 100644 --- a/src/stdlib/command.rs +++ b/src/stdlib/command.rs @@ -142,7 +142,10 @@ impl CommandTempDir { fs::create_dir_all(dir_path.as_std_path())?; builder.tempfile_in(dir_path.as_std_path())? } else { - builder.tempfile()? + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "workspace root path must be configured for command tempfiles", + )); }; Ok(CommandTempFile { file }) } @@ -1065,10 +1068,11 @@ where break; } limit.record(read)?; - let slice = chunk - .get(..read) - .ok_or_else(|| CommandFailure::Io(io::Error::other("pipe read out of range")))?; - file.write_all(slice).map_err(CommandFailure::Io)?; + #[expect( + clippy::indexing_slicing, + reason = "Read::read guarantees `read` does not exceed `chunk.len()`" + )] + file.write_all(&chunk[..read]).map_err(CommandFailure::Io)?; } file.flush().map_err(CommandFailure::Io)?; let temp_path = file.into_temp_path(); @@ -1107,7 +1111,7 @@ mod tests { use crate::stdlib::{DEFAULT_COMMAND_MAX_OUTPUT_BYTES, DEFAULT_COMMAND_MAX_STREAM_BYTES}; use camino::Utf8PathBuf; use cap_std::{ambient_authority, fs_utf8::Dir}; - use std::{fs, io::Cursor}; + use std::{fs, io, io::Cursor}; use tempfile::tempdir; #[cfg(windows)] @@ -1128,6 +1132,28 @@ mod tests { (temp, config) } + fn assert_output_limit_error( + outcome: Result, + expected_stream: OutputStream, + expected_mode: OutputMode, + expected_limit: u64, + ) { + let err = + outcome.expect_err("expected command to exceed the configured output limit for test"); + match err { + CommandFailure::OutputLimit { + stream, + mode, + limit, + } => { + assert_eq!(stream, expected_stream); + assert_eq!(mode, expected_mode); + assert_eq!(limit, expected_limit); + } + other => panic!("unexpected error variant: {other:?}"), + } + } + #[cfg(windows)] #[test] fn quote_escapes_cmd_metacharacters() -> Result<()> { @@ -1192,23 +1218,11 @@ mod tests { #[test] fn read_pipe_capture_reports_limit_exceedance() { - let err = read_pipe_capture( + let outcome = read_pipe_capture( Cursor::new(vec![0_u8; 16]), PipeSpec::new(OutputStream::Stdout, OutputMode::Capture, 8).into_limit(), - ) - .expect_err("capture should fail when it exceeds the configured limit"); - match err { - CommandFailure::OutputLimit { - stream, - mode, - limit, - } => { - assert_eq!(stream, OutputStream::Stdout); - assert_eq!(mode, OutputMode::Capture); - assert_eq!(limit, 8); - } - other => panic!("unexpected error: {other:?}"), - } + ); + assert_output_limit_error(outcome, OutputStream::Stdout, OutputMode::Capture, 8); } #[test] @@ -1234,24 +1248,31 @@ mod tests { #[test] fn read_pipe_tempfile_respects_stream_limit() { let (_temp_dir, config) = test_command_config(); - let err = read_pipe_tempfile( + let outcome = read_pipe_tempfile( Cursor::new(vec![b'y'; 32]), PipeSpec::new(OutputStream::Stdout, OutputMode::Tempfile, 8).into_limit(), "stdout", &config, - ) - .expect_err("streaming should fail when it exceeds the configured limit"); - match err { - CommandFailure::OutputLimit { - stream, - mode, - limit, - } => { - assert_eq!(stream, OutputStream::Stdout); - assert_eq!(mode, OutputMode::Tempfile); - assert_eq!(limit, 8); - } - other => panic!("unexpected error: {other:?}"), + ); + assert_output_limit_error(outcome, OutputStream::Stdout, OutputMode::Tempfile, 8); + } + + #[test] + fn command_tempdir_requires_workspace_root_path() { + let temp = tempdir().expect("create temp workspace for command"); + let path = Utf8PathBuf::from_path_buf(temp.path().to_path_buf()) + .expect("temp workspace should be valid UTF-8"); + let dir = + Dir::open_ambient_dir(&path, ambient_authority()).expect("open temp workspace dir"); + let config = CommandConfig::new( + DEFAULT_COMMAND_MAX_OUTPUT_BYTES, + DEFAULT_COMMAND_MAX_STREAM_BYTES, + Arc::new(dir), + None, + ); + match config.create_tempfile("stdout") { + Ok(_) => panic!("command temp dir should require workspace root path"), + Err(err) => assert_eq!(err.kind(), io::ErrorKind::InvalidInput), } } } diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index 377505ae..16fd29ef 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -111,12 +111,40 @@ impl StdlibConfig { } /// Record the absolute workspace root path for capability-scoped helpers. + /// + /// # Panics + /// + /// Panics if `path` is not absolute. #[must_use] pub fn with_workspace_root_path(mut self, path: impl Into) -> Self { - self.workspace_root_path = Some(path.into()); + let workspace_path = path.into(); + assert!( + workspace_path.is_absolute(), + "workspace root path must be absolute: {workspace_path}" + ); + self.workspace_root_path = Some(workspace_path); self } + /// Return the recorded absolute workspace root path, when available. + /// + /// # Examples + /// + /// ```rust,no_run + /// # use cap_std::{ambient_authority, fs_utf8::Dir}; + /// # use netsuke::stdlib::StdlibConfig; + /// let root = Dir::open_ambient_dir(".", ambient_authority()).expect("open workspace"); + /// let config = StdlibConfig::new(root).with_workspace_root_path("/tmp/example".into()); + /// assert_eq!( + /// config.workspace_root_path().unwrap().as_str(), + /// "/tmp/example" + /// ); + /// ``` + #[must_use] + pub fn workspace_root_path(&self) -> Option<&Utf8Path> { + self.workspace_root_path.as_deref() + } + /// Override the relative cache directory within the workspace. /// /// # Errors diff --git a/tests/runner_tests.rs b/tests/runner_tests.rs index 4646d14a..cb7cdddf 100644 --- a/tests/runner_tests.rs +++ b/tests/runner_tests.rs @@ -220,6 +220,27 @@ fn run_manifest_subcommand_writes_file() -> Result<()> { Ok(()) } +#[test] +fn run_manifest_subcommand_accepts_relative_manifest_path() -> Result<()> { + let (temp, _manifest_path) = create_test_manifest()?; + let output_path = temp.path().join("relative.ninja"); + let cli = Cli { + file: PathBuf::from("Netsukefile"), + directory: Some(temp.path().to_path_buf()), + command: Some(Commands::Manifest { + file: output_path.clone(), + }), + ..Cli::default() + }; + + run(&cli).context("expected manifest subcommand to accept relative manifest path")?; + ensure!( + output_path.exists(), + "manifest command should create output file for relative manifest path" + ); + Ok(()) +} + #[test] #[serial] fn run_respects_env_override_for_ninja() -> Result<()> { diff --git a/tests/steps/stdlib_steps/rendering.rs b/tests/steps/stdlib_steps/rendering.rs index ab6fc66a..8b8d04b4 100644 --- a/tests/steps/stdlib_steps/rendering.rs +++ b/tests/steps/stdlib_steps/rendering.rs @@ -71,6 +71,15 @@ fn render_with_single_context( render_template_with_context(world, template, ctx) } +/// Macro to render a template using a string field from world. +macro_rules! render_with_world_string_field { + ($world:expr, $template:expr, $key:literal, $field:ident, $error:literal) => {{ + let __render_world = $world; + let value = __render_world.$field.clone().context($error)?; + render_with_single_context(__render_world, $template, $key, value) + }}; +} + fn render_template( world: &mut CliWorld, template: &TemplateContent, @@ -116,11 +125,13 @@ pub(crate) fn render_stdlib_template_with_url( world: &mut CliWorld, template_content: TemplateContent, ) -> Result<()> { - let url = world - .stdlib_url - .clone() - .context("expected stdlib HTTP server to be initialised")?; - render_with_single_context(world, &template_content, "url", url) + render_with_world_string_field!( + world, + &template_content, + "url", + stdlib_url, + "expected stdlib HTTP server to be initialised" + ) } #[expect( @@ -132,11 +143,13 @@ pub(crate) fn render_stdlib_template_with_command( world: &mut CliWorld, template_content: TemplateContent, ) -> Result<()> { - let command = world - .stdlib_command - .clone() - .context("expected stdlib command helper to be compiled")?; - render_with_single_context(world, &template_content, "cmd", command) + render_with_world_string_field!( + world, + &template_content, + "cmd", + stdlib_command, + "expected stdlib command helper to be compiled" + ) } #[expect( @@ -148,9 +161,11 @@ pub(crate) fn render_stdlib_template_with_text( world: &mut CliWorld, template_content: TemplateContent, ) -> Result<()> { - let text = world - .stdlib_text - .clone() - .context("expected stdlib template text to be configured")?; - render_with_single_context(world, &template_content, "text", text) + render_with_world_string_field!( + world, + &template_content, + "text", + stdlib_text, + "expected stdlib template text to be configured" + ) } From 7b2e32d6ac03a74d585c95f640d1e1695fe87eae Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 7 Nov 2025 12:02:12 +0000 Subject: [PATCH 4/4] Enforce workspace-scoped tempfile creation with UTF-8 paths (#235) * refactor(command): replace tempfile crate with cap-std for command temp files Reworked command temporary file management to use cap-std crate instead of tempfile. Implemented a custom unique tempfile naming strategy with atomic counter. Improved tempfile lifecycle handling and removal on drop when not persisted. Standardized sanitation of tempfile labels for safer filenames. Added robust error handling for tempfile creation attempts. Smoothed tempfile usage in pipe reading and empty tempfile creation. Changed workspace root path validation message for clarity. Co-authored-by: terragon-labs[bot] * refactor(command): replace manual tempfile creation with tempfile crate Replaced the custom logic for creating temporary files within the command module with the tempfile crate's Builder API. This simplifies tempfile creation, removes counters and manual uniqueness handling, and improves error handling. Dropped the custom CommandTempFile's complex management for a simpler wrapper around NamedTempFile, ensuring proper cleanup and persistence semantics. Added tests for label sanitization and tempfile lifecycle behavior. Co-authored-by: terragon-labs[bot] * style(stdlib/command): simplify flush call by chaining method calls Co-authored-by: terragon-labs[bot] --------- Co-authored-by: terragon-labs[bot] --- src/stdlib/command.rs | 143 ++++++++++++++++++++++++++++++------------ src/stdlib/mod.rs | 2 +- 2 files changed, 103 insertions(+), 42 deletions(-) diff --git a/src/stdlib/command.rs b/src/stdlib/command.rs index 6d319538..4a4adc22 100644 --- a/src/stdlib/command.rs +++ b/src/stdlib/command.rs @@ -47,7 +47,6 @@ use std::{ fmt::{self}, fs, io::{self, Read, Write}, - path::{Path, PathBuf}, process::{Child, Command, ExitStatus, Stdio}, sync::{ Arc, @@ -134,30 +133,83 @@ impl CommandTempDir { } fn create(&self, label: &str) -> io::Result { - self.workspace_root.create_dir_all(&self.relative)?; - let mut builder = Builder::new(); - builder.prefix(label); - let file = if let Some(root_path) = &self.workspace_root_path { - let dir_path = root_path.join(&self.relative); - fs::create_dir_all(dir_path.as_std_path())?; - builder.tempfile_in(dir_path.as_std_path())? - } else { + let Some(root_path) = &self.workspace_root_path else { return Err(io::Error::new( io::ErrorKind::InvalidInput, "workspace root path must be configured for command tempfiles", )); }; - Ok(CommandTempFile { file }) + self.workspace_root.create_dir_all(&self.relative)?; + let dir_path = root_path.join(&self.relative); + fs::create_dir_all(dir_path.as_std_path())?; + + let prefix = sanitize_label(label); + let mut builder = Builder::new(); + builder.prefix(&prefix); + builder.suffix(".tmp"); + + let file = builder.tempfile_in(dir_path.as_std_path()).map_err(|err| { + io::Error::new( + err.kind(), + format!("failed to create command tempfile for '{label}': {err}"), + ) + })?; + + Ok(CommandTempFile::new(file)) } } +fn sanitize_label(label: &str) -> String { + let mut sanitized = String::with_capacity(label.len()); + for ch in label.chars() { + if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_') { + sanitized.push(ch); + } else { + sanitized.push('-'); + } + } + if sanitized.is_empty() { + sanitized.push('t'); + } + sanitized +} + struct CommandTempFile { file: NamedTempFile, } impl CommandTempFile { - fn into_file(self) -> NamedTempFile { - self.file + #[expect( + clippy::missing_const_for_fn, + reason = "NamedTempFile creation is inherently runtime-only" + )] + fn new(file: NamedTempFile) -> Self { + Self { file } + } + + #[expect( + clippy::missing_const_for_fn, + reason = "Mutable handles cannot be borrowed in const contexts" + )] + fn as_file_mut(&mut self) -> &mut NamedTempFile { + &mut self.file + } + + #[cfg(test)] + fn path(&self) -> &std::path::Path { + self.file.path() + } + + fn into_path(mut self) -> io::Result { + self.file.flush()?; + let temp_path = self.file.into_temp_path(); + let path = temp_path.keep().map_err(|err| err.error)?; + Utf8PathBuf::from_path_buf(path).map_err(|_| { + io::Error::new( + io::ErrorKind::InvalidData, + "command tempfile path is not valid UTF-8", + ) + }) } } @@ -846,7 +898,6 @@ fn command_error(err: CommandFailure, template: &str, command: &str) -> Error { mode, limit, } => output_limit_error(location, LimitExceeded::new(stream, mode, limit)), - CommandFailure::StreamPathNotUtf8(path) => stream_path_error(location, &path), CommandFailure::Timeout(duration) => timeout_error(location, duration), } } @@ -902,17 +953,6 @@ fn output_limit_error(location: CommandLocation<'_>, exceeded: LimitExceeded) -> ) } -fn stream_path_error(location: CommandLocation<'_>, path: &Path) -> Error { - Error::new( - ErrorKind::InvalidOperation, - format!( - "{} produced a temporary output path that is not valid UTF-8: {}", - location.describe(), - path.display() - ), - ) -} - fn timeout_error(location: CommandLocation<'_>, duration: Duration) -> Error { Error::new( ErrorKind::InvalidOperation, @@ -952,7 +992,6 @@ enum CommandFailure { mode: OutputMode, limit: u64, }, - StreamPathNotUtf8(PathBuf), Timeout(Duration), } @@ -1059,8 +1098,7 @@ fn read_pipe_tempfile( where R: Read, { - let tempfile = config.create_tempfile(label).map_err(CommandFailure::Io)?; - let mut file = tempfile.into_file(); + let mut tempfile = config.create_tempfile(label).map_err(CommandFailure::Io)?; let mut chunk = [0_u8; PIPE_CHUNK_SIZE]; loop { let read = reader.read(&mut chunk).map_err(CommandFailure::Io)?; @@ -1072,15 +1110,14 @@ where clippy::indexing_slicing, reason = "Read::read guarantees `read` does not exceed `chunk.len()`" )] - file.write_all(&chunk[..read]).map_err(CommandFailure::Io)?; + tempfile + .as_file_mut() + .write_all(&chunk[..read]) + .map_err(CommandFailure::Io)?; } - file.flush().map_err(CommandFailure::Io)?; - let temp_path = file.into_temp_path(); - let path = temp_path - .keep() - .map_err(|err| CommandFailure::Io(err.error))?; - let utf8 = Utf8PathBuf::from_path_buf(path).map_err(CommandFailure::StreamPathNotUtf8)?; - Ok(PipeOutcome::Tempfile(utf8)) + tempfile.as_file_mut().flush().map_err(CommandFailure::Io)?; + let path = tempfile.into_path().map_err(CommandFailure::Io)?; + Ok(PipeOutcome::Tempfile(path)) } fn create_empty_tempfile( @@ -1088,12 +1125,7 @@ fn create_empty_tempfile( label: &str, ) -> Result { let tempfile = config.create_tempfile(label).map_err(CommandFailure::Io)?; - let file = tempfile.into_file(); - let path = file - .into_temp_path() - .keep() - .map_err(|err| CommandFailure::Io(err.error))?; - Utf8PathBuf::from_path_buf(path).map_err(CommandFailure::StreamPathNotUtf8) + tempfile.into_path().map_err(CommandFailure::Io) } fn append_stderr(message: &mut String, stderr: &[u8]) { @@ -1132,6 +1164,35 @@ mod tests { (temp, config) } + #[test] + fn sanitize_label_replaces_disallowed_characters() { + assert_eq!(sanitize_label("std:out/..*"), "std-out----"); + } + + #[test] + fn command_tempfile_drop_removes_file() { + let (_temp_dir, config) = test_command_config(); + let temp_path = { + let tempfile = config.create_tempfile("stdout").expect("create temp file"); + tempfile.path().to_path_buf() + }; + assert!( + !temp_path.exists(), + "temporary file should be removed on drop" + ); + } + + #[test] + fn command_tempfile_into_path_persists_file() { + let (_temp_dir, config) = test_command_config(); + let tempfile = config.create_tempfile("stdout").expect("create temp file"); + let expected = tempfile.path().to_path_buf(); + let persisted = tempfile.into_path().expect("persist temp file"); + assert_eq!(persisted.as_std_path(), expected.as_path()); + assert!(persisted.as_std_path().exists()); + fs::remove_file(persisted.as_std_path()).expect("cleanup persisted temp file"); + } + fn assert_output_limit_error( outcome: Result, expected_stream: OutputStream, diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index 16fd29ef..57636c6c 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -120,7 +120,7 @@ impl StdlibConfig { let workspace_path = path.into(); assert!( workspace_path.is_absolute(), - "workspace root path must be absolute: {workspace_path}" + "with_workspace_root_path requires an absolute path, got: {workspace_path}" ); self.workspace_root_path = Some(workspace_path); self