From aa8a0651d5e0694920340ced7529fc75050114e6 Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 6 Nov 2025 12:03:18 +0000 Subject: [PATCH 1/3] refactor(stdlib): split read_pipe into separate capture and tempfile functions Refactored the read_pipe function in src/stdlib/command.rs by splitting its logic based on OutputMode into two distinct functions, read_pipe_capture and read_pipe_tempfile. This improves code clarity and separation of concerns while maintaining existing behavior. Additionally, refactored template rendering test helpers in tests/steps/stdlib_steps/rendering.rs to consolidate context construction into a single helper function render_with_single_context, reducing code duplication in tests. Co-authored-by: terragon-labs[bot] --- src/stdlib/command.rs | 81 +++++++++++++++------------ tests/steps/stdlib_steps/rendering.rs | 23 +++++++- 2 files changed, 65 insertions(+), 39 deletions(-) diff --git a/src/stdlib/command.rs b/src/stdlib/command.rs index 4003b94a..b29b6d09 100644 --- a/src/stdlib/command.rs +++ b/src/stdlib/command.rs @@ -904,49 +904,58 @@ fn join_reader( } } -fn read_pipe(mut reader: R, spec: PipeSpec) -> Result +fn read_pipe(reader: R, spec: PipeSpec) -> Result where R: Read, { - let mut limit = spec.into_limit(); + let limit = spec.into_limit(); match spec.mode() { - OutputMode::Capture => { - let mut buf = Vec::new(); - let mut chunk = [0_u8; 8192]; - loop { - let read = reader.read(&mut chunk).map_err(CommandFailure::Io)?; - if read == 0 { - break; - } - limit.record(read)?; - buf.extend(chunk.iter().take(read).copied()); - } - Ok(PipeOutcome::Bytes(buf)) + OutputMode::Capture => read_pipe_capture(reader, limit), + OutputMode::Tempfile => read_pipe_tempfile(reader, limit), + } +} + +fn read_pipe_capture(mut reader: R, mut limit: PipeLimit) -> Result +where + R: Read, +{ + let mut buf = Vec::new(); + let mut chunk = [0_u8; 8192]; + loop { + let read = reader.read(&mut chunk).map_err(CommandFailure::Io)?; + if read == 0 { + break; } - OutputMode::Tempfile => { - let mut file = NamedTempFile::new().map_err(CommandFailure::Io)?; - let mut chunk = [0_u8; 8192]; - loop { - let read = reader.read(&mut chunk).map_err(CommandFailure::Io)?; - if read == 0 { - break; - } - limit.record(read)?; - let slice = chunk.get(..read).ok_or_else(|| { - CommandFailure::Io(io::Error::other("pipe read out of range")) - })?; - file.write_all(slice).map_err(CommandFailure::Io)?; - } - file.flush().map_err(CommandFailure::Io)?; - let temp_path = file.into_temp_path(); - let path = temp_path - .keep() - .map_err(|err| CommandFailure::Io(err.error))?; - let utf8 = - Utf8PathBuf::from_path_buf(path).map_err(CommandFailure::StreamPathNotUtf8)?; - Ok(PipeOutcome::Tempfile(utf8)) + limit.record(read)?; + buf.extend(chunk.iter().take(read).copied()); + } + Ok(PipeOutcome::Bytes(buf)) +} + +fn read_pipe_tempfile(mut reader: R, mut limit: PipeLimit) -> Result +where + R: Read, +{ + let mut file = NamedTempFile::new().map_err(CommandFailure::Io)?; + let mut chunk = [0_u8; 8192]; + loop { + let read = reader.read(&mut chunk).map_err(CommandFailure::Io)?; + if read == 0 { + break; } + limit.record(read)?; + let slice = chunk + .get(..read) + .ok_or_else(|| CommandFailure::Io(io::Error::other("pipe read out of range")))?; + file.write_all(slice).map_err(CommandFailure::Io)?; } + file.flush().map_err(CommandFailure::Io)?; + let temp_path = file.into_temp_path(); + let path = temp_path + .keep() + .map_err(|err| CommandFailure::Io(err.error))?; + let utf8 = Utf8PathBuf::from_path_buf(path).map_err(CommandFailure::StreamPathNotUtf8)?; + Ok(PipeOutcome::Tempfile(utf8)) } fn create_empty_tempfile() -> Result { diff --git a/tests/steps/stdlib_steps/rendering.rs b/tests/steps/stdlib_steps/rendering.rs index 15b9ef47..63c796d8 100644 --- a/tests/steps/stdlib_steps/rendering.rs +++ b/tests/steps/stdlib_steps/rendering.rs @@ -54,6 +54,23 @@ pub(crate) fn render_template_with_context( Ok(()) } +fn render_with_single_context( + world: &mut CliWorld, + template: &TemplateContent, + key: &str, + value: String, +) -> Result<()> { + use minijinja::value::Value; + use std::collections::BTreeMap; + + let ctx = Value::from_serialize( + [(key, value)] + .into_iter() + .collect::>(), + ); + render_template_with_context(world, template, ctx) +} + fn render_template( world: &mut CliWorld, template: &TemplateContent, @@ -103,7 +120,7 @@ pub(crate) fn render_stdlib_template_with_url( .stdlib_url .clone() .context("expected stdlib HTTP server to be initialised")?; - render_template_with_context(world, &template_content, context!(url => url)) + render_with_single_context(world, &template_content, "url", url) } #[expect( @@ -119,7 +136,7 @@ pub(crate) fn render_stdlib_template_with_command( .stdlib_command .clone() .context("expected stdlib command helper to be compiled")?; - render_template_with_context(world, &template_content, context!(cmd => command)) + render_with_single_context(world, &template_content, "cmd", command) } #[expect( @@ -135,5 +152,5 @@ pub(crate) fn render_stdlib_template_with_text( .stdlib_text .clone() .context("expected stdlib template text to be configured")?; - render_template_with_context(world, &template_content, context!(text => text)) + render_with_single_context(world, &template_content, "text", text) } From cfa3ffcb8e821e22932c8efea3518f33d7bfee47 Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 6 Nov 2025 13:21:26 +0000 Subject: [PATCH 2/3] feat(stdlib/command): enforce byte limits and use workspace-rooted tempfiles for command output - Introduce distinct configurable byte limits for stdout capture and streaming in CommandConfig. - Implement workspace-rooted temporary file creation for streaming command outputs, replacing system temp directory usage. - Modify child process pipe readers to enforce configured limits and produce bytes or workspace-based tempfiles accordingly. - Propagate workspace root path into stdlib configuration, enabling temp file placement inside project directories. - Add tests validating capturing and streaming output within limits and error handling on limit exceedance. - Enhance documentation with a Mermaid class diagram illustrating pipe reading abstractions. - Update stdlib configuration defaults to include workspace root path for consistent temp file handling. - Adjust tests and feature files to verify new output limit enforcement and streaming behavior. This improves resource control and workspace hygiene for command output handling in Netsuke standard library. Co-authored-by: terragon-labs[bot] --- docs/netsuke-design.md | 44 +++ src/manifest/mod.rs | 4 +- src/stdlib/command.rs | 304 +++++++++++++++++--- src/stdlib/mod.rs | 109 ++++++- tests/features/stdlib.feature | 9 +- tests/std_filter_tests/network_functions.rs | 9 +- tests/steps/stdlib_steps/rendering.rs | 2 +- 7 files changed, 429 insertions(+), 52 deletions(-) diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index d4eda21d..c2a3008f 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -1062,6 +1062,50 @@ Implementation details: builder and hands owned network/command configurations to the registration routines, avoiding needless cloning of the capability handles. +```mermaid +classDiagram + class read_pipe { + +read_pipe(reader: R, spec: PipeSpec): Result + } + class read_pipe_capture { + +read_pipe_capture(reader: R, limit: PipeLimit): Result + } + class read_pipe_tempfile { + +read_pipe_tempfile(reader: R, limit: PipeLimit): Result + } + read_pipe --> read_pipe_capture : calls + read_pipe --> read_pipe_tempfile : calls + class PipeSpec { + +into_limit(): PipeLimit + +mode(): OutputMode + } + class PipeOutcome { + <> + Bytes(Vec) + Tempfile(Utf8PathBuf) + } + class CommandFailure { + <> + Io + StreamPathNotUtf8 + } + class PipeLimit { + +record(read: usize): Result<(), CommandFailure> + } + class OutputMode { + <> + Capture + Tempfile + } + read_pipe ..> PipeSpec : uses + read_pipe_capture ..> PipeLimit : uses + read_pipe_tempfile ..> PipeLimit : uses + read_pipe_capture ..> PipeOutcome : returns + read_pipe_tempfile ..> PipeOutcome : returns + read_pipe_capture ..> CommandFailure : error + read_pipe_tempfile ..> CommandFailure : error +``` + Custom external commands can be registered as additional filters. Those should be marked `pure` if safe for caching or `impure` otherwise. diff --git a/src/manifest/mod.rs b/src/manifest/mod.rs index bbf91500..f1cb8237 100644 --- a/src/manifest/mod.rs +++ b/src/manifest/mod.rs @@ -212,5 +212,7 @@ fn stdlib_config_for_manifest(path: &Path, policy: NetworkPolicy) -> Result, + workspace_root_path: Option>, + ) -> Self { + Self { + max_capture_bytes, + max_stream_bytes, + temp_dir: CommandTempDir::new(workspace_root, workspace_root_path), + } + } + + fn create_tempfile(&self, label: &str) -> io::Result { + self.temp_dir.create(label) + } +} + +#[derive(Clone)] +struct CommandTempDir { + workspace_root: Arc, + workspace_root_path: Option>, + relative: Utf8PathBuf, +} + +impl CommandTempDir { + fn new(workspace_root: Arc, workspace_root_path: Option>) -> Self { + Self { + workspace_root, + workspace_root_path, + relative: Utf8PathBuf::from(DEFAULT_COMMAND_TEMP_DIR), + } + } + + fn create(&self, label: &str) -> io::Result { + self.workspace_root.create_dir_all(&self.relative)?; + let mut builder = Builder::new(); + builder.prefix(label); + let file = if let Some(root_path) = &self.workspace_root_path { + let dir_path = root_path.join(&self.relative); + fs::create_dir_all(dir_path.as_std_path())?; + builder.tempfile_in(dir_path.as_std_path())? + } else { + builder.tempfile()? + }; + Ok(CommandTempFile { file }) + } +} + +struct CommandTempFile { + file: NamedTempFile, +} + +impl CommandTempFile { + fn into_file(self) -> NamedTempFile { + self.file + } } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -103,6 +177,20 @@ impl OutputStream { Self::Stderr => "stderr", } } + + const fn tempfile_label(self) -> &'static str { + match self { + Self::Stdout => "stdout", + Self::Stderr => "stderr", + } + } + + const fn empty_tempfile_label(self) -> &'static str { + match self { + Self::Stdout => "stdout-empty", + Self::Stderr => "stderr-empty", + } + } } impl OutputMode { @@ -248,33 +336,39 @@ impl Default for CommandOptions { } } +#[derive(Debug)] enum StdoutResult { Bytes(Vec), Tempfile(Utf8PathBuf), } +#[derive(Debug)] enum PipeOutcome { Bytes(Vec), Tempfile(Utf8PathBuf), } -#[derive(Clone, Copy)] -struct CommandContext<'a> { - config: &'a CommandConfig, +#[derive(Clone)] +struct CommandContext { + config: Arc, options: CommandOptions, } -impl<'a> CommandContext<'a> { - const fn new(config: &'a CommandConfig, options: CommandOptions) -> Self { +impl CommandContext { + fn new(config: Arc, options: CommandOptions) -> Self { Self { config, options } } - const fn stdout_mode(self) -> OutputMode { + const fn stdout_mode(&self) -> OutputMode { self.options.stdout_mode() } - const fn config(self) -> &'a CommandConfig { - self.config + fn config(&self) -> &CommandConfig { + &self.config + } + + fn config_handle(&self) -> Arc { + Arc::clone(&self.config) } } @@ -358,7 +452,7 @@ pub(crate) fn register( move |state: &State, value: Value, command: String, options: Option| { shell_flag.store(true, Ordering::Relaxed); let parsed = CommandOptions::from_value(options)?; - let context = CommandContext::new(shell_config.as_ref(), parsed); + let context = CommandContext::new(Arc::clone(&shell_config), parsed); execute_shell(state, &value, &command, context) }, ); @@ -374,7 +468,7 @@ pub(crate) fn register( options: Option| { grep_flag.store(true, Ordering::Relaxed); let parsed = CommandOptions::from_value(options)?; - let context = CommandContext::new(grep_config.as_ref(), parsed); + let context = CommandContext::new(Arc::clone(&grep_config), parsed); let call = GrepCall::new(&pattern, flags); execute_grep(state, &value, call, context) }, @@ -385,7 +479,7 @@ fn execute_shell( state: &State, value: &Value, command: &str, - context: CommandContext<'_>, + context: CommandContext, ) -> Result { let cmd = command.trim(); if cmd.is_empty() { @@ -408,7 +502,7 @@ fn execute_grep( state: &State, value: &Value, call: GrepCall<'_>, - context: CommandContext<'_>, + context: CommandContext, ) -> Result { let GrepCall { pattern, flags } = call; if pattern.is_empty() { @@ -580,7 +674,7 @@ fn to_bytes(value: &Value) -> Result, Error> { fn run_command( command: &str, input: &[u8], - context: CommandContext<'_>, + context: CommandContext, ) -> Result { let mut cmd = Command::new(SHELL); cmd.args(SHELL_ARGS) @@ -597,7 +691,7 @@ fn run_program( program: &str, args: &[String], input: &[u8], - context: CommandContext<'_>, + context: CommandContext, ) -> Result { let mut cmd = Command::new(program); cmd.args(args) @@ -611,7 +705,7 @@ fn run_program( fn run_child( mut command: Command, input: &[u8], - context: CommandContext<'_>, + context: CommandContext, ) -> Result { let mut child = command.spawn().map_err(CommandFailure::Spawn)?; let mut stdin_handle = child.stdin.take().map(|mut stdin| { @@ -628,8 +722,13 @@ fn run_child( let stdout_spec = PipeSpec::new(OutputStream::Stdout, context.stdout_mode(), stdout_limit); let stderr_spec = PipeSpec::new(OutputStream::Stderr, OutputMode::Capture, stderr_limit); - let mut stdout_reader = spawn_pipe_reader(child.stdout.take(), stdout_spec); - let mut stderr_reader = spawn_pipe_reader(child.stderr.take(), stderr_spec); + let stdout_config = context.config_handle(); + let stderr_config = context.config_handle(); + + let mut stdout_reader = + spawn_pipe_reader(child.stdout.take(), stdout_spec, Arc::clone(&stdout_config)); + let mut stderr_reader = + spawn_pipe_reader(child.stderr.take(), stderr_spec, Arc::clone(&stderr_config)); let status = match wait_for_exit(&mut child, COMMAND_TIMEOUT) { Ok(status) => status, @@ -639,8 +738,8 @@ fn run_child( } }; - let stdout = join_reader(stdout_reader.take(), stdout_spec)?; - let stderr_outcome = join_reader(stderr_reader.take(), stderr_spec)?; + let stdout = join_reader(stdout_reader.take(), stdout_spec, stdout_config)?; + let stderr_outcome = join_reader(stderr_reader.take(), stderr_spec, stderr_config)?; let stderr = match stderr_outcome { PipeOutcome::Bytes(bytes) => bytes, @@ -879,16 +978,18 @@ fn wait_for_exit(child: &mut Child, timeout: Duration) -> Result( pipe: Option, spec: PipeSpec, + config: Arc, ) -> Option>> where R: Read + Send + 'static, { - pipe.map(|reader| thread::spawn(move || read_pipe(reader, spec))) + pipe.map(|reader| thread::spawn(move || read_pipe(reader, spec, config))) } fn join_reader( reader_handle: Option>>, spec: PipeSpec, + config: Arc, ) -> Result { match reader_handle { Some(join_handle) => join_handle @@ -896,7 +997,8 @@ fn join_reader( .map_err(|_| CommandFailure::Io(io::Error::other("pipe reader panicked")))?, None => { if matches!(spec.mode(), OutputMode::Tempfile) { - create_empty_tempfile().map(PipeOutcome::Tempfile) + create_empty_tempfile(&config, spec.stream().empty_tempfile_label()) + .map(PipeOutcome::Tempfile) } else { Ok(PipeOutcome::Bytes(Vec::new())) } @@ -904,23 +1006,37 @@ fn join_reader( } } -fn read_pipe(reader: R, spec: PipeSpec) -> Result +/// Drains a child process pipe according to the provided `PipeSpec`, enforcing +/// the configured byte limit and producing either in-memory bytes or a +/// tempfile-backed outcome. +fn read_pipe( + reader: R, + spec: PipeSpec, + config: Arc, +) -> Result where R: Read, { let limit = spec.into_limit(); match spec.mode() { OutputMode::Capture => read_pipe_capture(reader, limit), - OutputMode::Tempfile => read_pipe_tempfile(reader, limit), + OutputMode::Tempfile => read_pipe_tempfile( + reader, + limit, + spec.stream().tempfile_label(), + config.as_ref(), + ), } } +/// Reads a pipe into memory while enforcing the capture byte limit recorded in +/// the `PipeLimit` tracker. fn read_pipe_capture(mut reader: R, mut limit: PipeLimit) -> Result where R: Read, { let mut buf = Vec::new(); - let mut chunk = [0_u8; 8192]; + let mut chunk = [0_u8; PIPE_CHUNK_SIZE]; loop { let read = reader.read(&mut chunk).map_err(CommandFailure::Io)?; if read == 0 { @@ -932,22 +1048,27 @@ where Ok(PipeOutcome::Bytes(buf)) } -fn read_pipe_tempfile(mut reader: R, mut limit: PipeLimit) -> Result +/// Streams a pipe into a tempfile rooted within the workspace, enforcing the +/// streaming byte limit recorded in `PipeLimit` and returning the final path. +fn read_pipe_tempfile( + mut reader: R, + mut limit: PipeLimit, + label: &str, + config: &CommandConfig, +) -> Result where R: Read, { - let mut file = NamedTempFile::new().map_err(CommandFailure::Io)?; - let mut chunk = [0_u8; 8192]; + let tempfile = config.create_tempfile(label).map_err(CommandFailure::Io)?; + let mut file = tempfile.into_file(); + let mut chunk = [0_u8; PIPE_CHUNK_SIZE]; loop { let read = reader.read(&mut chunk).map_err(CommandFailure::Io)?; if read == 0 { break; } limit.record(read)?; - let slice = chunk - .get(..read) - .ok_or_else(|| CommandFailure::Io(io::Error::other("pipe read out of range")))?; - file.write_all(slice).map_err(CommandFailure::Io)?; + file.write_all(&chunk[..read]).map_err(CommandFailure::Io)?; } file.flush().map_err(CommandFailure::Io)?; let temp_path = file.into_temp_path(); @@ -958,8 +1079,12 @@ where Ok(PipeOutcome::Tempfile(utf8)) } -fn create_empty_tempfile() -> Result { - let file = NamedTempFile::new().map_err(CommandFailure::Io)?; +fn create_empty_tempfile( + config: &CommandConfig, + label: &str, +) -> Result { + let tempfile = config.create_tempfile(label).map_err(CommandFailure::Io)?; + let file = tempfile.into_file(); let path = file .into_temp_path() .keep() @@ -978,12 +1103,34 @@ fn append_stderr(message: &mut String, stderr: &[u8]) { #[cfg(test)] mod tests { + use super::*; + use crate::stdlib::{DEFAULT_COMMAND_MAX_OUTPUT_BYTES, DEFAULT_COMMAND_MAX_STREAM_BYTES}; + use camino::Utf8PathBuf; + use cap_std::{ambient_authority, fs_utf8::Dir}; + use std::{fs, io::Cursor}; + use tempfile::tempdir; + #[cfg(windows)] - #[test] - fn quote_escapes_cmd_metacharacters() -> anyhow::Result<()> { - use super::{QuoteError, quote}; - use anyhow::ensure; + use anyhow::{Result, ensure}; + + fn test_command_config() -> (tempfile::TempDir, CommandConfig) { + let temp = tempdir().expect("create command temp workspace"); + let path = Utf8PathBuf::from_path_buf(temp.path().to_path_buf()) + .expect("temp workspace should be valid UTF-8"); + let dir = + Dir::open_ambient_dir(&path, ambient_authority()).expect("open temp workspace dir"); + let config = CommandConfig::new( + DEFAULT_COMMAND_MAX_OUTPUT_BYTES, + DEFAULT_COMMAND_MAX_STREAM_BYTES, + Arc::new(dir), + Some(Arc::new(path)), + ); + (temp, config) + } + #[cfg(windows)] + #[test] + fn quote_escapes_cmd_metacharacters() -> Result<()> { let success_cases = [ ("simple", "simple"), ("", "\"\""), @@ -1028,4 +1175,83 @@ mod tests { } Ok(()) } + + #[test] + fn read_pipe_capture_collects_bytes_within_limit() { + let data = b"payload".to_vec(); + let outcome = read_pipe_capture( + Cursor::new(data.clone()), + PipeSpec::new(OutputStream::Stdout, OutputMode::Capture, 128).into_limit(), + ) + .expect("capture should succeed within the configured limit"); + match outcome { + PipeOutcome::Bytes(buf) => assert_eq!(buf, data), + PipeOutcome::Tempfile(_) => panic!("capture mode should emit bytes"), + } + } + + #[test] + fn read_pipe_capture_reports_limit_exceedance() { + let err = read_pipe_capture( + Cursor::new(vec![0_u8; 16]), + PipeSpec::new(OutputStream::Stdout, OutputMode::Capture, 8).into_limit(), + ) + .expect_err("capture should fail when it exceeds the configured limit"); + match err { + CommandFailure::OutputLimit { + stream, + mode, + limit, + } => { + assert_eq!(stream, OutputStream::Stdout); + assert_eq!(mode, OutputMode::Capture); + assert_eq!(limit, 8); + } + other => panic!("unexpected error: {other:?}"), + } + } + + #[test] + fn read_pipe_tempfile_writes_streamed_data() { + let payload = vec![b'x'; 32]; + let (_temp_dir, config) = test_command_config(); + let outcome = read_pipe_tempfile( + Cursor::new(payload.clone()), + PipeSpec::new(OutputStream::Stdout, OutputMode::Tempfile, 64).into_limit(), + "stdout", + &config, + ) + .expect("streaming should succeed within the configured limit"); + let path = match outcome { + PipeOutcome::Tempfile(path) => path, + PipeOutcome::Bytes(_) => panic!("streaming mode should emit a tempfile path"), + }; + let disk = fs::read(path.as_std_path()).expect("read streamed output"); + assert_eq!(disk, payload); + fs::remove_file(path.as_std_path()).expect("cleanup streamed file"); + } + + #[test] + fn read_pipe_tempfile_respects_stream_limit() { + let (_temp_dir, config) = test_command_config(); + let err = read_pipe_tempfile( + Cursor::new(vec![b'y'; 32]), + PipeSpec::new(OutputStream::Stdout, OutputMode::Tempfile, 8).into_limit(), + "stdout", + &config, + ) + .expect_err("streaming should fail when it exceeds the configured limit"); + match err { + CommandFailure::OutputLimit { + stream, + mode, + limit, + } => { + assert_eq!(stream, OutputStream::Stdout); + assert_eq!(mode, OutputMode::Tempfile); + assert_eq!(limit, 8); + } + other => panic!("unexpected error: {other:?}"), + } + } } diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index 0260bd1f..5439a4bf 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -26,6 +26,7 @@ use cap_std::fs::FileTypeExt; use cap_std::{ambient_authority, fs, fs_utf8::Dir}; use minijinja::{Environment, Error, value::Value}; use std::{ + env, sync::Arc, sync::atomic::{AtomicBool, Ordering}, }; @@ -40,6 +41,8 @@ pub(crate) const DEFAULT_FETCH_MAX_RESPONSE_BYTES: u64 = 8 * 1024 * 1024; pub(crate) const DEFAULT_COMMAND_MAX_OUTPUT_BYTES: u64 = 1024 * 1024; /// Default upper bound for streamed command output files (64 MiB). pub(crate) const DEFAULT_COMMAND_MAX_STREAM_BYTES: u64 = 64 * 1024 * 1024; +/// Relative directory for command helper tempfiles. +pub(crate) const DEFAULT_COMMAND_TEMP_DIR: &str = ".netsuke/tmp"; /// Configuration for registering Netsuke's standard library helpers. /// @@ -61,6 +64,7 @@ pub(crate) const DEFAULT_COMMAND_MAX_STREAM_BYTES: u64 = 64 * 1024 * 1024; #[derive(Debug, Clone)] pub struct StdlibConfig { workspace_root: Arc, + workspace_root_path: Option, fetch_cache_relative: Utf8PathBuf, network_policy: NetworkPolicy, fetch_max_response_bytes: u64, @@ -97,6 +101,7 @@ impl StdlibConfig { } Self { workspace_root: Arc::new(workspace_root), + workspace_root_path: None, fetch_cache_relative: default, network_policy: NetworkPolicy::default(), fetch_max_response_bytes: DEFAULT_FETCH_MAX_RESPONSE_BYTES, @@ -105,6 +110,13 @@ impl StdlibConfig { } } + /// Record the absolute workspace root path for capability-scoped helpers. + #[must_use] + pub fn with_workspace_root_path(mut self, path: impl Into) -> Self { + self.workspace_root_path = Some(path.into()); + self + } + /// Override the relative cache directory within the workspace. /// /// # Errors @@ -194,6 +206,7 @@ impl StdlibConfig { pub(crate) fn into_components(self) -> (NetworkConfig, command::CommandConfig) { let Self { workspace_root, + workspace_root_path, fetch_cache_relative, network_policy, fetch_max_response_bytes, @@ -201,6 +214,7 @@ impl StdlibConfig { command_max_stream_bytes, } = self; + let command_root = Arc::clone(&workspace_root); let network = NetworkConfig { cache_root: workspace_root, cache_relative: fetch_cache_relative, @@ -208,10 +222,12 @@ impl StdlibConfig { max_response_bytes: fetch_max_response_bytes, }; - let command = command::CommandConfig { - max_capture_bytes: command_max_output_bytes, - max_stream_bytes: command_max_stream_bytes, - }; + let command = command::CommandConfig::new( + command_max_output_bytes, + command_max_stream_bytes, + command_root, + workspace_root_path.map(Arc::new), + ); (network, command) } @@ -248,7 +264,11 @@ impl Default for StdlibConfig { fn default() -> Self { let root = Dir::open_ambient_dir(".", ambient_authority()) .unwrap_or_else(|err| panic!("open stdlib workspace root: {err}")); - Self::new(root) + let cwd = + env::current_dir().unwrap_or_else(|err| panic!("resolve current directory: {err}")); + let path = Utf8PathBuf::from_path_buf(cwd) + .unwrap_or_else(|path| panic!("cwd contains non-UTF-8 components: {path:?}")); + Self::new(root).with_workspace_root_path(path) } } @@ -309,7 +329,14 @@ impl StdlibState { pub fn register(env: &mut Environment<'_>) -> anyhow::Result { let root = Dir::open_ambient_dir(".", ambient_authority()) .context("open current directory for stdlib registration")?; - Ok(register_with_config(env, StdlibConfig::new(root))) + let cwd = env::current_dir().context("resolve current directory for stdlib registration")?; + let path = Utf8PathBuf::from_path_buf(cwd).map_err(|path| { + anyhow::anyhow!("current directory contains non-UTF-8 components: {path:?}") + })?; + Ok(register_with_config( + env, + StdlibConfig::new(root).with_workspace_root_path(path), + )) } /// Register stdlib helpers using an explicit configuration. @@ -425,9 +452,11 @@ fn is_device(_ft: fs::FileType) -> bool { #[cfg(test)] mod tests { - use super::StdlibConfig; + use super::{DEFAULT_COMMAND_MAX_OUTPUT_BYTES, DEFAULT_COMMAND_MAX_STREAM_BYTES, StdlibConfig}; - use camino::Utf8Path; + use camino::{Utf8Path, Utf8PathBuf}; + use cap_std::{ambient_authority, fs_utf8::Dir}; + use std::env; #[test] fn validate_cache_relative_rejects_empty() { @@ -461,4 +490,68 @@ mod tests { StdlibConfig::validate_cache_relative(Utf8Path::new("nested/cache")) .expect("relative path should be accepted"); } + + #[test] + fn command_limits_default_to_constants() { + let config = StdlibConfig::default(); + assert_eq!( + config.command_max_output_bytes, + DEFAULT_COMMAND_MAX_OUTPUT_BYTES + ); + assert_eq!( + config.command_max_stream_bytes, + DEFAULT_COMMAND_MAX_STREAM_BYTES + ); + } + + #[test] + fn command_output_limit_builder_updates_value() { + let config = StdlibConfig::default() + .with_command_max_output_bytes(2_048) + .expect("positive limits should succeed"); + assert_eq!(config.command_max_output_bytes, 2_048); + } + + #[test] + fn command_output_limit_builder_rejects_zero() { + let err = StdlibConfig::default() + .with_command_max_output_bytes(0) + .expect_err("zero-byte limits must be rejected"); + assert_eq!(err.to_string(), "command output limit must be positive"); + } + + #[test] + fn command_stream_limit_builder_updates_value() { + let config = StdlibConfig::default() + .with_command_max_stream_bytes(65_536) + .expect("positive limits should succeed"); + assert_eq!(config.command_max_stream_bytes, 65_536); + } + + #[test] + fn command_stream_limit_builder_rejects_zero() { + let err = StdlibConfig::default() + .with_command_max_stream_bytes(0) + .expect_err("zero-byte limits must be rejected"); + assert_eq!(err.to_string(), "command stream limit must be positive"); + } + + #[test] + fn command_limits_propagate_into_components() { + let dir = Dir::open_ambient_dir(".", ambient_authority()) + .expect("open workspace root for config tests"); + let path = Utf8PathBuf::from_path_buf( + env::current_dir().expect("resolve cwd for command config test"), + ) + .expect("cwd should be valid UTF-8"); + let config = StdlibConfig::new(dir) + .with_workspace_root_path(path) + .with_command_max_output_bytes(4_096) + .expect("set capture limit") + .with_command_max_stream_bytes(131_072) + .expect("set streaming limit"); + let (_network, command) = config.into_components(); + assert_eq!(command.max_capture_bytes, 4_096); + assert_eq!(command.max_stream_bytes, 131_072); + } } diff --git a/tests/features/stdlib.feature b/tests/features/stdlib.feature index c29c12d3..51b70294 100644 --- a/tests/features/stdlib.feature +++ b/tests/features/stdlib.feature @@ -100,6 +100,14 @@ Feature: Template stdlib filters And the stdlib output file contains only "x" And the stdlib template is impure + Scenario: shell filter enforces command stream limits + Given a large-output stdlib command helper + And the stdlib command output limit is 512 bytes + And the stdlib command stream limit is 1024 bytes + When I render the stdlib template "{{ '' | shell(cmd, {'mode': 'tempfile'}) }}" using the stdlib command helper + Then the stdlib error contains "stdout streaming limit of 1024 bytes" + And the stdlib template is impure + Scenario: grep filter streams large output to a temporary file Given the stdlib command output limit is 512 bytes And the stdlib command stream limit is 200000 bytes @@ -175,4 +183,3 @@ Feature: Template stdlib filters When I render the stdlib template "{{ fetch(url, cache=true, cache_dir='../cache') }}" with stdlib url Then the stdlib error contains "cache_dir" And the stdlib template is pure - diff --git a/tests/std_filter_tests/network_functions.rs b/tests/std_filter_tests/network_functions.rs index e091759d..778d40d7 100644 --- a/tests/std_filter_tests/network_functions.rs +++ b/tests/std_filter_tests/network_functions.rs @@ -24,9 +24,14 @@ fn env_with_policy(policy: NetworkPolicy) -> Result<(Environment<'static>, Stdli fn env_with_workspace_policy( workspace: Dir, + workspace_path: Utf8PathBuf, policy: NetworkPolicy, ) -> Result<(Environment<'static>, StdlibState)> { - fallible::stdlib_env_with_config(StdlibConfig::new(workspace).with_network_policy(policy)) + fallible::stdlib_env_with_config( + StdlibConfig::new(workspace) + .with_workspace_root_path(workspace_path) + .with_network_policy(policy), + ) } struct FetchTestContext<'env> { @@ -220,7 +225,7 @@ fn fetch_function_respects_cache(http_policy: Result) -> Result<( }; let workspace = Dir::open_ambient_dir(&temp_root, ambient_authority()) .context("open fetch cache workspace")?; - let (mut env, mut state) = env_with_workspace_policy(workspace, http_policy?)?; + let (mut env, mut state) = env_with_workspace_policy(workspace, temp_root.clone(), http_policy?)?; state.reset_impure(); fallible::register_template(&mut env, "fetch_cache", "{{ fetch(url, cache=true) }}")?; let tmpl = env diff --git a/tests/steps/stdlib_steps/rendering.rs b/tests/steps/stdlib_steps/rendering.rs index 63c796d8..ab6fc66a 100644 --- a/tests/steps/stdlib_steps/rendering.rs +++ b/tests/steps/stdlib_steps/rendering.rs @@ -18,7 +18,7 @@ pub(crate) fn render_template_with_context( let mut env = Environment::new(); let workspace = Dir::open_ambient_dir(&root, ambient_authority()) .context("open stdlib workspace directory")?; - let mut config = StdlibConfig::new(workspace); + let mut config = StdlibConfig::new(workspace).with_workspace_root_path(root.clone()); if let Some(policy) = world.stdlib_policy.clone() { config = config.with_network_policy(policy); } From 3bd373e9be33002f4e0998ed3b0a5bb86e593ec6 Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 6 Nov 2025 15:36:02 +0000 Subject: [PATCH 3/3] refactor(stdlib/command): use references for CommandContext to avoid cloning Changed functions to accept `&CommandContext` instead of consuming `CommandContext` by value to improve efficiency and avoid unnecessary cloning. Made `CommandContext::new` a `const fn`. Updated related pipe reader functions to accept config by reference rather than Arc cloning. Fixed a slice indexing panic by validating bounds before write. Co-authored-by: terragon-labs[bot] --- src/stdlib/command.rs | 42 +++++++++++++++++++++--------------------- src/stdlib/mod.rs | 2 +- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/stdlib/command.rs b/src/stdlib/command.rs index bb038cca..87da7c4f 100644 --- a/src/stdlib/command.rs +++ b/src/stdlib/command.rs @@ -355,7 +355,7 @@ struct CommandContext { } impl CommandContext { - fn new(config: Arc, options: CommandOptions) -> Self { + const fn new(config: Arc, options: CommandOptions) -> Self { Self { config, options } } @@ -453,7 +453,7 @@ pub(crate) fn register( shell_flag.store(true, Ordering::Relaxed); let parsed = CommandOptions::from_value(options)?; let context = CommandContext::new(Arc::clone(&shell_config), parsed); - execute_shell(state, &value, &command, context) + execute_shell(state, &value, &command, &context) }, ); @@ -470,7 +470,7 @@ pub(crate) fn register( let parsed = CommandOptions::from_value(options)?; let context = CommandContext::new(Arc::clone(&grep_config), parsed); let call = GrepCall::new(&pattern, flags); - execute_grep(state, &value, call, context) + execute_grep(state, &value, call, &context) }, ); } @@ -479,7 +479,7 @@ fn execute_shell( state: &State, value: &Value, command: &str, - context: CommandContext, + context: &CommandContext, ) -> Result { let cmd = command.trim(); if cmd.is_empty() { @@ -502,7 +502,7 @@ fn execute_grep( state: &State, value: &Value, call: GrepCall<'_>, - context: CommandContext, + context: &CommandContext, ) -> Result { let GrepCall { pattern, flags } = call; if pattern.is_empty() { @@ -674,7 +674,7 @@ fn to_bytes(value: &Value) -> Result, Error> { fn run_command( command: &str, input: &[u8], - context: CommandContext, + context: &CommandContext, ) -> Result { let mut cmd = Command::new(SHELL); cmd.args(SHELL_ARGS) @@ -691,7 +691,7 @@ fn run_program( program: &str, args: &[String], input: &[u8], - context: CommandContext, + context: &CommandContext, ) -> Result { let mut cmd = Command::new(program); cmd.args(args) @@ -705,7 +705,7 @@ fn run_program( fn run_child( mut command: Command, input: &[u8], - context: CommandContext, + context: &CommandContext, ) -> Result { let mut child = command.spawn().map_err(CommandFailure::Spawn)?; let mut stdin_handle = child.stdin.take().map(|mut stdin| { @@ -738,8 +738,8 @@ fn run_child( } }; - let stdout = join_reader(stdout_reader.take(), stdout_spec, stdout_config)?; - let stderr_outcome = join_reader(stderr_reader.take(), stderr_spec, stderr_config)?; + let stdout = join_reader(stdout_reader.take(), stdout_spec, stdout_config.as_ref())?; + let stderr_outcome = join_reader(stderr_reader.take(), stderr_spec, stderr_config.as_ref())?; let stderr = match stderr_outcome { PipeOutcome::Bytes(bytes) => bytes, @@ -983,13 +983,13 @@ fn spawn_pipe_reader( where R: Read + Send + 'static, { - pipe.map(|reader| thread::spawn(move || read_pipe(reader, spec, config))) + pipe.map(|reader| thread::spawn(move || read_pipe(reader, spec, config.as_ref()))) } fn join_reader( reader_handle: Option>>, spec: PipeSpec, - config: Arc, + config: &CommandConfig, ) -> Result { match reader_handle { Some(join_handle) => join_handle @@ -997,7 +997,7 @@ fn join_reader( .map_err(|_| CommandFailure::Io(io::Error::other("pipe reader panicked")))?, None => { if matches!(spec.mode(), OutputMode::Tempfile) { - create_empty_tempfile(&config, spec.stream().empty_tempfile_label()) + create_empty_tempfile(config, spec.stream().empty_tempfile_label()) .map(PipeOutcome::Tempfile) } else { Ok(PipeOutcome::Bytes(Vec::new())) @@ -1012,7 +1012,7 @@ fn join_reader( fn read_pipe( reader: R, spec: PipeSpec, - config: Arc, + config: &CommandConfig, ) -> Result where R: Read, @@ -1020,12 +1020,9 @@ where let limit = spec.into_limit(); match spec.mode() { OutputMode::Capture => read_pipe_capture(reader, limit), - OutputMode::Tempfile => read_pipe_tempfile( - reader, - limit, - spec.stream().tempfile_label(), - config.as_ref(), - ), + OutputMode::Tempfile => { + read_pipe_tempfile(reader, limit, spec.stream().tempfile_label(), config) + } } } @@ -1068,7 +1065,10 @@ where break; } limit.record(read)?; - file.write_all(&chunk[..read]).map_err(CommandFailure::Io)?; + let slice = chunk + .get(..read) + .ok_or_else(|| CommandFailure::Io(io::Error::other("pipe read out of range")))?; + file.write_all(slice).map_err(CommandFailure::Io)?; } file.flush().map_err(CommandFailure::Io)?; let temp_path = file.into_temp_path(); diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index 5439a4bf..377505ae 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -267,7 +267,7 @@ impl Default for StdlibConfig { let cwd = env::current_dir().unwrap_or_else(|err| panic!("resolve current directory: {err}")); let path = Utf8PathBuf::from_path_buf(cwd) - .unwrap_or_else(|path| panic!("cwd contains non-UTF-8 components: {path:?}")); + .unwrap_or_else(|path| panic!("cwd contains non-UTF-8 components: {}", path.display())); Self::new(root).with_workspace_root_path(path) } }