From 4151776de0adf6ddc5940125b0d42cf5bddef08b Mon Sep 17 00:00:00 2001 From: David Wiesen Date: Thu, 19 Mar 2026 11:39:48 -0700 Subject: [PATCH 1/4] add specific tool guidance for Windows destructive commands --- codex-rs/core/src/tools/spec.rs | 46 ++++++++++++++++++++------- codex-rs/core/src/tools/spec_tests.rs | 33 ++++++++++++++++++- 2 files changed, 67 insertions(+), 12 deletions(-) diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index 8992eb44561..a91e95bbe54 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -587,6 +587,12 @@ fn create_request_permissions_schema() -> JsonSchema { } } +fn windows_destructive_filesystem_guidance() -> &'static str { + r#"Windows safety rules: +- Do not compose destructive filesystem commands across shells. Do not enumerate paths in PowerShell and then pass them to `cmd /c`, batch builtins, or another shell for deletion or moving. Use one shell end-to-end, prefer native PowerShell cmdlets such as `Remove-Item` / `Move-Item` with `-LiteralPath`, and avoid string-built shell commands for file operations. +- Before any recursive delete or move on Windows, verify the resolved absolute target paths stay within the intended workspace or explicitly named target directory. Never issue a recursive delete or move against a computed path if the final target has not been checked."# +} + fn create_approval_parameters( exec_permission_approvals_enabled: bool, ) -> BTreeMap { @@ -709,9 +715,15 @@ fn create_exec_command_tool( ToolSpec::Function(ResponsesApiTool { name: "exec_command".to_string(), - description: + description: if cfg!(windows) { + format!( + "Runs a command in a PTY, returning output or a session ID for ongoing interaction.\n\n{}", + windows_destructive_filesystem_guidance() + ) + } else { "Runs a command in a PTY, returning output or a session ID for ongoing interaction." - .to_string(), + .to_string() + }, strict: false, defer_loading: None, parameters: JsonSchema::Object { @@ -848,22 +860,28 @@ fn create_shell_tool(exec_permission_approvals_enabled: bool) -> ToolSpec { exec_permission_approvals_enabled, )); - let description = if cfg!(windows) { - r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"]. + let description = if cfg!(windows) { + format!( + r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"]. Examples of valid command strings: - ls -a (show hidden): ["powershell.exe", "-Command", "Get-ChildItem -Force"] - recursive find by name: ["powershell.exe", "-Command", "Get-ChildItem -Recurse -Filter *.py"] - recursive grep: ["powershell.exe", "-Command", "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"] -- ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"] +- ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object {{ $_.ProcessName -like '*python*' }}"] - setting an env var: ["powershell.exe", "-Command", "$env:FOO='bar'; echo $env:FOO"] -- running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"# +- running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"] + +{}"#, + windows_destructive_filesystem_guidance() + ) } else { r#"Runs a shell command and returns its output. - The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"]. - Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary."# - }.to_string(); + .to_string() + }; ToolSpec::Function(ResponsesApiTool { name: "shell".to_string(), @@ -921,20 +939,26 @@ fn create_shell_command_tool( )); let description = if cfg!(windows) { - r#"Runs a Powershell command (Windows) and returns its output. + format!( + r#"Runs a Powershell command (Windows) and returns its output. Examples of valid command strings: - ls -a (show hidden): "Get-ChildItem -Force" - recursive find by name: "Get-ChildItem -Recurse -Filter *.py" - recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive" -- ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }" +- ps aux | grep python: "Get-Process | Where-Object {{ $_.ProcessName -like '*python*' }}" - setting an env var: "$env:FOO='bar'; echo $env:FOO" -- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"# +- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -" + +{}"#, + windows_destructive_filesystem_guidance() + ) } else { r#"Runs a shell command and returns its output. - Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary."# - }.to_string(); + .to_string() + }; ToolSpec::Function(ResponsesApiTool { name: "shell_command".to_string(), diff --git a/codex-rs/core/src/tools/spec_tests.rs b/codex-rs/core/src/tools/spec_tests.rs index 2d0a23431ca..d3d5ff6b44d 100644 --- a/codex-rs/core/src/tools/spec_tests.rs +++ b/codex-rs/core/src/tools/spec_tests.rs @@ -50,6 +50,10 @@ fn discoverable_connector(id: &str, name: &str, description: &str) -> Discoverab })) } +fn windows_shell_safety_description() -> String { + format!("\n\n{}", super::windows_destructive_filesystem_guidance()) +} + fn search_capable_model_info() -> ModelInfo { let config = test_config(); let mut model_info = @@ -2373,6 +2377,8 @@ Examples of valid command strings: - ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"] - setting an env var: ["powershell.exe", "-Command", "$env:FOO='bar'; echo $env:FOO"] - running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"# + .to_string() + + &windows_shell_safety_description() } else { r#"Runs a shell command and returns its output. - The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"]. @@ -2381,6 +2387,29 @@ Examples of valid command strings: assert_eq!(description, &expected); } +#[test] +fn test_exec_command_tool_windows_description_includes_shell_safety_guidance() { + let tool = super::create_exec_command_tool(true, false); + let ToolSpec::Function(ResponsesApiTool { + description, name, .. + }) = &tool + else { + panic!("expected function tool"); + }; + assert_eq!(name, "exec_command"); + + let expected = if cfg!(windows) { + format!( + "Runs a command in a PTY, returning output or a session ID for ongoing interaction.{}", + windows_shell_safety_description() + ) + } else { + "Runs a command in a PTY, returning output or a session ID for ongoing interaction." + .to_string() + }; + assert_eq!(description, &expected); +} + #[test] fn shell_tool_with_request_permission_includes_additional_permissions() { let tool = super::create_shell_tool(true); @@ -2482,7 +2511,9 @@ Examples of valid command strings: - recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive" - ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }" - setting an env var: "$env:FOO='bar'; echo $env:FOO" -- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"#.to_string() +- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"# + .to_string() + + &windows_shell_safety_description() } else { r#"Runs a shell command and returns its output. - Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary."#.to_string() From 5c8adebb02266094d5017fd30f3b576645e28e49 Mon Sep 17 00:00:00 2001 From: David Wiesen Date: Thu, 19 Mar 2026 12:09:35 -0700 Subject: [PATCH 2/4] fix test compile error --- codex-rs/core/src/tools/spec_tests.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/codex-rs/core/src/tools/spec_tests.rs b/codex-rs/core/src/tools/spec_tests.rs index d3d5ff6b44d..6261417b125 100644 --- a/codex-rs/core/src/tools/spec_tests.rs +++ b/codex-rs/core/src/tools/spec_tests.rs @@ -2367,7 +2367,7 @@ fn test_shell_tool() { assert_eq!(name, "shell"); let expected = if cfg!(windows) { - r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"]. + r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"]. Examples of valid command strings: @@ -2379,11 +2379,12 @@ Examples of valid command strings: - running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"# .to_string() + &windows_shell_safety_description() - } else { - r#"Runs a shell command and returns its output. + } else { + r#"Runs a shell command and returns its output. - The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"]. - Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary."# - }.to_string(); + .to_string() + }; assert_eq!(description, &expected); } From 58ab27ff592e1a7723d07c56862f15ad9584a7b5 Mon Sep 17 00:00:00 2001 From: David Wiesen Date: Thu, 19 Mar 2026 13:00:31 -0700 Subject: [PATCH 3/4] fix environment compile error --- codex-rs/exec-server/src/server/filesystem.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/exec-server/src/server/filesystem.rs b/codex-rs/exec-server/src/server/filesystem.rs index bc3d22a4da3..a263bb1fee0 100644 --- a/codex-rs/exec-server/src/server/filesystem.rs +++ b/codex-rs/exec-server/src/server/filesystem.rs @@ -36,7 +36,7 @@ pub(crate) struct ExecServerFileSystem { impl Default for ExecServerFileSystem { fn default() -> Self { Self { - file_system: Arc::new(Environment.get_filesystem()), + file_system: Arc::new(Environment::default().get_filesystem()), } } } From 503fc5670f9cfc2d58954a821f658b260c982df6 Mon Sep 17 00:00:00 2001 From: David Wiesen Date: Thu, 19 Mar 2026 13:44:00 -0700 Subject: [PATCH 4/4] fix failing test --- codex-rs/core/src/tools/spec_tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/core/src/tools/spec_tests.rs b/codex-rs/core/src/tools/spec_tests.rs index 6261417b125..3142dd46a3a 100644 --- a/codex-rs/core/src/tools/spec_tests.rs +++ b/codex-rs/core/src/tools/spec_tests.rs @@ -2512,7 +2512,7 @@ Examples of valid command strings: - recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive" - ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }" - setting an env var: "$env:FOO='bar'; echo $env:FOO" -- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"# +- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -""# .to_string() + &windows_shell_safety_description() } else {