diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index bd74d83ad63f..3f27fe10e5c5 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -2334,6 +2334,7 @@ dependencies = [ "codex-protocol", "codex-sandboxing", "codex-utils-absolute-path", + "globset", "landlock", "libc", "pkg-config", @@ -2760,6 +2761,7 @@ dependencies = [ "dunce", "libc", "pretty_assertions", + "regex-lite", "serde_json", "tempfile", "tokio", diff --git a/codex-rs/config/src/permissions_toml.rs b/codex-rs/config/src/permissions_toml.rs index fcc3e006b165..cee68d7abba0 100644 --- a/codex-rs/config/src/permissions_toml.rs +++ b/codex-rs/config/src/permissions_toml.rs @@ -31,6 +31,10 @@ pub struct PermissionProfileToml { #[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)] pub struct FilesystemPermissionsToml { + /// Optional maximum depth for expanding unreadable glob patterns on + /// platforms that snapshot glob matches before sandbox startup. + #[schemars(range(min = 1))] + pub glob_scan_max_depth: Option, #[serde(flatten)] pub entries: BTreeMap, } diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index 966555856b90..67d051a69992 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -673,6 +673,14 @@ ] }, "FilesystemPermissionsToml": { + "properties": { + "glob_scan_max_depth": { + "description": "Optional maximum depth for expanding unreadable glob patterns on platforms that snapshot glob matches before sandbox startup.", + "format": "uint", + "minimum": 1.0, + "type": "integer" + } + }, "type": "object" }, "ForcedLoginMethod": { diff --git a/codex-rs/core/src/config/config_tests.rs b/codex-rs/core/src/config/config_tests.rs index 34002b129a7d..028ec1686ad8 100644 --- a/codex-rs/core/src/config/config_tests.rs +++ b/codex-rs/core/src/config/config_tests.rs @@ -428,6 +428,7 @@ allow_upstream_proxy = false "workspace".to_string(), PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::from([ ( ":minimal".to_string(), @@ -482,6 +483,7 @@ async fn permissions_profiles_network_populates_runtime_network_proxy_spec() -> "workspace".to_string(), PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::from([( ":minimal".to_string(), FilesystemPermissionToml::Access(FileSystemAccessMode::Read), @@ -531,6 +533,7 @@ async fn permissions_profiles_network_disabled_by_default_does_not_start_proxy() "workspace".to_string(), PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::from([( ":minimal".to_string(), FilesystemPermissionToml::Access(FileSystemAccessMode::Read), @@ -576,6 +579,7 @@ async fn default_permissions_profile_populates_runtime_sandbox_policy() -> std:: "workspace".to_string(), PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::from([ ( ":minimal".to_string(), @@ -671,6 +675,7 @@ async fn project_root_glob_none_compiles_to_filesystem_pattern_entry() -> std::i "workspace".to_string(), PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: Some(2), entries: BTreeMap::from([( ":project_roots".to_string(), FilesystemPermissionToml::Scoped(BTreeMap::from([ @@ -693,6 +698,13 @@ async fn project_root_glob_none_compiles_to_filesystem_pattern_entry() -> std::i ) .await?; + assert_eq!( + config + .permissions + .file_system_sandbox_policy + .glob_scan_max_depth, + Some(2) + ); let expected_pattern = AbsolutePathBuf::resolve_path_against_base("**/*.env", cwd.path()) .to_string_lossy() .into_owned(); @@ -738,6 +750,7 @@ async fn permissions_profiles_require_default_permissions() -> std::io::Result<( "workspace".to_string(), PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::from([( ":minimal".to_string(), FilesystemPermissionToml::Access(FileSystemAccessMode::Read), @@ -781,6 +794,7 @@ async fn permissions_profiles_reject_writes_outside_workspace_root() -> std::io: "workspace".to_string(), PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::from([( external_write_path.to_string(), FilesystemPermissionToml::Access(FileSystemAccessMode::Write), @@ -824,6 +838,7 @@ async fn permissions_profiles_reject_nested_entries_for_non_project_roots() -> s "workspace".to_string(), PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::from([( ":minimal".to_string(), FilesystemPermissionToml::Scoped(BTreeMap::from([( @@ -883,6 +898,7 @@ async fn load_workspace_permission_profile( async fn permissions_profiles_allow_unknown_special_paths() -> std::io::Result<()> { let config = load_workspace_permission_profile(PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::from([( ":future_special_path".to_string(), FilesystemPermissionToml::Access(FileSystemAccessMode::Read), @@ -929,6 +945,7 @@ async fn permissions_profiles_allow_unknown_special_paths_with_nested_entries() -> std::io::Result<()> { let config = load_workspace_permission_profile(PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::from([( ":future_special_path".to_string(), FilesystemPermissionToml::Scoped(BTreeMap::from([( @@ -996,6 +1013,7 @@ async fn permissions_profiles_allow_missing_filesystem_with_warning() -> std::io async fn permissions_profiles_allow_empty_filesystem_with_warning() -> std::io::Result<()> { let config = load_workspace_permission_profile(PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::new(), }), network: None, @@ -1030,6 +1048,7 @@ async fn permissions_profiles_reject_project_root_parent_traversal() -> std::io: "workspace".to_string(), PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::from([( ":project_roots".to_string(), FilesystemPermissionToml::Scoped(BTreeMap::from([( @@ -1075,6 +1094,7 @@ async fn permissions_profiles_allow_network_enablement() -> std::io::Result<()> "workspace".to_string(), PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::from([( ":minimal".to_string(), FilesystemPermissionToml::Access(FileSystemAccessMode::Read), diff --git a/codex-rs/core/src/config/permissions.rs b/codex-rs/core/src/config/permissions.rs index 1609b696d8a4..943c82c0e9f6 100644 --- a/codex-rs/core/src/config/permissions.rs +++ b/codex-rs/core/src/config/permissions.rs @@ -66,6 +66,14 @@ pub(crate) fn compile_permission_profile( ), ); } + for pattern in unbounded_unreadable_globstar_paths(filesystem) { + push_warning( + startup_warnings, + format!( + "Filesystem deny-read glob `{pattern}` uses `**`. Non-macOS sandboxing does not support unbounded `**` natively; set `glob_scan_max_depth` in this filesystem profile to cap Linux glob expansion and silence this warning, or enumerate explicit depths such as `*.env`, `*/*.env`, and `*/*/*.env`." + ), + ); + } } for (path, permission) in &filesystem.entries { entries.extend(compile_filesystem_permission( @@ -82,12 +90,17 @@ pub(crate) fn compile_permission_profile( missing_filesystem_entries_warning(profile_name), ); } + let glob_scan_max_depth = validate_glob_scan_max_depth( + profile + .filesystem + .as_ref() + .and_then(|filesystem| filesystem.glob_scan_max_depth), + )?; let network_sandbox_policy = compile_network_sandbox_policy(profile.network.as_ref()); - Ok(( - FileSystemSandboxPolicy::restricted(entries), - network_sandbox_policy, - )) + let mut file_system_sandbox_policy = FileSystemSandboxPolicy::restricted(entries); + file_system_sandbox_policy.glob_scan_max_depth = glob_scan_max_depth; + Ok((file_system_sandbox_policy, network_sandbox_policy)) } /// Returns a list of paths that must be readable by shell tools in order @@ -333,6 +346,42 @@ fn unsupported_read_write_glob_paths(filesystem: &FilesystemPermissionsToml) -> patterns } +fn unbounded_unreadable_globstar_paths(filesystem: &FilesystemPermissionsToml) -> Vec { + if filesystem.glob_scan_max_depth.is_some() { + return Vec::new(); + } + + let mut patterns = Vec::new(); + for (path, permission) in &filesystem.entries { + match permission { + FilesystemPermissionToml::Access(FileSystemAccessMode::None) => { + if path.contains("**") { + patterns.push(path.clone()); + } + } + FilesystemPermissionToml::Access(_) => {} + FilesystemPermissionToml::Scoped(scoped_entries) => { + for (subpath, access) in scoped_entries { + if *access == FileSystemAccessMode::None && subpath.contains("**") { + patterns.push(format!("{path}/{subpath}")); + } + } + } + } + } + patterns +} + +fn validate_glob_scan_max_depth(max_depth: Option) -> io::Result> { + match max_depth { + Some(0) => Err(io::Error::new( + io::ErrorKind::InvalidInput, + "glob_scan_max_depth must be at least 1", + )), + _ => Ok(max_depth), + } +} + fn contains_glob_chars(path: &str) -> bool { path.chars().any(|ch| matches!(ch, '*' | '?' | '[' | ']')) } diff --git a/codex-rs/core/src/config/permissions_tests.rs b/codex-rs/core/src/config/permissions_tests.rs index 7116e32e70bd..e22376b21452 100644 --- a/codex-rs/core/src/config/permissions_tests.rs +++ b/codex-rs/core/src/config/permissions_tests.rs @@ -55,6 +55,7 @@ async fn restricted_read_implicitly_allows_helper_executables() -> std::io::Resu "workspace".to_string(), PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::new(), }), network: None, @@ -226,6 +227,7 @@ fn network_toml_overlays_unix_socket_permissions_by_path() { #[test] fn read_write_glob_warnings_skip_supported_deny_read_globs_and_trailing_subpaths() { let filesystem = FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::from([ ( "/tmp/**/*.log".to_string(), @@ -256,6 +258,47 @@ fn read_write_glob_warnings_skip_supported_deny_read_globs_and_trailing_subpaths ); } +#[test] +fn unreadable_globstar_warning_is_suppressed_when_scan_depth_is_configured() { + let filesystem = FilesystemPermissionsToml { + glob_scan_max_depth: None, + entries: BTreeMap::from([( + ":project_roots".to_string(), + FilesystemPermissionToml::Scoped(BTreeMap::from([ + ("**/*.env".to_string(), FileSystemAccessMode::None), + ("*.pem".to_string(), FileSystemAccessMode::None), + ])), + )]), + }; + + assert_eq!( + unbounded_unreadable_globstar_paths(&filesystem), + vec![":project_roots/**/*.env".to_string()] + ); + + let configured_filesystem = FilesystemPermissionsToml { + glob_scan_max_depth: Some(2), + ..filesystem + }; + assert_eq!( + unbounded_unreadable_globstar_paths(&configured_filesystem), + Vec::::new() + ); +} + +#[test] +fn glob_scan_max_depth_must_be_positive() { + let err = validate_glob_scan_max_depth(Some(0)) + .expect_err("zero depth would silently skip deny-read glob expansion"); + + assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput); + assert_eq!(err.to_string(), "glob_scan_max_depth must be at least 1"); + assert_eq!( + validate_glob_scan_max_depth(Some(2)).expect("depth should be valid"), + Some(2) + ); +} + #[test] fn read_write_trailing_glob_suffix_compiles_as_subpath() -> std::io::Result<()> { let cwd = TempDir::new()?; @@ -266,6 +309,7 @@ fn read_write_trailing_glob_suffix_compiles_as_subpath() -> std::io::Result<()> "workspace".to_string(), PermissionProfileToml { filesystem: Some(FilesystemPermissionsToml { + glob_scan_max_depth: None, entries: BTreeMap::from([( ":project_roots".to_string(), FilesystemPermissionToml::Scoped(BTreeMap::from([( diff --git a/codex-rs/core/src/tools/handlers/unified_exec.rs b/codex-rs/core/src/tools/handlers/unified_exec.rs index 99c7e4a1955c..f2604a57d6de 100644 --- a/codex-rs/core/src/tools/handlers/unified_exec.rs +++ b/codex-rs/core/src/tools/handlers/unified_exec.rs @@ -20,8 +20,10 @@ use crate::tools::registry::ToolHandler; use crate::tools::registry::ToolKind; use crate::unified_exec::ExecCommandRequest; use crate::unified_exec::UnifiedExecContext; +use crate::unified_exec::UnifiedExecError; use crate::unified_exec::UnifiedExecProcessManager; use crate::unified_exec::WriteStdinRequest; +use crate::unified_exec::generate_chunk_id; use codex_features::Feature; use codex_otel::SessionTelemetry; use codex_otel::TOOL_CALL_UNIFIED_EXEC_METRIC; @@ -30,6 +32,7 @@ use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::TerminalInteractionEvent; use codex_shell_command::is_safe_command::is_known_safe_command; use codex_tools::UnifiedExecShellMode; +use codex_utils_output_truncation::approx_token_count; use serde::Deserialize; use std::path::PathBuf; use std::sync::Arc; @@ -309,7 +312,8 @@ impl ToolHandler for UnifiedExecHandler { } emit_unified_exec_tty_metric(&turn.session_telemetry, tty); - manager + let session_command = command.clone(); + match manager .exec_command( ExecCommandRequest { command, @@ -330,11 +334,31 @@ impl ToolHandler for UnifiedExecHandler { &context, ) .await - .map_err(|err| { - FunctionCallError::RespondToModel(format!( + { + Ok(response) => response, + Err(UnifiedExecError::SandboxDenied { output, .. }) => { + let output_text = output.aggregated_output.text; + let original_token_count = approx_token_count(&output_text); + ExecCommandToolOutput { + event_call_id: context.call_id.clone(), + chunk_id: generate_chunk_id(), + wall_time: output.duration, + raw_output: output_text.into_bytes(), + max_output_tokens, + // Sandbox denial is terminal, so there is no live + // process for write_stdin to resume. + process_id: None, + exit_code: Some(output.exit_code), + original_token_count: Some(original_token_count), + session_command: Some(session_command), + } + } + Err(err) => { + return Err(FunctionCallError::RespondToModel(format!( "exec_command failed for `{command_for_display}`: {err:?}" - )) - })? + ))); + } + } } "write_stdin" => { let args: WriteStdinArgs = parse_arguments(&arguments)?; diff --git a/codex-rs/core/src/unified_exec/process_manager.rs b/codex-rs/core/src/unified_exec/process_manager.rs index aaaa7076f443..9f9189abcc2d 100644 --- a/codex-rs/core/src/unified_exec/process_manager.rs +++ b/codex-rs/core/src/unified_exec/process_manager.rs @@ -26,6 +26,7 @@ use crate::tools::orchestrator::ToolOrchestrator; use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest; use crate::tools::runtimes::unified_exec::UnifiedExecRuntime; use crate::tools::sandboxing::ToolCtx; +use crate::tools::sandboxing::ToolError; use crate::unified_exec::ExecCommandRequest; use crate::unified_exec::MAX_UNIFIED_EXEC_PROCESSES; use crate::unified_exec::MAX_YIELD_TIME_MS; @@ -50,6 +51,8 @@ use crate::unified_exec::process::OutputHandles; use crate::unified_exec::process::SpawnLifecycleHandle; use crate::unified_exec::process::UnifiedExecProcess; use codex_config::types::ShellEnvironmentPolicy; +use codex_protocol::error::CodexErr; +use codex_protocol::error::SandboxErr; use codex_protocol::protocol::ExecCommandSource; use codex_utils_absolute_path::AbsolutePathBuf; use codex_utils_output_truncation::approx_token_count; @@ -778,7 +781,19 @@ impl UnifiedExecProcessManager { ) .await .map(|result| (result.output, result.deferred_network_approval)) - .map_err(|e| UnifiedExecError::create_process(format!("{e:?}"))) + .map_err(|err| match err { + ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output, .. })) => { + let output = *output; + let message = if output.aggregated_output.text.is_empty() { + let exit_code = output.exit_code; + format!("Process exited with code {exit_code}") + } else { + output.aggregated_output.text.clone() + }; + UnifiedExecError::sandbox_denied(message, output) + } + other => UnifiedExecError::create_process(format!("{other:?}")), + }) } pub(super) async fn collect_output_until_deadline( diff --git a/codex-rs/core/tests/suite/tools.rs b/codex-rs/core/tests/suite/tools.rs index bc286ce5ea37..795aa1c847c3 100644 --- a/codex-rs/core/tests/suite/tools.rs +++ b/codex-rs/core/tests/suite/tools.rs @@ -9,10 +9,15 @@ use std::time::Instant; use anyhow::Context; use anyhow::Result; +use codex_config::Constrained; use codex_config::types::McpServerConfig; use codex_config::types::McpServerTransportConfig; use codex_core::sandboxing::SandboxPermissions; use codex_features::Feature; +use codex_protocol::permissions::FileSystemAccessMode; +use codex_protocol::permissions::FileSystemPath; +use codex_protocol::permissions::FileSystemSandboxEntry; +use codex_protocol::permissions::FileSystemSandboxPolicy; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::SandboxPolicy; use core_test_support::assert_regex_match; @@ -26,6 +31,7 @@ use core_test_support::responses::mount_sse_sequence; use core_test_support::responses::sse; use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; +use core_test_support::skip_if_sandbox; use core_test_support::stdio_server_bin; use core_test_support::test_codex::test_codex; use regex_lite::Regex; @@ -379,7 +385,7 @@ async fn sandbox_denied_shell_returns_original_output() -> Result<()> { ]; let args = json!({ "command": command, - "timeout_ms": 1_000, + "timeout_ms": 5_000, }); let responses = vec![ @@ -449,6 +455,109 @@ async fn sandbox_denied_shell_returns_original_output() -> Result<()> { Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn shell_enforces_glob_deny_read_policy() -> Result<()> { + skip_if_no_network!(Ok(())); + skip_if_sandbox!(Ok(())); + + let server = start_mock_server().await; + let read_only_policy = SandboxPolicy::new_read_only_policy(); + let read_only_policy_for_config = read_only_policy.clone(); + let mut builder = test_codex() + .with_model("gpt-5.1-codex") + .with_config(move |config| { + config.permissions.sandbox_policy = Constrained::allow_any(read_only_policy_for_config); + let mut file_system_sandbox_policy = FileSystemSandboxPolicy::default(); + file_system_sandbox_policy + .entries + .push(FileSystemSandboxEntry { + path: FileSystemPath::GlobPattern { + pattern: format!("{}/**/*.env", config.cwd.as_path().display()), + }, + access: FileSystemAccessMode::None, + }); + config.permissions.file_system_sandbox_policy = file_system_sandbox_policy; + }); + let fixture = builder.build(&server).await?; + + let fixture_dir = fixture.workspace_path("glob-deny-read"); + fs::create_dir_all(&fixture_dir).context("create glob deny-read fixture directory")?; + let denied_path = fixture_dir.join("secret.env"); + let allowed_path = fixture_dir.join("notes.txt"); + let secret = "shell glob deny-read secret"; + let allowed = "shell glob deny-read allowed"; + fs::write(&denied_path, format!("{secret}\n")).context("write denied fixture")?; + fs::write(&allowed_path, format!("{allowed}\n")).context("write allowed fixture")?; + + let call_id = "shell-glob-deny-read"; + let command = vec![ + "/bin/sh".to_string(), + "-c".to_string(), + "status=0; cat \"$1\" || status=$?; cat \"$2\"; exit \"$status\"".to_string(), + "sh".to_string(), + denied_path.to_string_lossy().into_owned(), + allowed_path.to_string_lossy().into_owned(), + ]; + let args = json!({ + "command": command, + "timeout_ms": 1_000, + }); + + let responses = vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ]; + let mock = mount_sse_sequence(&server, responses).await; + + fixture + .submit_turn_with_policy("read the fixture files", read_only_policy) + .await?; + + let output_text = mock + .function_call_output_text(call_id) + .context("shell output present")?; + let exit_code_line = output_text + .lines() + .next() + .context("exit code line present")?; + let exit_code = exit_code_line + .strip_prefix("Exit code: ") + .context("exit code prefix present")? + .trim() + .parse::() + .context("exit code is integer")?; + + assert_ne!( + exit_code, 0, + "glob deny-read should surface a non-zero exit code" + ); + assert!( + output_text.contains(allowed), + "expected allowed file contents in shell output: {output_text}" + ); + assert!( + !output_text.contains(secret), + "denied file contents leaked into shell output: {output_text}" + ); + let output_lower = output_text.to_lowercase(); + let has_denial = output_lower.contains("permission denied") + || output_lower.contains("operation not permitted") + || output_lower.contains("read-only file system"); + assert!( + has_denial, + "expected sandbox denial details in shell output: {output_text}" + ); + + Ok(()) +} + async fn collect_tools(use_unified_exec: bool) -> Result> { let server = start_mock_server().await; diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs index b14c93099923..77a15e92c023 100644 --- a/codex-rs/core/tests/suite/unified_exec.rs +++ b/codex-rs/core/tests/suite/unified_exec.rs @@ -5,8 +5,13 @@ use std::sync::OnceLock; use anyhow::Context; use anyhow::Result; +use codex_config::Constrained; use codex_exec_server::CreateDirectoryOptions; use codex_features::Feature; +use codex_protocol::permissions::FileSystemAccessMode; +use codex_protocol::permissions::FileSystemPath; +use codex_protocol::permissions::FileSystemSandboxEntry; +use codex_protocol::permissions::FileSystemSandboxPolicy; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::ExecCommandSource; @@ -2341,6 +2346,127 @@ async fn unified_exec_runs_under_sandbox() -> Result<()> { Ok(()) } +#[cfg(unix)] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn unified_exec_enforces_glob_deny_read_policy() -> Result<()> { + skip_if_no_network!(Ok(())); + skip_if_sandbox!(Ok(())); + + let server = start_mock_server().await; + let read_only_policy = SandboxPolicy::new_read_only_policy(); + let read_only_policy_for_config = read_only_policy.clone(); + let mut builder = test_codex().with_config(move |config| { + config + .features + .enable(Feature::UnifiedExec) + .expect("test config should allow feature update"); + config.permissions.sandbox_policy = Constrained::allow_any(read_only_policy_for_config); + let mut file_system_sandbox_policy = FileSystemSandboxPolicy::default(); + file_system_sandbox_policy + .entries + .push(FileSystemSandboxEntry { + path: FileSystemPath::GlobPattern { + pattern: format!("{}/**/*.env", config.cwd.as_path().display()), + }, + access: FileSystemAccessMode::None, + }); + config.permissions.file_system_sandbox_policy = file_system_sandbox_policy; + }); + let TestCodex { + codex, + cwd, + session_configured, + .. + } = builder.build(&server).await?; + + let fixture_dir = cwd.path().join("glob-deny-read"); + fs::create_dir_all(&fixture_dir).context("create glob deny-read fixture directory")?; + let denied_path = fixture_dir.join("secret.env"); + let allowed_path = fixture_dir.join("notes.txt"); + let secret = "unified exec glob deny-read secret"; + let allowed = "unified exec glob deny-read allowed"; + fs::write(&denied_path, format!("{secret}\n")).context("write denied fixture")?; + fs::write(&allowed_path, format!("{allowed}\n")).context("write allowed fixture")?; + + let call_id = "uexec-glob-deny-read"; + let cmd = format!( + "read_status=0; cat {denied_path:?} || read_status=$?; cat {allowed_path:?}; exit $read_status" + ); + let args = serde_json::json!({ + "cmd": cmd, + "yield_time_ms": 5_000, + }); + + let responses = vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ]; + let request_log = mount_sse_sequence(&server, responses).await; + + let session_model = session_configured.model.clone(); + codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: "read the fixture files".into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + cwd: cwd.path().to_path_buf(), + approval_policy: AskForApproval::Never, + approvals_reviewer: None, + sandbox_policy: read_only_policy, + model: session_model, + effort: None, + summary: None, + service_tier: None, + collaboration_mode: None, + personality: None, + }) + .await?; + + wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + + let requests = request_log.requests(); + assert!(!requests.is_empty(), "expected at least one POST request"); + let bodies = requests + .into_iter() + .map(|request| request.body_json()) + .collect::>(); + + let outputs = collect_tool_outputs(&bodies)?; + let output = outputs.get(call_id).expect("missing output"); + + assert!( + output.exit_code.is_some_and(|code| code != 0), + "glob deny-read should surface a non-zero exit code: {output:?}" + ); + assert!( + output.output.contains(allowed), + "expected allowed file contents in unified exec output: {output:?}" + ); + assert!( + !output.output.contains(secret), + "denied file contents leaked into unified exec output: {output:?}" + ); + let output_lower = output.output.to_lowercase(); + let has_denial = output_lower.contains("permission denied") + || output_lower.contains("operation not permitted") + || output_lower.contains("read-only file system"); + assert!( + has_denial, + "expected sandbox denial details in unified exec output: {output:?}" + ); + + Ok(()) +} + #[cfg(target_os = "macos")] #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn unified_exec_python_prompt_under_seatbelt() -> Result<()> { diff --git a/codex-rs/linux-sandbox/Cargo.toml b/codex-rs/linux-sandbox/Cargo.toml index 751d6d5d17bc..c624251e63a7 100644 --- a/codex-rs/linux-sandbox/Cargo.toml +++ b/codex-rs/linux-sandbox/Cargo.toml @@ -20,6 +20,7 @@ clap = { workspace = true, features = ["derive"] } codex-protocol = { workspace = true } codex-sandboxing = { workspace = true } codex-utils-absolute-path = { workspace = true } +globset = { workspace = true } landlock = { workspace = true } libc = { workspace = true } seccompiler = { workspace = true } diff --git a/codex-rs/linux-sandbox/README.md b/codex-rs/linux-sandbox/README.md index 3a004934b2b9..5745f4816ca3 100644 --- a/codex-rs/linux-sandbox/README.md +++ b/codex-rs/linux-sandbox/README.md @@ -7,7 +7,7 @@ This crate is responsible for producing: - the `codex-exec` CLI can check if its arg0 is `codex-linux-sandbox` and, if so, execute as if it were `codex-linux-sandbox` - this should also be true of the `codex` multitool CLI -On Linux, the bubblewrap pipeline prefers the first `bwrap` found on `PATH` +On Linux, Codex prefers the first `bwrap` found on `PATH` outside the current working directory whenever it is available. If `bwrap` is present but too old to support `--argv0`, the helper keeps using system bubblewrap and switches to a @@ -23,7 +23,7 @@ commands that would enter the bubblewrap path. **Current Behavior** - Legacy `SandboxPolicy` / `sandbox_mode` configs remain supported. -- Bubblewrap is the default filesystem sandbox pipeline. +- Bubblewrap is the default filesystem sandbox. - If `bwrap` is present on `PATH` outside the current working directory, the helper uses it. - If `bwrap` is present but too old to support `--argv0`, the helper uses a @@ -46,32 +46,51 @@ commands that would enter the bubblewrap path. - Split-only filesystem policies that do not round-trip through the legacy `SandboxPolicy` model stay on bubblewrap so nested read-only or denied carveouts are preserved. -- When the default bubblewrap pipeline is active, the helper applies `PR_SET_NO_NEW_PRIVS` and a +- When bubblewrap is active, the helper applies `PR_SET_NO_NEW_PRIVS` and a seccomp network filter in-process. -- When the default bubblewrap pipeline is active, the filesystem is read-only by default via `--ro-bind / /`. -- When the default bubblewrap pipeline is active, writable roots are layered with `--bind `. -- When the default bubblewrap pipeline is active, protected subpaths under writable roots (for +- When bubblewrap is active, the filesystem is read-only by default via `--ro-bind / /`. +- When bubblewrap is active, writable roots are layered with `--bind `. +- When bubblewrap is active, protected subpaths under writable roots (for example `.git`, resolved `gitdir:`, and `.codex`) are re-applied as read-only via `--ro-bind`. -- When the default bubblewrap pipeline is active, overlapping split-policy +- When bubblewrap is active, overlapping split-policy entries are applied in path-specificity order so narrower writable children can reopen broader read-only or denied parents while narrower denied subpaths still win. For example, `/repo = write`, `/repo/a = none`, `/repo/a/b = write` keeps `/repo` writable, denies `/repo/a`, and reopens `/repo/a/b` as writable again. -- When the default bubblewrap pipeline is active, symlink-in-path and non-existent protected paths inside +- When bubblewrap is active, unreadable glob entries are expanded before + launching the sandbox and matching files are masked in bubblewrap: + + ```text + Prefer: rg --files --hidden --no-ignore --glob -- + Fallback: internal globset walker when rg is not installed + Failure: any other rg failure aborts sandbox construction + ``` + + Users can cap the scan depth per permissions profile: + + ```toml + [permissions.workspace.filesystem] + glob_scan_max_depth = 2 + + [permissions.workspace.filesystem.":project_roots"] + "**/*.env" = "none" + ``` + +- When bubblewrap is active, symlink-in-path and non-existent protected paths inside writable roots are blocked by mounting `/dev/null` on the symlink or first missing component. -- When the default bubblewrap pipeline is active, the helper explicitly isolates the user namespace via +- When bubblewrap is active, the helper explicitly isolates the user namespace via `--unshare-user` and the PID namespace via `--unshare-pid`. -- When the default bubblewrap pipeline is active and network is restricted without proxy routing, the helper also +- When bubblewrap is active and network is restricted without proxy routing, the helper also isolates the network namespace via `--unshare-net`. - In managed proxy mode, the helper uses `--unshare-net` plus an internal TCP->UDS->TCP routing bridge so tool traffic reaches only configured proxy endpoints. - In managed proxy mode, after the bridge is live, seccomp blocks new AF_UNIX/socketpair creation for the user command. -- When the default bubblewrap pipeline is active, it mounts a fresh `/proc` via `--proc /proc` by default, but +- When bubblewrap is active, it mounts a fresh `/proc` via `--proc /proc` by default, but you can skip this in restrictive container environments with `--no-proc`. **Notes** diff --git a/codex-rs/linux-sandbox/src/bwrap.rs b/codex-rs/linux-sandbox/src/bwrap.rs index 91d66210291b..64d1342bef1c 100644 --- a/codex-rs/linux-sandbox/src/bwrap.rs +++ b/codex-rs/linux-sandbox/src/bwrap.rs @@ -9,17 +9,27 @@ //! The overall Linux sandbox is composed of: //! - seccomp + `PR_SET_NO_NEW_PRIVS` applied in-process, and //! - bubblewrap used to construct the filesystem view before exec. +use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashSet; +use std::ffi::OsString; use std::fs; use std::fs::File; +use std::io; use std::os::fd::AsRawFd; +use std::os::unix::ffi::OsStringExt; use std::path::Path; use std::path::PathBuf; +use std::process::Command; +use codex_protocol::error::CodexErr; use codex_protocol::error::Result; use codex_protocol::protocol::FileSystemSandboxPolicy; +use codex_protocol::protocol::WritableRoot; use codex_utils_absolute_path::AbsolutePathBuf; +use globset::GlobBuilder; +use globset::GlobSet; +use globset::GlobSetBuilder; /// Linux "platform defaults" that keep common system binaries and dynamic /// libraries readable when `ReadOnlyAccess::Restricted` requests them. @@ -37,6 +47,8 @@ const LINUX_PLATFORM_DEFAULT_READ_ROOTS: &[&str] = &[ "/run/current-system/sw", ]; +const MAX_UNREADABLE_GLOB_MATCHES: usize = 8192; + /// Options that control how bubblewrap is invoked. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) struct BwrapOptions { @@ -47,6 +59,11 @@ pub(crate) struct BwrapOptions { pub mount_proc: bool, /// How networking should be configured inside the bubblewrap sandbox. pub network_mode: BwrapNetworkMode, + /// Optional maximum depth for expanding unreadable glob patterns with ripgrep. + /// + /// Keep this uncapped by default so existing nested deny-read matches are + /// masked before the sandboxed command starts. + pub glob_scan_max_depth: Option, } impl Default for BwrapOptions { @@ -54,6 +71,7 @@ impl Default for BwrapOptions { Self { mount_proc: true, network_mode: BwrapNetworkMode::FullAccess, + glob_scan_max_depth: None, } } } @@ -99,7 +117,11 @@ pub(crate) fn create_bwrap_command_args( command_cwd: &Path, options: BwrapOptions, ) -> Result { - if file_system_sandbox_policy.has_full_disk_write_access() { + let unreadable_globs = + file_system_sandbox_policy.get_unreadable_globs_with_cwd(sandbox_policy_cwd); + // Full disk write normally skips bwrap, but unreadable glob patterns still + // need concrete bwrap masks for the matches expanded below. + if file_system_sandbox_policy.has_full_disk_write_access() && unreadable_globs.is_empty() { return if options.network_mode == BwrapNetworkMode::FullAccess { Ok(BwrapArgs { args: command, @@ -157,7 +179,13 @@ fn create_bwrap_flags( let BwrapArgs { args: filesystem_args, preserved_files, - } = create_filesystem_args(file_system_sandbox_policy, sandbox_policy_cwd)?; + } = create_filesystem_args( + file_system_sandbox_policy, + sandbox_policy_cwd, + options + .glob_scan_max_depth + .or(file_system_sandbox_policy.glob_scan_max_depth), + )?; let normalized_command_cwd = normalize_command_cwd_for_bwrap(command_cwd); let mut args = Vec::new(); args.push("--new-session".to_string()); @@ -210,16 +238,41 @@ fn create_bwrap_flags( fn create_filesystem_args( file_system_sandbox_policy: &FileSystemSandboxPolicy, cwd: &Path, + glob_scan_max_depth: Option, ) -> Result { + let unreadable_globs = file_system_sandbox_policy.get_unreadable_globs_with_cwd(cwd); // Bubblewrap requires bind mount targets to exist. Skip missing writable // roots so mixed-platform configs can keep harmless paths for other // environments without breaking Linux command startup. - let writable_roots = file_system_sandbox_policy + let mut writable_roots = file_system_sandbox_policy .get_writable_roots_with_cwd(cwd) .into_iter() .filter(|writable_root| writable_root.root.as_path().exists()) .collect::>(); - let unreadable_roots = file_system_sandbox_policy.get_unreadable_roots_with_cwd(cwd); + if writable_roots.is_empty() + && file_system_sandbox_policy.has_full_disk_write_access() + && !unreadable_globs.is_empty() + { + writable_roots.push(WritableRoot { + root: AbsolutePathBuf::from_absolute_path("/")?, + read_only_subpaths: Vec::new(), + }); + } + let mut unreadable_roots = file_system_sandbox_policy + .get_unreadable_roots_with_cwd(cwd) + .into_iter() + .map(AbsolutePathBuf::into_path_buf) + .collect::>(); + // Bubblewrap can only mask concrete paths. Expand unreadable glob patterns + // to the existing matches we can see before constructing the mount overlay; + // core tool helpers still evaluate the original patterns directly at read time. + unreadable_roots.extend( + expand_unreadable_globs_with_ripgrep(&unreadable_globs, cwd, glob_scan_max_depth)? + .into_iter() + .map(AbsolutePathBuf::into_path_buf), + ); + unreadable_roots.sort(); + unreadable_roots.dedup(); let mut args = if file_system_sandbox_policy.has_full_disk_read_access() { // Read-only root, then mount a minimal device tree. @@ -303,10 +356,7 @@ fn create_filesystem_args( allowed_write_paths.push(target); } } - let unreadable_paths: HashSet = unreadable_roots - .iter() - .map(|path| path.as_path().to_path_buf()) - .collect(); + let unreadable_paths: HashSet = unreadable_roots.iter().cloned().collect(); let mut sorted_writable_roots = writable_roots; sorted_writable_roots.sort_by_key(|writable_root| path_depth(writable_root.root.as_path())); // Mask only the unreadable ancestors that sit outside every writable root. @@ -323,7 +373,7 @@ fn create_filesystem_args( .iter() .any(|root| root.starts_with(unreadable_root)) }) - .map(|path| path.as_path().to_path_buf()) + .cloned() .collect(); unreadable_ancestors_of_writable_roots.sort_by_key(|path| path_depth(path)); @@ -343,7 +393,7 @@ fn create_filesystem_args( // target parents before binding the narrower writable descendant. if let Some(masking_root) = unreadable_roots .iter() - .map(AbsolutePathBuf::as_path) + .map(PathBuf::as_path) .filter(|unreadable_root| root.starts_with(unreadable_root)) .max_by_key(|unreadable_root| path_depth(unreadable_root)) { @@ -366,12 +416,12 @@ fn create_filesystem_args( } read_only_subpaths.sort_by_key(|path| path_depth(path)); for subpath in read_only_subpaths { - append_read_only_subpath_args(&mut args, &subpath, &allowed_write_paths); + append_read_only_subpath_args(&mut args, &subpath, &allowed_write_paths)?; } let mut nested_unreadable_roots: Vec = unreadable_roots .iter() - .filter(|path| path.as_path().starts_with(root)) - .map(|path| path.as_path().to_path_buf()) + .filter(|path| path.starts_with(root)) + .cloned() .collect(); if let Some(target) = &symlink_target { nested_unreadable_roots = @@ -396,7 +446,7 @@ fn create_filesystem_args( .iter() .any(|root| unreadable_root.starts_with(root) || root.starts_with(unreadable_root)) }) - .map(|path| path.as_path().to_path_buf()) + .cloned() .collect(); rootless_unreadable_roots.sort_by_key(|path| path_depth(path)); for unreadable_root in rootless_unreadable_roots { @@ -414,6 +464,244 @@ fn create_filesystem_args( }) } +fn expand_unreadable_globs_with_ripgrep( + patterns: &[String], + cwd: &Path, + max_depth: Option, +) -> Result> { + if patterns.is_empty() || max_depth == Some(0) { + return Ok(Vec::new()); + } + + // Group each pattern by the static path prefix before its first glob + // metacharacter. That keeps scans narrow, avoids searching from `/`, and + // lets one `rg --files` call handle all patterns under the same root. + let mut patterns_by_search_root: BTreeMap> = BTreeMap::new(); + for pattern in patterns { + if let Some((search_root, glob)) = split_pattern_for_ripgrep(pattern, cwd) + && search_root.as_path().is_dir() + { + patterns_by_search_root + .entry(search_root) + .or_default() + .push(glob); + } + } + + // Record both the logical match and any canonical symlink target. The bwrap + // overlay needs the resolved target to prevent a readable symlink path from + // bypassing an unreadable glob match. + let mut expanded_paths = BTreeSet::new(); + for (search_root, globs) in patterns_by_search_root { + for path in ripgrep_files(search_root.as_path(), &globs, max_depth)? { + if let Some(target) = canonical_target_if_symlinked_path(path.as_path()) { + expanded_paths.insert(AbsolutePathBuf::from_absolute_path_checked(target)?); + } + expanded_paths.insert(path); + if expanded_paths.len() > MAX_UNREADABLE_GLOB_MATCHES { + return Err(CodexErr::Fatal(format!( + "unreadable glob expansion for {} matched more than {MAX_UNREADABLE_GLOB_MATCHES} paths", + search_root.display() + ))); + } + } + } + + Ok(expanded_paths.into_iter().collect()) +} + +fn split_pattern_for_ripgrep(pattern: &str, cwd: &Path) -> Option<(AbsolutePathBuf, String)> { + // Resolve relative patterns once, then split at the first glob + // metacharacter. The prefix becomes the search root and the suffix stays as + // the ripgrep glob. Root-level glob scans are intentionally skipped because + // they are too broad for startup-time sandbox construction. + let absolute_pattern = AbsolutePathBuf::resolve_path_against_base(pattern, cwd); + let pattern = absolute_pattern.to_string_lossy(); + let first_glob_index = pattern + .char_indices() + .find_map(|(index, ch)| matches!(ch, '*' | '?' | '[' | ']').then_some(index))?; + let static_prefix = &pattern[..first_glob_index]; + if static_prefix.is_empty() || static_prefix == "/" { + return None; + } + let search_root_end = if static_prefix.ends_with('/') { + static_prefix.len() - 1 + } else { + static_prefix.rfind('/').unwrap_or(0) + }; + let search_root = if search_root_end == 0 { + PathBuf::from("/") + } else { + PathBuf::from(&pattern[..search_root_end]) + }; + let search_root = AbsolutePathBuf::from_absolute_path_checked(search_root).ok()?; + let glob = escape_unclosed_glob_classes(&pattern[search_root_end + 1..]); + (!glob.is_empty()).then_some((search_root, glob)) +} + +fn escape_unclosed_glob_classes(glob: &str) -> String { + // The filesystem policy accepts an unclosed `[` as a literal. Ripgrep treats + // that as invalid glob syntax, so escape only the unclosed class opener. + let mut escaped = String::with_capacity(glob.len()); + let mut chars = glob.chars(); + + while let Some(ch) = chars.next() { + if ch != '[' { + escaped.push(ch); + continue; + } + + let mut class = String::new(); + let mut closed = false; + for class_ch in chars.by_ref() { + if class_ch == ']' { + closed = true; + break; + } + class.push(class_ch); + } + + if closed { + escaped.push('['); + escaped.push_str(&class); + escaped.push(']'); + } else { + escaped.push_str(r"\["); + escaped.push_str(&class); + } + } + + escaped +} + +fn ripgrep_files( + search_root: &Path, + globs: &[String], + max_depth: Option, +) -> Result> { + // Use `rg --files` rather than shell expansion so dotfiles and ignored files + // are still considered. A status 1 with no stderr is ripgrep's "no matches" + // case, not a sandbox construction error. + let mut command = Command::new("rg"); + command + .arg("--files") + .arg("--hidden") + .arg("--no-ignore") + .arg("--null"); + if let Some(max_depth) = max_depth { + command.arg("--max-depth").arg(max_depth.to_string()); + } + for glob in globs { + command.arg("--glob").arg(glob); + } + command.arg("--").arg(search_root); + + /* + * Prefer ripgrep for unreadable glob expansion because it is fast and + * already implements the file-walking semantics we want here: include + * dotfiles, ignore ignore files, and do not recurse through symlinked + * directories. If `rg` is not installed in the runtime environment, fall + * back to the internal globset walker so sandbox construction still masks + * matching paths. Other ripgrep failures stay fatal so deny-read does not + * silently weaken. + */ + let output = match command.output() { + Ok(output) => output, + Err(err) if err.kind() == io::ErrorKind::NotFound => { + return glob_files(search_root, globs, max_depth); + } + Err(err) => return Err(err.into()), + }; + if !output.status.success() { + if output.status.code() == Some(1) && output.stderr.is_empty() { + return Ok(Vec::new()); + } + + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(CodexErr::Fatal(format!( + "ripgrep unreadable glob scan failed for {}: {stderr}", + search_root.display() + ))); + } + + let paths = output + .stdout + .split(|byte| *byte == b'\0') + .filter(|path| !path.is_empty()) + .map(|path| { + let path = PathBuf::from(OsString::from_vec(path.to_vec())); + if path.is_absolute() { + path + } else { + search_root.join(path) + } + }) + .map(AbsolutePathBuf::from_absolute_path_checked) + .collect::>>()?; + Ok(paths) +} + +fn glob_files( + search_root: &Path, + globs: &[String], + max_depth: Option, +) -> Result> { + let mut builder = GlobSetBuilder::new(); + for glob in globs { + let glob = GlobBuilder::new(glob) + .literal_separator(true) + .allow_unclosed_class(true) + .build() + .map_err(|err| { + CodexErr::Fatal(format!( + "unreadable glob pattern is invalid for {}: {err}", + search_root.display() + )) + })?; + builder.add(glob); + } + let glob_set = builder.build().map_err(|err| { + CodexErr::Fatal(format!( + "unreadable glob matcher failed for {}: {err}", + search_root.display() + )) + })?; + + let mut paths = Vec::new(); + collect_glob_files(search_root, search_root, &glob_set, max_depth, &mut paths)?; + Ok(paths) +} + +fn collect_glob_files( + search_root: &Path, + dir: &Path, + glob_set: &GlobSet, + remaining_depth: Option, + paths: &mut Vec, +) -> Result<()> { + for entry in fs::read_dir(dir)? { + let entry = entry?; + let path = entry.path(); + let file_type = entry.file_type()?; + let relative = path.strip_prefix(search_root).unwrap_or(path.as_path()); + + if (file_type.is_file() || file_type.is_symlink()) && glob_set.is_match(relative) { + paths.push(AbsolutePathBuf::from_absolute_path_checked(&path)?); + } + + if !file_type.is_dir() { + continue; + } + let remaining_depth = match remaining_depth { + Some(0 | 1) => continue, + Some(depth) => Some(depth - 1), + None => None, + }; + collect_glob_files(search_root, &path, glob_set, remaining_depth, paths)?; + } + Ok(()) +} + fn path_to_string(path: &Path) -> String { path.to_string_lossy().to_string() } @@ -423,6 +711,9 @@ fn path_depth(path: &Path) -> usize { } fn canonical_target_if_symlinked_path(path: &Path) -> Option { + // Return the fully resolved target only when some path component is a + // symlink. Callers use this to bind/mask the real filesystem location while + // leaving ordinary paths in their logical form. let mut current = PathBuf::new(); for component in path.components() { use std::path::Component; @@ -498,17 +789,19 @@ fn append_read_only_subpath_args( args: &mut Vec, subpath: &Path, allowed_write_paths: &[PathBuf], -) { - if let Some(target) = canonical_target_for_symlink_in_path(subpath, allowed_write_paths) { - // bwrap takes `--ro-bind `. Use the resolved target - // for both operands so a protected symlinked directory is remounted - // read-only in place instead of binding onto the symlink path itself. - let mount_source = path_to_string(&target); - let mount_destination = path_to_string(&target); - args.push("--ro-bind".to_string()); - args.push(mount_source); - args.push(mount_destination); - return; +) -> Result<()> { + if let Some(symlink) = first_writable_symlink_component_in_path(subpath, allowed_write_paths) { + /* + * A read-only carveout under a writable symlink cannot be made reliable + * with bwrap path arguments. Binding the symlink's current target would + * only protect a startup-time snapshot; the sandboxed process could + * replace the writable symlink before it reads through the logical path. + */ + return Err(CodexErr::Fatal(format!( + "cannot enforce sandbox read-only path {} because it crosses writable symlink {}", + subpath.display(), + symlink.display() + ))); } if !subpath.exists() { @@ -519,7 +812,7 @@ fn append_read_only_subpath_args( args.push("/dev/null".to_string()); args.push(path_to_string(&first_missing_component)); } - return; + return Ok(()); } if is_within_allowed_write_paths(subpath, allowed_write_paths) { @@ -527,6 +820,7 @@ fn append_read_only_subpath_args( args.push(path_to_string(subpath)); args.push(path_to_string(subpath)); } + Ok(()) } fn append_unreadable_root_args( @@ -535,16 +829,21 @@ fn append_unreadable_root_args( unreadable_root: &Path, allowed_write_paths: &[PathBuf], ) -> Result<()> { - if let Some(target) = canonical_target_for_symlink_in_path(unreadable_root, allowed_write_paths) + if let Some(symlink) = + first_writable_symlink_component_in_path(unreadable_root, allowed_write_paths) { - // Apply unreadable handling to the resolved symlink target, not the - // logical symlink path, to avoid file-vs-directory bind mismatches. - return append_existing_unreadable_path_args( - args, - preserved_files, - &target, - allowed_write_paths, - ); + /* + * Deny-read masks must fail closed when the protected path crosses a + * symlink that remains writable to the sandboxed process. Resolving and + * masking the symlink's current target is a TOCTTOU snapshot: bwrap would + * protect the old target while the logical path could later point + * somewhere else. + */ + return Err(CodexErr::Fatal(format!( + "cannot enforce sandbox deny-read path {} because it crosses writable symlink {}", + unreadable_root.display(), + symlink.display() + ))); } if !unreadable_root.exists() { @@ -621,10 +920,16 @@ fn is_within_allowed_write_paths(path: &Path, allowed_write_paths: &[PathBuf]) - .any(|root| path.starts_with(root)) } -fn canonical_target_for_symlink_in_path( +fn first_writable_symlink_component_in_path( target_path: &Path, allowed_write_paths: &[PathBuf], ) -> Option { + /* + * Walk the logical path and report the first symlink component that lives + * under a writable root. These symlinks are mutable from inside the sandbox, + * so any mount or mask based on their resolved target would be racing a path + * the sandboxed process can change. + */ let mut current = PathBuf::new(); for component in target_path.components() { @@ -651,7 +956,7 @@ fn canonical_target_for_symlink_in_path( if metadata.file_type().is_symlink() && is_within_allowed_write_paths(¤t, allowed_write_paths) { - return fs::canonicalize(¤t).ok(); + return Some(current); } } @@ -703,6 +1008,26 @@ mod tests { use pretty_assertions::assert_eq; use tempfile::TempDir; + const NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH: Option = None; + + #[test] + fn default_unreadable_glob_scan_has_no_depth_cap() { + assert_eq!(BwrapOptions::default().glob_scan_max_depth, None); + } + + fn unreadable_glob_entry(pattern: String) -> FileSystemSandboxEntry { + FileSystemSandboxEntry { + path: FileSystemPath::GlobPattern { pattern }, + access: FileSystemAccessMode::None, + } + } + + fn default_policy_with_unreadable_glob(pattern: String) -> FileSystemSandboxPolicy { + let mut policy = FileSystemSandboxPolicy::default(); + policy.entries.push(unreadable_glob_entry(pattern)); + policy + } + #[test] fn full_disk_write_full_network_returns_unwrapped_command() { let command = vec!["/bin/true".to_string()]; @@ -714,6 +1039,7 @@ mod tests { BwrapOptions { mount_proc: true, network_mode: BwrapNetworkMode::FullAccess, + ..Default::default() }, ) .expect("create bwrap args"); @@ -732,6 +1058,7 @@ mod tests { BwrapOptions { mount_proc: true, network_mode: BwrapNetworkMode::ProxyOnly, + ..Default::default() }, ) .expect("create bwrap args"); @@ -755,6 +1082,42 @@ mod tests { ); } + #[test] + fn full_disk_write_with_unreadable_glob_still_wraps_and_masks_match() { + if !ripgrep_available() { + return; + } + + let temp_dir = TempDir::new().expect("temp dir"); + let root_env = temp_dir.path().join(".env"); + std::fs::write(&root_env, "secret").expect("write env"); + let policy = FileSystemSandboxPolicy::restricted(vec![ + FileSystemSandboxEntry { + path: FileSystemPath::Special { + value: FileSystemSpecialPath::Root, + }, + access: FileSystemAccessMode::Write, + }, + unreadable_glob_entry(format!("{}/**/*.env", temp_dir.path().display())), + ]); + let command = vec!["/bin/true".to_string()]; + + let args = create_bwrap_command_args( + command.clone(), + &policy, + temp_dir.path(), + temp_dir.path(), + BwrapOptions::default(), + ) + .expect("create bwrap args"); + + assert_ne!( + args.args, command, + "full-write policy with unreadable globs must still use bwrap" + ); + assert_file_masked(&args.args, &root_env); + } + #[cfg(unix)] #[test] fn restricted_policy_chdirs_to_canonical_command_cwd() { @@ -859,7 +1222,9 @@ mod tests { }, ]); - let args = create_filesystem_args(&policy, temp_dir.path()).expect("filesystem args"); + let args = + create_filesystem_args(&policy, temp_dir.path(), NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH) + .expect("filesystem args"); assert!(args.args.windows(3).any(|window| { window == ["--bind", real_root_str.as_str(), real_root_str.as_str()] @@ -902,7 +1267,9 @@ mod tests { access: FileSystemAccessMode::Write, }]); - let args = create_filesystem_args(&policy, temp_dir.path()).expect("filesystem args"); + let args = + create_filesystem_args(&policy, temp_dir.path(), NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH) + .expect("filesystem args"); assert!(args.args.windows(3).any(|window| { window @@ -924,7 +1291,7 @@ mod tests { #[cfg(unix)] #[test] - fn protected_symlinked_directory_subpaths_bind_target_read_only() { + fn protected_symlinked_directory_subpaths_fail_closed() { let temp_dir = TempDir::new().expect("temp dir"); let root = temp_dir.path().join("root"); let agents_target = root.join("agents-target"); @@ -934,28 +1301,26 @@ mod tests { let root = AbsolutePathBuf::from_absolute_path(&root).expect("absolute root"); let agents_link_str = path_to_string(&agents_link); - let agents_target_str = path_to_string(&agents_target); let policy = FileSystemSandboxPolicy::restricted(vec![FileSystemSandboxEntry { path: FileSystemPath::Path { path: root }, access: FileSystemAccessMode::Write, }]); - let args = create_filesystem_args(&policy, temp_dir.path()).expect("filesystem args"); + let err = + create_filesystem_args(&policy, temp_dir.path(), NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH) + .expect_err("protected symlinked subpath should fail closed"); + let message = err.to_string(); - assert!(args.args.windows(3).any(|window| { - window - == [ - "--ro-bind", - agents_target_str.as_str(), - agents_target_str.as_str(), - ] - })); - assert!(!args.args.iter().any(|arg| arg == agents_link_str.as_str())); + assert!( + message.contains("cannot enforce sandbox read-only path"), + "{message}" + ); + assert!(message.contains(&agents_link_str), "{message}"); } #[cfg(unix)] #[test] - fn symlinked_writable_roots_mask_nested_symlink_escape_paths_without_binding_targets() { + fn symlinked_writable_roots_nested_symlink_escape_paths_fail_closed() { let temp_dir = TempDir::new().expect("temp dir"); let real_root = temp_dir.path().join("real"); let link_root = temp_dir.path().join("link"); @@ -971,7 +1336,6 @@ mod tests { AbsolutePathBuf::from_absolute_path(&link_root).expect("absolute symlinked root"); let link_private = link_root.join("linked-private"); let real_linked_private_str = path_to_string(&linked_private); - let outside_str = path_to_string(&outside); let policy = FileSystemSandboxPolicy::restricted(vec![ FileSystemSandboxEntry { path: FileSystemPath::Path { path: link_root }, @@ -983,25 +1347,16 @@ mod tests { }, ]); - let args = create_filesystem_args(&policy, temp_dir.path()).expect("filesystem args"); + let err = + create_filesystem_args(&policy, temp_dir.path(), NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH) + .expect_err("deny-read path crossing writable symlink should fail closed"); + let message = err.to_string(); - assert!(args.args.windows(6).any(|window| { - window - == [ - "--perms", - "000", - "--tmpfs", - outside_str.as_str(), - "--remount-ro", - outside_str.as_str(), - ] - })); assert!( - !args - .args - .iter() - .any(|arg| arg == real_linked_private_str.as_str()) + message.contains("cannot enforce sandbox deny-read path"), + "{message}" ); + assert!(message.contains(&real_linked_private_str), "{message}"); } #[test] @@ -1022,8 +1377,12 @@ mod tests { exclude_slash_tmp: true, }; - let args = create_filesystem_args(&FileSystemSandboxPolicy::from(&policy), temp_dir.path()) - .expect("filesystem args"); + let args = create_filesystem_args( + &FileSystemSandboxPolicy::from(&policy), + temp_dir.path(), + NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH, + ) + .expect("filesystem args"); let existing_root = path_to_string(&existing_root); let missing_root = path_to_string(&missing_root); @@ -1052,6 +1411,7 @@ mod tests { let args = create_filesystem_args( &FileSystemSandboxPolicy::from(&sandbox_policy), Path::new("/"), + NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH, ) .expect("bwrap fs args"); assert_eq!( @@ -1100,8 +1460,12 @@ mod tests { network_access: false, }; - let args = create_filesystem_args(&FileSystemSandboxPolicy::from(&policy), temp_dir.path()) - .expect("filesystem args"); + let args = create_filesystem_args( + &FileSystemSandboxPolicy::from(&policy), + temp_dir.path(), + NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH, + ) + .expect("filesystem args"); assert_eq!(args.args[0..4], ["--tmpfs", "/", "--dev", "/dev"]); @@ -1130,8 +1494,12 @@ mod tests { // `ReadOnlyAccess::Restricted` always includes `cwd` as a readable // root. Using `"/"` here would intentionally collapse to broad read // access, so use a non-root cwd to exercise the restricted path. - let args = create_filesystem_args(&FileSystemSandboxPolicy::from(&policy), temp_dir.path()) - .expect("filesystem args"); + let args = create_filesystem_args( + &FileSystemSandboxPolicy::from(&policy), + temp_dir.path(), + NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH, + ) + .expect("filesystem args"); assert!( args.args @@ -1171,7 +1539,9 @@ mod tests { }, ]); - let args = create_filesystem_args(&policy, temp_dir.path()).expect("filesystem args"); + let args = + create_filesystem_args(&policy, temp_dir.path(), NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH) + .expect("filesystem args"); assert!(args.args.windows(3).any(|window| { window @@ -1248,7 +1618,9 @@ mod tests { }, ]); - let args = create_filesystem_args(&policy, temp_dir.path()).expect("filesystem args"); + let args = + create_filesystem_args(&policy, temp_dir.path(), NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH) + .expect("filesystem args"); let docs_str = path_to_string(docs.as_path()); let docs_public_str = path_to_string(docs_public.as_path()); let docs_ro_index = args @@ -1300,7 +1672,9 @@ mod tests { }, ]); - let args = create_filesystem_args(&policy, temp_dir.path()).expect("filesystem args"); + let args = + create_filesystem_args(&policy, temp_dir.path(), NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH) + .expect("filesystem args"); let blocked_str = path_to_string(blocked.as_path()); let allowed_str = path_to_string(allowed.as_path()); let blocked_none_index = args @@ -1367,7 +1741,9 @@ mod tests { }, ]); - let args = create_filesystem_args(&policy, temp_dir.path()).expect("filesystem args"); + let args = + create_filesystem_args(&policy, temp_dir.path(), NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH) + .expect("filesystem args"); let blocked_str = path_to_string(blocked.as_path()); let allowed_dir_str = path_to_string(allowed_dir.as_path()); let allowed_file_str = path_to_string(allowed_file.as_path()); @@ -1442,7 +1818,9 @@ mod tests { }, ]); - let args = create_filesystem_args(&policy, temp_dir.path()).expect("filesystem args"); + let args = + create_filesystem_args(&policy, temp_dir.path(), NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH) + .expect("filesystem args"); let blocked_none_index = args .args .windows(4) @@ -1487,7 +1865,9 @@ mod tests { }, ]); - let args = create_filesystem_args(&policy, temp_dir.path()).expect("filesystem args"); + let args = + create_filesystem_args(&policy, temp_dir.path(), NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH) + .expect("filesystem args"); let blocked_str = path_to_string(blocked.as_path()); assert!( @@ -1529,7 +1909,9 @@ mod tests { }, ]); - let args = create_filesystem_args(&policy, temp_dir.path()).expect("filesystem args"); + let args = + create_filesystem_args(&policy, temp_dir.path(), NO_UNREADABLE_GLOB_SCAN_MAX_DEPTH) + .expect("filesystem args"); let blocked_file_str = path_to_string(blocked_file.as_path()); assert_eq!(args.preserved_files.len(), 1); @@ -1540,4 +1922,100 @@ mod tests { && window[4] == blocked_file_str })); } + + #[test] + fn unreadable_globs_expand_existing_matches_with_configured_depth() { + if !ripgrep_available() { + return; + } + + let temp_dir = TempDir::new().expect("temp dir"); + let root_env = temp_dir.path().join(".env"); + let nested_env = temp_dir.path().join("app").join(".env"); + let too_deep_env = temp_dir.path().join("app").join("deep").join(".env"); + std::fs::create_dir_all(too_deep_env.parent().expect("parent")).expect("create parent"); + std::fs::write(temp_dir.path().join(".gitignore"), ".env\n").expect("write gitignore"); + std::fs::write(&root_env, "secret").expect("write root env"); + std::fs::write(&nested_env, "secret").expect("write nested env"); + std::fs::write(&too_deep_env, "secret").expect("write deep env"); + let policy = + default_policy_with_unreadable_glob(format!("{}/**/*.env", temp_dir.path().display())); + + let args = + create_filesystem_args(&policy, temp_dir.path(), Some(2)).expect("filesystem args"); + + assert_file_masked(&args.args, &root_env); + assert_file_masked(&args.args, &nested_env); + assert!( + !args + .args + .iter() + .any(|arg| arg == &path_to_string(&too_deep_env)), + "max depth should keep deeper matches out of bwrap args: {:#?}", + args.args + ); + } + + #[cfg(unix)] + #[test] + fn unreadable_globs_add_canonical_targets_for_symlink_matches() { + if !ripgrep_available() { + return; + } + + let temp_dir = TempDir::new().expect("temp dir"); + let real_root = temp_dir.path().join("real"); + let link_root = temp_dir.path().join("link"); + let real_secret = real_root.join("secret.env"); + std::fs::create_dir_all(&real_root).expect("create real root"); + std::fs::write(&real_secret, "secret").expect("write real secret"); + std::os::unix::fs::symlink(&real_root, &link_root).expect("create symlink"); + let policy = + default_policy_with_unreadable_glob(format!("{}/**/*.env", link_root.display())); + + let args = + create_filesystem_args(&policy, temp_dir.path(), Some(2)).expect("filesystem args"); + + assert_file_masked(&args.args, &real_secret); + } + + #[test] + fn root_prefix_unreadable_globs_are_too_broad_for_linux_expansion() { + assert_eq!( + split_pattern_for_ripgrep("/**/*.env", Path::new("/tmp")), + None + ); + } + + #[test] + fn unclosed_character_classes_are_escaped_for_ripgrep() { + let (search_root, glob) = + split_pattern_for_ripgrep("/tmp/[*.env", Path::new("/")).expect("split pattern"); + + assert_eq!(search_root.as_path(), Path::new("/tmp")); + assert_eq!(glob, r"\[*.env"); + } + + fn ripgrep_available() -> bool { + Command::new("rg") + .arg("--version") + .output() + .is_ok_and(|output| output.status.success()) + } + + /// Assert that `path` is masked due to a bwrap arg sequence like: + /// + /// `bwrap ... --perms 000 --ro-bind-data FD PATH` + fn assert_file_masked(args: &[String], path: &Path) { + let path = path_to_string(path); + assert!( + args.windows(5).any(|window| { + window[0] == "--perms" + && window[1] == "000" + && window[2] == "--ro-bind-data" + && window[4] == path + }), + "expected file mask for {path}: {args:#?}" + ); + } } diff --git a/codex-rs/linux-sandbox/src/linux_run_main.rs b/codex-rs/linux-sandbox/src/linux_run_main.rs index 8f39837de1b9..958d8645bd8d 100644 --- a/codex-rs/linux-sandbox/src/linux_run_main.rs +++ b/codex-rs/linux-sandbox/src/linux_run_main.rs @@ -427,6 +427,7 @@ fn run_bwrap_with_proc_fallback( let options = BwrapOptions { mount_proc, network_mode, + ..Default::default() }; let mut bwrap_args = build_bwrap_argv( inner, @@ -547,6 +548,7 @@ fn build_preflight_bwrap_argv( BwrapOptions { mount_proc: true, network_mode, + ..Default::default() }, ) } diff --git a/codex-rs/linux-sandbox/src/linux_run_main_tests.rs b/codex-rs/linux-sandbox/src/linux_run_main_tests.rs index 96c3c8a737af..0ed10717f222 100644 --- a/codex-rs/linux-sandbox/src/linux_run_main_tests.rs +++ b/codex-rs/linux-sandbox/src/linux_run_main_tests.rs @@ -48,6 +48,7 @@ fn inserts_bwrap_argv0_before_command_separator() { BwrapOptions { mount_proc: true, network_mode: BwrapNetworkMode::FullAccess, + ..Default::default() }, ) .args; @@ -90,6 +91,7 @@ fn rewrites_inner_command_path_when_bwrap_lacks_argv0() { BwrapOptions { mount_proc: true, network_mode: BwrapNetworkMode::FullAccess, + ..Default::default() }, ) .args; @@ -157,6 +159,7 @@ fn inserts_unshare_net_when_network_isolation_requested() { BwrapOptions { mount_proc: true, network_mode: BwrapNetworkMode::Isolated, + ..Default::default() }, ) .args; @@ -174,6 +177,7 @@ fn inserts_unshare_net_when_proxy_only_network_mode_requested() { BwrapOptions { mount_proc: true, network_mode: BwrapNetworkMode::ProxyOnly, + ..Default::default() }, ) .args; diff --git a/codex-rs/protocol/src/permissions.rs b/codex-rs/protocol/src/permissions.rs index d774f6dc0de2..80cfa823e7fc 100644 --- a/codex-rs/protocol/src/permissions.rs +++ b/codex-rs/protocol/src/permissions.rs @@ -138,6 +138,9 @@ pub enum FileSystemSandboxKind { #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema, TS)] pub struct FileSystemSandboxPolicy { pub kind: FileSystemSandboxKind, + #[serde(default, skip_serializing_if = "Option::is_none")] + #[ts(optional)] + pub glob_scan_max_depth: Option, #[serde(default, skip_serializing_if = "Vec::is_empty")] pub entries: Vec, } @@ -257,6 +260,7 @@ impl Default for FileSystemSandboxPolicy { fn default() -> Self { Self { kind: FileSystemSandboxKind::Restricted, + glob_scan_max_depth: None, entries: vec![FileSystemSandboxEntry { path: FileSystemPath::Special { value: FileSystemSpecialPath::Root, @@ -271,6 +275,7 @@ impl FileSystemSandboxPolicy { pub fn unrestricted() -> Self { Self { kind: FileSystemSandboxKind::Unrestricted, + glob_scan_max_depth: None, entries: Vec::new(), } } @@ -278,6 +283,7 @@ impl FileSystemSandboxPolicy { pub fn external_sandbox() -> Self { Self { kind: FileSystemSandboxKind::ExternalSandbox, + glob_scan_max_depth: None, entries: Vec::new(), } } @@ -285,6 +291,7 @@ impl FileSystemSandboxPolicy { pub fn restricted(entries: Vec) -> Self { Self { kind: FileSystemSandboxKind::Restricted, + glob_scan_max_depth: None, entries, } } @@ -317,6 +324,7 @@ impl FileSystemSandboxPolicy { if !matches!(rebuilt.kind, FileSystemSandboxKind::Restricted) { return rebuilt; } + rebuilt.glob_scan_max_depth = existing.glob_scan_max_depth; for deny_entry in existing .entries diff --git a/codex-rs/sandboxing/Cargo.toml b/codex-rs/sandboxing/Cargo.toml index 18c5ffb45dcc..49fd33e01d2f 100644 --- a/codex-rs/sandboxing/Cargo.toml +++ b/codex-rs/sandboxing/Cargo.toml @@ -18,6 +18,7 @@ codex-utils-absolute-path = { workspace = true } dunce = { workspace = true } libc = { workspace = true } serde_json = { workspace = true } +regex-lite = { workspace = true } tracing = { workspace = true, features = ["log"] } url = { workspace = true } which = { workspace = true } diff --git a/codex-rs/sandboxing/src/lib.rs b/codex-rs/sandboxing/src/lib.rs index dbb83236494f..38aed6ce245c 100644 --- a/codex-rs/sandboxing/src/lib.rs +++ b/codex-rs/sandboxing/src/lib.rs @@ -34,9 +34,6 @@ impl From for CodexErr { SandboxTransformError::MissingLinuxSandboxExecutable => { CodexErr::LandlockSandboxExecutableNotProvided } - SandboxTransformError::UnreadableGlobPatternsUnsupported => { - CodexErr::UnsupportedOperation(err.to_string()) - } #[cfg(target_os = "linux")] SandboxTransformError::Wsl1UnsupportedForBubblewrap => { CodexErr::UnsupportedOperation(crate::bwrap::WSL1_BWRAP_WARNING.to_string()) diff --git a/codex-rs/sandboxing/src/manager.rs b/codex-rs/sandboxing/src/manager.rs index 1e3ea9c196c2..0836eb256517 100644 --- a/codex-rs/sandboxing/src/manager.rs +++ b/codex-rs/sandboxing/src/manager.rs @@ -109,7 +109,6 @@ pub struct SandboxTransformRequest<'a> { #[derive(Debug)] pub enum SandboxTransformError { MissingLinuxSandboxExecutable, - UnreadableGlobPatternsUnsupported, #[cfg(target_os = "linux")] Wsl1UnsupportedForBubblewrap, #[cfg(not(target_os = "macos"))] @@ -122,10 +121,6 @@ impl std::fmt::Display for SandboxTransformError { Self::MissingLinuxSandboxExecutable => { write!(f, "missing codex-linux-sandbox executable path") } - Self::UnreadableGlobPatternsUnsupported => write!( - f, - "platform sandbox backend cannot enforce unreadable glob patterns" - ), #[cfg(target_os = "linux")] Self::Wsl1UnsupportedForBubblewrap => write!(f, "{WSL1_BWRAP_WARNING}"), #[cfg(not(target_os = "macos"))] @@ -201,15 +196,6 @@ impl SandboxManager { ); let effective_network_policy = effective_network_sandbox_policy(network_policy, additional_permissions.as_ref()); - if matches!( - sandbox, - SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp - ) && !effective_file_system_policy - .get_unreadable_globs_with_cwd(sandbox_policy_cwd) - .is_empty() - { - return Err(SandboxTransformError::UnreadableGlobPatternsUnsupported); - } let mut argv = Vec::with_capacity(1 + command.args.len()); argv.push(command.program); argv.extend(command.args.into_iter().map(OsString::from)); diff --git a/codex-rs/sandboxing/src/seatbelt.rs b/codex-rs/sandboxing/src/seatbelt.rs index b12492210c28..57a152e0232b 100644 --- a/codex-rs/sandboxing/src/seatbelt.rs +++ b/codex-rs/sandboxing/src/seatbelt.rs @@ -9,6 +9,7 @@ use codex_utils_absolute_path::AbsolutePathBuf; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; +use std::collections::VecDeque; use std::ffi::CStr; use std::path::Path; use std::path::PathBuf; @@ -379,6 +380,150 @@ fn build_seatbelt_access_policy( } } +fn build_seatbelt_unreadable_glob_policy( + file_system_sandbox_policy: &FileSystemSandboxPolicy, + cwd: &Path, +) -> String { + // Seatbelt does not understand the filesystem policy's glob syntax directly. + // Convert each unreadable pattern into an anchored regex deny rule and apply + // it to both reads and unlink-style writes so a denied path cannot be probed + // through destructive filesystem operations. + let unreadable_globs = file_system_sandbox_policy.get_unreadable_globs_with_cwd(cwd); + if unreadable_globs.is_empty() { + return String::new(); + } + + let mut policy_components = Vec::new(); + for pattern in unreadable_globs { + let mut regexes = BTreeSet::new(); + if let Some(regex) = seatbelt_regex_for_unreadable_glob(&pattern) { + regexes.insert(regex); + } + if let Some(pattern) = canonicalize_glob_static_prefix_for_sandbox(&pattern) + && let Some(regex) = seatbelt_regex_for_unreadable_glob(&pattern) + { + regexes.insert(regex); + } + for regex in regexes { + let regex = regex.replace('"', "\\\""); + policy_components.push(format!(r#"(deny file-read* (regex #"{regex}"))"#)); + policy_components.push(format!(r#"(deny file-write-unlink (regex #"{regex}"))"#)); + } + } + + policy_components.join("\n") +} + +fn canonicalize_glob_static_prefix_for_sandbox(pattern: &str) -> Option { + let first_glob_index = pattern + .char_indices() + .find_map(|(index, ch)| matches!(ch, '*' | '?' | '[' | ']').then_some(index)); + let Some(first_glob_index) = first_glob_index else { + return normalize_path_for_sandbox(Path::new(pattern)) + .map(|path| path.to_string_lossy().to_string()); + }; + + let static_prefix = &pattern[..first_glob_index]; + let prefix_end = if static_prefix.ends_with('/') { + static_prefix.len() - 1 + } else { + static_prefix.rfind('/').unwrap_or(0) + }; + if prefix_end == 0 { + return None; + } + + let root = normalize_path_for_sandbox(Path::new(&pattern[..prefix_end]))?; + let root = root.to_string_lossy(); + let suffix = &pattern[prefix_end..]; + let normalized_pattern = format!("{root}{suffix}"); + (normalized_pattern != pattern).then_some(normalized_pattern) +} + +fn seatbelt_regex_for_unreadable_glob(pattern: &str) -> Option { + if pattern.is_empty() { + return None; + } + + // Translate the supported git-style glob subset into a Seatbelt regex: + // `*` and `?` stay within one path component, `**/` can consume zero or + // more components, and closed character classes remain character classes. + // A pattern with no glob metacharacters is treated as exact path plus subtree. + let mut regex = String::from("^"); + let mut chars = pattern.chars().collect::>(); + let mut saw_glob = false; + + while let Some(ch) = chars.pop_front() { + match ch { + '*' => { + saw_glob = true; + if chars.front() == Some(&'*') { + chars.pop_front(); + if chars.front() == Some(&'/') { + chars.pop_front(); + regex.push_str("(.*/)?"); + } else { + regex.push_str(".*"); + } + } else { + regex.push_str("[^/]*"); + } + } + '?' => { + saw_glob = true; + regex.push_str("[^/]"); + } + '[' => { + saw_glob = true; + let mut class = Vec::new(); + let mut closed = false; + while let Some(class_ch) = chars.pop_front() { + if class_ch == ']' { + closed = true; + break; + } + class.push(class_ch); + } + if !closed { + regex.push_str("\\["); + for class_ch in class.into_iter().rev() { + chars.push_front(class_ch); + } + continue; + } + + regex.push('['); + let mut class_chars = class.into_iter(); + if let Some(first) = class_chars.next() { + match first { + '!' => regex.push('^'), + '^' => regex.push_str("\\^"), + _ => regex.push(first), + } + } + for class_ch in class_chars { + match class_ch { + '\\' => regex.push_str("\\\\"), + _ => regex.push(class_ch), + } + } + regex.push(']'); + } + ']' => { + saw_glob = true; + regex.push_str("\\]"); + } + _ => regex.push_str(®ex_lite::escape(&ch.to_string())), + } + } + + if !saw_glob { + regex.push_str("(/.*)?"); + } + regex.push('$'); + Some(regex) +} + #[cfg_attr(not(test), allow(dead_code))] fn create_seatbelt_command_args_for_legacy_policy( command: Vec, @@ -510,10 +655,13 @@ pub fn create_seatbelt_command_args(args: CreateSeatbeltCommandArgsParams<'_>) - dynamic_network_policy_for_network(network_sandbox_policy, enforce_managed_network, &proxy); let include_platform_defaults = file_system_sandbox_policy.include_platform_defaults(); + let deny_read_policy = + build_seatbelt_unreadable_glob_policy(file_system_sandbox_policy, sandbox_policy_cwd); let mut policy_sections = vec![ MACOS_SEATBELT_BASE_POLICY.to_string(), file_read_policy, file_write_policy, + deny_read_policy, network_policy, ]; if include_platform_defaults { diff --git a/codex-rs/sandboxing/src/seatbelt_tests.rs b/codex-rs/sandboxing/src/seatbelt_tests.rs index f222dd15d1d5..9d958c956400 100644 --- a/codex-rs/sandboxing/src/seatbelt_tests.rs +++ b/codex-rs/sandboxing/src/seatbelt_tests.rs @@ -3,11 +3,13 @@ use super::MACOS_PATH_TO_SEATBELT_EXECUTABLE; use super::MACOS_SEATBELT_BASE_POLICY; use super::ProxyPolicyInputs; use super::UnixDomainSocketPolicy; +use super::build_seatbelt_unreadable_glob_policy; use super::create_seatbelt_command_args; use super::create_seatbelt_command_args_for_legacy_policy; use super::dynamic_network_policy; use super::macos_dir_params; use super::normalize_path_for_sandbox; +use super::seatbelt_regex_for_unreadable_glob; use super::unix_socket_dir_params; use super::unix_socket_policy; use codex_network_proxy::ConfigReloader; @@ -257,6 +259,82 @@ fn explicit_unreadable_paths_are_excluded_from_readable_roots() { ); } +#[test] +fn unreadable_globstar_slash_matches_zero_or_more_directories() { + let regex = seatbelt_regex_for_unreadable_glob("/tmp/repo/**/*.env"); + assert_eq!(regex.as_deref(), Some(r"^/tmp/repo/(.*/)?[^/]*\.env$")); + let regex = regex_lite::Regex::new(regex.as_deref().expect("glob should compile")) + .expect("regex should compile"); + + assert!(regex.is_match("/tmp/repo/.env")); + assert!(regex.is_match("/tmp/repo/app/.env")); + assert!(regex.is_match("/tmp/repo/app/config.env")); + assert!(!regex.is_match("/tmp/repo/app/config.toml")); +} + +#[test] +fn unreadable_globs_use_git_style_component_matching() { + let regex = seatbelt_regex_for_unreadable_glob("/tmp/repo/*/file[0-9]?.txt"); + assert_eq!( + regex.as_deref(), + Some(r"^/tmp/repo/[^/]*/file[0-9][^/]\.txt$") + ); + let regex = regex_lite::Regex::new(regex.as_deref().expect("glob should compile")) + .expect("regex should compile"); + + assert!(regex.is_match("/tmp/repo/app/file42.txt")); + assert!(!regex.is_match("/tmp/repo/app/nested/file42.txt")); + assert!(!regex.is_match("/tmp/repo/app/file4.txt")); + assert!(!regex.is_match("/tmp/repo/app/fileab.txt")); +} + +#[test] +fn unreadable_globs_treat_unclosed_character_classes_as_literals() { + let regex = seatbelt_regex_for_unreadable_glob("/tmp/repo/[*.env"); + assert_eq!(regex.as_deref(), Some(r"^/tmp/repo/\[[^/]*\.env$")); + let regex = regex_lite::Regex::new(regex.as_deref().expect("glob should compile")) + .expect("regex should compile"); + + assert!(regex.is_match("/tmp/repo/[local.env")); + assert!(regex.is_match("/tmp/repo/[.env")); + assert!(!regex.is_match("/tmp/repo/local.env")); +} + +#[cfg(unix)] +#[test] +fn unreadable_glob_policy_includes_canonicalized_static_prefix() { + use std::os::unix::fs::symlink; + + let temp_dir = TempDir::new().expect("temp dir"); + let real_root = temp_dir.path().join("real-root"); + let link_root = temp_dir.path().join("link-root"); + fs::create_dir(&real_root).expect("create real root"); + symlink(&real_root, &link_root).expect("create symlinked root"); + + let pattern = format!("{}/**/*.env", link_root.display()); + let canonical_pattern = format!( + "{}/**/*.env", + real_root + .canonicalize() + .expect("canonicalize real root") + .display() + ); + let expected_regex = seatbelt_regex_for_unreadable_glob(&canonical_pattern) + .expect("canonical glob should compile"); + let mut policy = FileSystemSandboxPolicy::default(); + policy.entries.push(FileSystemSandboxEntry { + path: FileSystemPath::GlobPattern { pattern }, + access: FileSystemAccessMode::None, + }); + + let seatbelt_policy = build_seatbelt_unreadable_glob_policy(&policy, temp_dir.path()); + + assert!( + seatbelt_policy.contains(&format!(r#"(deny file-read* (regex #"{expected_regex}"))"#)), + "expected canonicalized glob regex in policy:\n{seatbelt_policy}" + ); +} + #[test] fn seatbelt_args_without_extension_profile_keep_legacy_preferences_read_access() { let cwd = std::env::temp_dir(); @@ -1179,6 +1257,7 @@ fn create_seatbelt_args_for_cwd_as_git_repo() { (allow file-write* (require-all (subpath (param "WRITABLE_ROOT_0")) (require-not (literal (param "WRITABLE_ROOT_0_EXCLUDED_0"))) (require-not (subpath (param "WRITABLE_ROOT_0_EXCLUDED_0"))) (require-not (literal (param "WRITABLE_ROOT_0_EXCLUDED_1"))) (require-not (subpath (param "WRITABLE_ROOT_0_EXCLUDED_1"))) ) (subpath (param "WRITABLE_ROOT_1")){tempdir_policy_entry} ) + "#, );