Skip to content

Commit 83eb97f

Browse files
committed
refactor: route zsh-fork through unified exec
1 parent 61fcd3e commit 83eb97f

File tree

14 files changed

+1357
-175
lines changed

14 files changed

+1357
-175
lines changed

codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs

Lines changed: 207 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ use app_test_support::McpProcess;
1111
use app_test_support::create_final_assistant_message_sse_response;
1212
use app_test_support::create_mock_responses_server_sequence;
1313
use app_test_support::create_mock_responses_server_sequence_unchecked;
14-
use app_test_support::create_shell_command_sse_response;
1514
use app_test_support::to_response;
1615
use codex_app_server_protocol::CommandAction;
1716
use codex_app_server_protocol::CommandExecutionApprovalDecision;
@@ -35,9 +34,11 @@ use codex_core::features::Feature;
3534
use core_test_support::responses;
3635
use core_test_support::skip_if_no_network;
3736
use pretty_assertions::assert_eq;
37+
use serde_json::json;
3838
use std::collections::BTreeMap;
3939
use std::path::Path;
4040
use tempfile::TempDir;
41+
use tokio::time::sleep;
4142
use tokio::time::timeout;
4243

4344
#[cfg(windows)]
@@ -62,19 +63,14 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
6263
};
6364
eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display());
6465

65-
// Keep the shell command in flight until we interrupt it. A fast command
66+
// Keep the exec command in flight until we interrupt it. A fast command
6667
// like `echo hi` can finish before the interrupt arrives on faster runners,
6768
// which turns this into a test for post-command follow-up behavior instead
6869
// of interrupting an active zsh-fork command.
6970
let release_marker_escaped = release_marker.to_string_lossy().replace('\'', r#"'\''"#);
7071
let wait_for_interrupt =
7172
format!("while [ ! -f '{release_marker_escaped}' ]; do sleep 0.01; done");
72-
let response = create_shell_command_sse_response(
73-
vec!["/bin/sh".to_string(), "-c".to_string(), wait_for_interrupt],
74-
None,
75-
Some(5000),
76-
"call-zsh-fork",
77-
)?;
73+
let response = create_zsh_fork_exec_command_sse_response(&wait_for_interrupt, "call-zsh-fork")?;
7874
let no_op_response = responses::sse(vec![
7975
responses::ev_response_created("resp-2"),
8076
responses::ev_completed("resp-2"),
@@ -91,7 +87,7 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
9187
"never",
9288
&BTreeMap::from([
9389
(Feature::ShellZshFork, true),
94-
(Feature::UnifiedExec, false),
90+
(Feature::UnifiedExec, true),
9591
(Feature::ShellSnapshot, false),
9692
]),
9793
&zsh_path,
@@ -163,7 +159,7 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
163159
assert_eq!(id, "call-zsh-fork");
164160
assert_eq!(status, CommandExecutionStatus::InProgress);
165161
assert!(command.starts_with(&zsh_path.display().to_string()));
166-
assert!(command.contains("/bin/sh -c"));
162+
assert!(command.contains(" -lc "));
167163
assert!(command.contains("sleep 0.01"));
168164
assert!(command.contains(&release_marker.display().to_string()));
169165
assert_eq!(cwd, workspace);
@@ -191,14 +187,8 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> {
191187
eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display());
192188

193189
let responses = vec![
194-
create_shell_command_sse_response(
195-
vec![
196-
"python3".to_string(),
197-
"-c".to_string(),
198-
"print(42)".to_string(),
199-
],
200-
None,
201-
Some(5000),
190+
create_zsh_fork_exec_command_sse_response(
191+
"python3 -c 'print(42)'",
202192
"call-zsh-fork-decline",
203193
)?,
204194
create_final_assistant_message_sse_response("done")?,
@@ -210,7 +200,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> {
210200
"untrusted",
211201
&BTreeMap::from([
212202
(Feature::ShellZshFork, true),
213-
(Feature::UnifiedExec, false),
203+
(Feature::UnifiedExec, true),
214204
(Feature::ShellSnapshot, false),
215205
]),
216206
&zsh_path,
@@ -326,14 +316,8 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
326316
};
327317
eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display());
328318

329-
let responses = vec![create_shell_command_sse_response(
330-
vec![
331-
"python3".to_string(),
332-
"-c".to_string(),
333-
"print(42)".to_string(),
334-
],
335-
None,
336-
Some(5000),
319+
let responses = vec![create_zsh_fork_exec_command_sse_response(
320+
"python3 -c 'print(42)'",
337321
"call-zsh-fork-cancel",
338322
)?];
339323
let server = create_mock_responses_server_sequence(responses).await;
@@ -343,7 +327,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
343327
"untrusted",
344328
&BTreeMap::from([
345329
(Feature::ShellZshFork, true),
346-
(Feature::UnifiedExec, false),
330+
(Feature::UnifiedExec, true),
347331
(Feature::ShellSnapshot, false),
348332
]),
349333
&zsh_path,
@@ -441,6 +425,181 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
441425
Ok(())
442426
}
443427

428+
#[tokio::test]
429+
async fn turn_start_shell_zsh_fork_interrupt_kills_approved_subcommand_v2() -> Result<()> {
430+
skip_if_no_network!(Ok(()));
431+
432+
let tmp = TempDir::new()?;
433+
let codex_home = tmp.path().join("codex_home");
434+
std::fs::create_dir(&codex_home)?;
435+
let workspace = tmp.path().join("workspace");
436+
std::fs::create_dir(&workspace)?;
437+
let pid_file = workspace.join("approved-subcommand.pid");
438+
let pid_file_display = pid_file.display().to_string();
439+
assert!(
440+
!pid_file_display.contains('\''),
441+
"test workspace path should not contain single quotes: {pid_file_display}"
442+
);
443+
444+
let Some(zsh_path) = find_test_zsh_path()? else {
445+
eprintln!("skipping zsh fork interrupt cleanup test: no zsh executable found");
446+
return Ok(());
447+
};
448+
if !supports_exec_wrapper_intercept(&zsh_path) {
449+
eprintln!(
450+
"skipping zsh fork interrupt cleanup test: zsh does not support EXEC_WRAPPER intercepts ({})",
451+
zsh_path.display()
452+
);
453+
return Ok(());
454+
}
455+
let zsh_path_display = zsh_path.display().to_string();
456+
eprintln!("using zsh path for zsh-fork test: {zsh_path_display}");
457+
458+
let shell_command =
459+
format!("/bin/sh -c 'echo $$ > \"{pid_file_display}\" && exec /bin/sleep 100'");
460+
let tool_call_arguments = serde_json::to_string(&json!({
461+
"cmd": shell_command,
462+
"yield_time_ms": 30_000,
463+
}))?;
464+
let response = responses::sse(vec![
465+
responses::ev_response_created("resp-1"),
466+
responses::ev_function_call(
467+
"call-zsh-fork-interrupt-cleanup",
468+
"exec_command",
469+
&tool_call_arguments,
470+
),
471+
responses::ev_completed("resp-1"),
472+
]);
473+
let no_op_response = responses::sse(vec![
474+
responses::ev_response_created("resp-2"),
475+
responses::ev_completed("resp-2"),
476+
]);
477+
let server =
478+
create_mock_responses_server_sequence_unchecked(vec![response, no_op_response]).await;
479+
create_config_toml(
480+
&codex_home,
481+
&server.uri(),
482+
"untrusted",
483+
&BTreeMap::from([
484+
(Feature::ShellZshFork, true),
485+
(Feature::UnifiedExec, true),
486+
(Feature::ShellSnapshot, false),
487+
]),
488+
&zsh_path,
489+
)?;
490+
491+
let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
492+
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
493+
494+
let start_id = mcp
495+
.send_thread_start_request(ThreadStartParams {
496+
model: Some("mock-model".to_string()),
497+
cwd: Some(workspace.to_string_lossy().into_owned()),
498+
..Default::default()
499+
})
500+
.await?;
501+
let start_resp: JSONRPCResponse = timeout(
502+
DEFAULT_READ_TIMEOUT,
503+
mcp.read_stream_until_response_message(RequestId::Integer(start_id)),
504+
)
505+
.await??;
506+
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(start_resp)?;
507+
508+
let turn_id = mcp
509+
.send_turn_start_request(TurnStartParams {
510+
thread_id: thread.id.clone(),
511+
input: vec![V2UserInput::Text {
512+
text: "run the long-lived command".to_string(),
513+
text_elements: Vec::new(),
514+
}],
515+
cwd: Some(workspace.clone()),
516+
approval_policy: Some(codex_app_server_protocol::AskForApproval::UnlessTrusted),
517+
sandbox_policy: Some(codex_app_server_protocol::SandboxPolicy::WorkspaceWrite {
518+
writable_roots: vec![workspace.clone().try_into()?],
519+
read_only_access: codex_app_server_protocol::ReadOnlyAccess::FullAccess,
520+
network_access: false,
521+
exclude_tmpdir_env_var: false,
522+
exclude_slash_tmp: false,
523+
}),
524+
model: Some("mock-model".to_string()),
525+
effort: Some(codex_protocol::openai_models::ReasoningEffort::Medium),
526+
summary: Some(codex_protocol::config_types::ReasoningSummary::Auto),
527+
..Default::default()
528+
})
529+
.await?;
530+
let turn_resp: JSONRPCResponse = timeout(
531+
DEFAULT_READ_TIMEOUT,
532+
mcp.read_stream_until_response_message(RequestId::Integer(turn_id)),
533+
)
534+
.await??;
535+
let TurnStartResponse { turn } = to_response::<TurnStartResponse>(turn_resp)?;
536+
537+
let mut saw_target_approval = false;
538+
while !saw_target_approval {
539+
let server_req = timeout(
540+
DEFAULT_READ_TIMEOUT,
541+
mcp.read_stream_until_request_message(),
542+
)
543+
.await??;
544+
let ServerRequest::CommandExecutionRequestApproval { request_id, params } = server_req
545+
else {
546+
panic!("expected CommandExecutionRequestApproval request");
547+
};
548+
let approval_command = params.command.clone().unwrap_or_default();
549+
saw_target_approval = approval_command.contains("/bin/sh")
550+
&& approval_command.contains(&pid_file_display)
551+
&& !approval_command.contains(&zsh_path_display);
552+
mcp.send_response(
553+
request_id,
554+
serde_json::to_value(CommandExecutionRequestApprovalResponse {
555+
decision: CommandExecutionApprovalDecision::Accept,
556+
})?,
557+
)
558+
.await?;
559+
}
560+
561+
let pid = timeout(DEFAULT_READ_TIMEOUT, async {
562+
loop {
563+
if let Ok(contents) = std::fs::read_to_string(&pid_file) {
564+
return Ok::<i32, anyhow::Error>(contents.trim().parse()?);
565+
}
566+
sleep(std::time::Duration::from_millis(20)).await;
567+
}
568+
})
569+
.await??;
570+
let still_running = std::process::Command::new("/bin/kill")
571+
.args(["-0", &pid.to_string()])
572+
.status()?
573+
.success();
574+
assert!(
575+
still_running,
576+
"expected approved intercepted subprocess pid {pid} to be running before interrupt"
577+
);
578+
579+
mcp.interrupt_turn_and_wait_for_aborted(
580+
thread.id.clone(),
581+
turn.id.clone(),
582+
DEFAULT_READ_TIMEOUT,
583+
)
584+
.await?;
585+
586+
timeout(DEFAULT_READ_TIMEOUT, async {
587+
loop {
588+
let still_running = std::process::Command::new("/bin/kill")
589+
.args(["-0", &pid.to_string()])
590+
.status()?
591+
.success();
592+
if !still_running {
593+
return Ok::<(), anyhow::Error>(());
594+
}
595+
sleep(std::time::Duration::from_millis(20)).await;
596+
}
597+
})
598+
.await??;
599+
600+
Ok(())
601+
}
602+
444603
#[tokio::test]
445604
async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() -> Result<()> {
446605
skip_if_no_network!(Ok(()));
@@ -472,16 +631,15 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
472631
first_file.display(),
473632
second_file.display()
474633
);
475-
let tool_call_arguments = serde_json::to_string(&serde_json::json!({
476-
"command": shell_command,
477-
"workdir": serde_json::Value::Null,
478-
"timeout_ms": 5000
634+
let tool_call_arguments = serde_json::to_string(&json!({
635+
"cmd": shell_command,
636+
"yield_time_ms": 5000,
479637
}))?;
480638
let response = responses::sse(vec![
481639
responses::ev_response_created("resp-1"),
482640
responses::ev_function_call(
483641
"call-zsh-fork-subcommand-decline",
484-
"shell_command",
642+
"exec_command",
485643
&tool_call_arguments,
486644
),
487645
responses::ev_completed("resp-1"),
@@ -502,7 +660,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
502660
"untrusted",
503661
&BTreeMap::from([
504662
(Feature::ShellZshFork, true),
505-
(Feature::UnifiedExec, false),
663+
(Feature::UnifiedExec, true),
506664
(Feature::ShellSnapshot, false),
507665
]),
508666
&zsh_path,
@@ -744,6 +902,21 @@ async fn create_zsh_test_mcp_process(codex_home: &Path, zdotdir: &Path) -> Resul
744902
McpProcess::new_with_env(codex_home, &[("ZDOTDIR", Some(zdotdir.as_str()))]).await
745903
}
746904

905+
fn create_zsh_fork_exec_command_sse_response(
906+
command: &str,
907+
call_id: &str,
908+
) -> anyhow::Result<String> {
909+
let tool_call_arguments = serde_json::to_string(&json!({
910+
"cmd": command,
911+
"yield_time_ms": 5000,
912+
}))?;
913+
Ok(responses::sse(vec![
914+
responses::ev_response_created("resp-1"),
915+
responses::ev_function_call(call_id, "exec_command", &tool_call_arguments),
916+
responses::ev_completed("resp-1"),
917+
]))
918+
}
919+
747920
fn create_config_toml(
748921
codex_home: &Path,
749922
server_uri: &str,

codex-rs/core/src/memories/usage.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ fn shell_command_for_invocation(invocation: &ToolInvocation) -> Option<(Vec<Stri
103103
&params,
104104
invocation.session.user_shell(),
105105
invocation.turn.tools_config.allow_login_shell,
106+
invocation.turn.tools_config.unified_exec_backend,
106107
)
107108
.ok()?;
108109
Some((command, invocation.turn.resolve_path(params.workdir)))

0 commit comments

Comments
 (0)