Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion codex-rs/core/src/features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ pub enum Feature {
JsRepl,
/// Enable a minimal JavaScript mode backed by Node's built-in vm runtime.
CodeMode,
/// Restrict model-visible tools to code mode entrypoints (`exec`, `exec_wait`).
/// Restrict model-visible tools to code mode entrypoints (`exec`, `wait`).
CodeModeOnly,
/// Only expose js_repl tools directly to the model.
JsReplToolsOnly,
Expand Down
5 changes: 3 additions & 2 deletions codex-rs/core/src/tools/code_mode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,11 @@ const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("bridge.js");
const CODE_MODE_DESCRIPTION_TEMPLATE: &str = include_str!("description.md");
const CODE_MODE_WAIT_DESCRIPTION_TEMPLATE: &str = include_str!("wait_description.md");
const CODE_MODE_PRAGMA_PREFIX: &str = "// @exec:";
const CODE_MODE_ONLY_PREFACE: &str = "Use `exec/exec_wait` tool to run all other tools, do not attempt to use any other tools directly";
const CODE_MODE_ONLY_PREFACE: &str =
"Use `exec/wait` tool to run all other tools, do not attempt to use any other tools directly";

pub(crate) const PUBLIC_TOOL_NAME: &str = "exec";
pub(crate) const WAIT_TOOL_NAME: &str = "exec_wait";
pub(crate) const WAIT_TOOL_NAME: &str = "wait";

pub(crate) fn is_code_mode_nested_tool(tool_name: &str) -> bool {
tool_name != PUBLIC_TOOL_NAME && tool_name != WAIT_TOOL_NAME
Expand Down
10 changes: 5 additions & 5 deletions codex-rs/core/src/tools/code_mode/wait_description.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
- Use `exec_wait` only after `exec` returns `Script running with cell ID ...`.
- Use `wait` only after `exec` returns `Script running with cell ID ...`.
- `cell_id` identifies the running `exec` cell to resume.
- `yield_time_ms` controls how long to wait for more output before yielding again. If omitted, `exec_wait` uses its default wait timeout.
- `yield_time_ms` controls how long to wait for more output before yielding again. If omitted, `wait` uses its default wait timeout.
- `max_tokens` limits how much new output this wait call returns.
- `terminate: true` stops the running cell instead of waiting for more output.
- `exec_wait` returns only the new output since the last yield, or the final completion or termination result for that cell.
- If the cell is still running, `exec_wait` may yield again with the same `cell_id`.
- If the cell has already finished, `exec_wait` returns the completed result and closes the cell.
- `wait` returns only the new output since the last yield, or the final completion or termination result for that cell.
- If the cell is still running, `wait` may yield again with the same `cell_id`.
- If the cell has already finished, `wait` returns the completed result and closes the cell.
4 changes: 2 additions & 2 deletions codex-rs/core/src/tools/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,7 @@ fn create_write_stdin_tool() -> ToolSpec {
})
}

fn create_exec_wait_tool() -> ToolSpec {
fn create_wait_tool() -> ToolSpec {
let properties = BTreeMap::from([
(
"cell_id".to_string(),
Expand Down Expand Up @@ -2597,7 +2597,7 @@ pub(crate) fn build_specs_with_discoverable_tools(
builder.register_handler(PUBLIC_TOOL_NAME, code_mode_handler);
push_tool_spec(
&mut builder,
create_exec_wait_tool(),
create_wait_tool(),
/*supports_parallel_tool_calls*/ false,
config.code_mode_enabled,
);
Expand Down
6 changes: 3 additions & 3 deletions codex-rs/core/src/tools/spec_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2692,7 +2692,7 @@ fn code_mode_only_restricts_model_tools_to_exec_tools() {
"gpt-5.1-codex",
&features,
Some(WebSearchMode::Live),
&["exec", "exec_wait"],
&["exec", "wait"],
);
}

Expand Down Expand Up @@ -2723,7 +2723,7 @@ fn code_mode_only_exec_description_includes_full_nested_tool_details() {
assert!(!description.contains("Enabled nested tools:"));
assert!(!description.contains("Nested tool reference:"));
assert!(description.starts_with(
"Use `exec/exec_wait` tool to run all other tools, do not attempt to use any other tools directly"
"Use `exec/wait` tool to run all other tools, do not attempt to use any other tools directly"
));
assert!(description.contains("### `update_plan` (`update_plan`)"));
assert!(description.contains("### `view_image` (`view_image`)"));
Expand Down Expand Up @@ -2753,7 +2753,7 @@ fn code_mode_exec_description_omits_nested_tool_details_when_not_code_mode_only(
};

assert!(!description.starts_with(
"Use `exec/exec_wait` tool to run all other tools, do not attempt to use any other tools directly"
"Use `exec/wait` tool to run all other tools, do not attempt to use any other tools directly"
));
assert!(!description.contains("### `update_plan` (`update_plan`)"));
assert!(!description.contains("### `view_image` (`view_image`)"));
Expand Down
34 changes: 17 additions & 17 deletions codex-rs/core/tests/suite/code_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ async fn code_mode_only_restricts_prompt_tools() -> Result<()> {
let first_body = resp_mock.single_request().body_json();
assert_eq!(
tool_names(&first_body),
vec!["exec".to_string(), "exec_wait".to_string()]
vec!["exec".to_string(), "wait".to_string()]
);

Ok(())
Expand Down Expand Up @@ -539,7 +539,7 @@ Error:\ boom\n

#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_yield_and_resume_with_exec_wait() -> Result<()> {
async fn code_mode_can_yield_and_resume_with_wait() -> Result<()> {
skip_if_no_network!(Ok(()));

let server = responses::start_mock_server().await;
Expand Down Expand Up @@ -602,7 +602,7 @@ text("phase 3");
ev_response_created("resp-3"),
responses::ev_function_call(
"call-2",
"exec_wait",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": cell_id.clone(),
"yield_time_ms": 1_000,
Expand Down Expand Up @@ -646,7 +646,7 @@ text("phase 3");
ev_response_created("resp-5"),
responses::ev_function_call(
"call-3",
"exec_wait",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": cell_id.clone(),
"yield_time_ms": 1_000,
Expand Down Expand Up @@ -742,7 +742,7 @@ while (true) {}
ev_response_created("resp-3"),
responses::ev_function_call(
"call-2",
"exec_wait",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": cell_id.clone(),
"terminate": true,
Expand Down Expand Up @@ -869,7 +869,7 @@ text("session b done");
ev_response_created("resp-5"),
responses::ev_function_call(
"call-3",
"exec_wait",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": session_a_id.clone(),
"yield_time_ms": 1_000,
Expand Down Expand Up @@ -909,7 +909,7 @@ text("session b done");
ev_response_created("resp-7"),
responses::ev_function_call(
"call-4",
"exec_wait",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": session_b_id.clone(),
"yield_time_ms": 1_000,
Expand Down Expand Up @@ -947,7 +947,7 @@ text("session b done");

#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_exec_wait_can_terminate_and_continue() -> Result<()> {
async fn code_mode_wait_can_terminate_and_continue() -> Result<()> {
skip_if_no_network!(Ok(()));

let server = responses::start_mock_server().await;
Expand Down Expand Up @@ -999,7 +999,7 @@ text("phase 2");
ev_response_created("resp-3"),
responses::ev_function_call(
"call-2",
"exec_wait",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": cell_id.clone(),
"terminate": true,
Expand Down Expand Up @@ -1073,7 +1073,7 @@ text("after terminate");
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_exec_wait_returns_error_for_unknown_session() -> Result<()> {
async fn code_mode_wait_returns_error_for_unknown_session() -> Result<()> {
skip_if_no_network!(Ok(()));

let server = responses::start_mock_server().await;
Expand All @@ -1088,7 +1088,7 @@ async fn code_mode_exec_wait_returns_error_for_unknown_session() -> Result<()> {
ev_response_created("resp-1"),
responses::ev_function_call(
"call-1",
"exec_wait",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": "999999",
"yield_time_ms": 1_000,
Expand Down Expand Up @@ -1134,7 +1134,7 @@ async fn code_mode_exec_wait_returns_error_for_unknown_session() -> Result<()> {

#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_after_yield_control()
async fn code_mode_wait_terminate_returns_completed_session_if_it_finished_after_yield_control()
-> Result<()> {
skip_if_no_network!(Ok(()));

Expand Down Expand Up @@ -1229,7 +1229,7 @@ text("session b done");
ev_response_created("resp-5"),
responses::ev_function_call(
"call-3",
"exec_wait",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": session_b_id.clone(),
"yield_time_ms": 1_000,
Expand Down Expand Up @@ -1279,7 +1279,7 @@ text("session b done");
ev_response_created("resp-7"),
responses::ev_function_call(
"call-4",
"exec_wait",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": session_a_id.clone(),
"terminate": true,
Expand Down Expand Up @@ -1330,7 +1330,7 @@ text("session b done");

#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_background_keeps_running_on_later_turn_without_exec_wait() -> Result<()> {
async fn code_mode_background_keeps_running_on_later_turn_without_wait() -> Result<()> {
skip_if_no_network!(Ok(()));

let server = responses::start_mock_server().await;
Expand Down Expand Up @@ -1423,7 +1423,7 @@ text("after yield");

#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_exec_wait_uses_its_own_max_tokens_budget() -> Result<()> {
async fn code_mode_wait_uses_its_own_max_tokens_budget() -> Result<()> {
skip_if_no_network!(Ok(()));

let server = responses::start_mock_server().await;
Expand Down Expand Up @@ -1476,7 +1476,7 @@ text("token one token two token three token four token five token six token seve
ev_response_created("resp-3"),
responses::ev_function_call(
"call-2",
"exec_wait",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": cell_id.clone(),
"yield_time_ms": 1_000,
Expand Down
Loading