From b262f5c935b1cb4329f9c67a64e2c8a2da0e68de Mon Sep 17 00:00:00 2001 From: Jose <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 01:44:56 +0100 Subject: [PATCH 01/22] core: add agent tool support --- codex-rs/Cargo.lock | 1 + codex-rs/core/src/agent.rs | 450 ++++++++++++++++++ codex-rs/core/src/codex.rs | 29 ++ codex-rs/core/src/lib.rs | 1 + codex-rs/core/src/rollout/policy.rs | 4 + codex-rs/core/src/state/service.rs | 2 + codex-rs/core/src/tools/handlers/agent.rs | 278 +++++++++++ codex-rs/core/src/tools/handlers/mod.rs | 2 + codex-rs/core/src/tools/spec.rs | 59 +++ codex-rs/core/src/turn_diff_tracker.rs | 34 ++ codex-rs/core/tests/suite/model_tools.rs | 18 +- codex-rs/core/tests/suite/prompt_caching.rs | 24 +- .../src/event_processor_with_human_output.rs | 4 + codex-rs/mcp-server/src/codex_tool_runner.rs | 6 +- codex-rs/protocol/src/protocol.rs | 73 +++ 15 files changed, 962 insertions(+), 23 deletions(-) create mode 100644 codex-rs/core/src/agent.rs create mode 100644 codex-rs/core/src/tools/handlers/agent.rs diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 0bcfd4f37a63..f988aa9d7348 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1549,6 +1549,7 @@ dependencies = [ "lazy_static", "libc", "mcp-types", + "once_cell", "opentelemetry-appender-tracing", "pathdiff", "pretty_assertions", diff --git a/codex-rs/core/src/agent.rs b/codex-rs/core/src/agent.rs new file mode 100644 index 000000000000..26ffe1ef4b5d --- /dev/null +++ b/codex-rs/core/src/agent.rs @@ -0,0 +1,450 @@ +//! Multi-agent orchestration system with customizable system prompts +//! +//! This module provides a lightweight agent system where agents are primarily +//! specialized through custom system prompts while inheriting tools and permissions +//! from the current workspace context. + +use crate::error::Result; +use serde::Deserialize; +use serde::Serialize; +use std::collections::HashMap; +use std::path::Component; +use std::path::Path; +use std::path::PathBuf; + +/// Configuration for a single agent +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AgentConfig { + /// The system prompt that defines the agent's behavior + /// Required if prompt_file is not provided + #[serde(default, skip_serializing_if = "Option::is_none")] + pub prompt: Option, + + /// Optional: Load prompt from file instead of inline + /// Required if prompt is not provided + #[serde(skip_serializing_if = "Option::is_none")] + pub prompt_file: Option, + + /// Optional: Override tools (usually inherits from context) + #[serde(skip_serializing_if = "Option::is_none")] + pub tools: Option>, + + /// Optional: Override permissions (usually inherits from context) + #[serde(skip_serializing_if = "Option::is_none")] + pub permissions: Option, +} + +impl AgentConfig { + /// Validate that the config has either prompt or prompt_file + pub fn validate(&self) -> anyhow::Result<()> { + if self.prompt.is_none() && self.prompt_file.is_none() { + return Err(anyhow::anyhow!( + "Agent configuration must have either 'prompt' or 'prompt_file'" + )); + } + if self.prompt.is_some() && self.prompt_file.is_some() { + return Err(anyhow::anyhow!( + "Agent configuration should have either 'prompt' or 'prompt_file', not both" + )); + } + Ok(()) + } + + /// Get the effective prompt, loading from file if necessary + pub fn get_prompt(&mut self, agents_dir: Option<&Path>) -> anyhow::Result { + if let Some(prompt) = &self.prompt { + return Ok(prompt.clone()); + } + + if let Some(prompt_file) = &self.prompt_file { + let full_path = if let Some(dir) = agents_dir { + dir.join(prompt_file) + } else { + PathBuf::from(prompt_file) + }; + + let prompt_content = std::fs::read_to_string(&full_path).map_err(|e| { + anyhow::anyhow!("Cannot read prompt file '{}': {}", full_path.display(), e) + })?; + + // Cache the loaded prompt + self.prompt = Some(prompt_content.clone()); + Ok(prompt_content) + } else { + Err(anyhow::anyhow!("No prompt or prompt_file specified")) + } + } +} + +/// Registry of available agents and their configurations +pub struct AgentRegistry { + agents: HashMap, + #[allow(dead_code)] + agents_dir: Option, +} + +impl AgentRegistry { + /// Validate that a prompt file path doesn't escape allowed directories + fn validate_prompt_path(base_dir: &Path, prompt_file: &str) -> anyhow::Result { + let path = if prompt_file.starts_with('/') { + PathBuf::from(prompt_file) + } else { + base_dir.join(prompt_file) + }; + + let canonical_base = base_dir + .canonicalize() + .map_err(|e| anyhow::anyhow!("Cannot access prompts directory: {e}"))?; + + // Canonicalize to resolve ../ and symlinks. If the path traversal makes + // the target unreachable (e.g., because the intermediate path does not + // exist), treat it as a security violation instead of a missing file. + let canonical = match path.canonicalize() { + Ok(canonical) => canonical, + Err(err) => { + if path.is_absolute() + || path + .components() + .any(|component| matches!(component, Component::ParentDir)) + { + return Err(anyhow::anyhow!( + "Security error: Prompt file must be within ~/.codex or the configured agents directory" + )); + } + + return Err(anyhow::anyhow!("Cannot access prompt file: {err}")); + } + }; + + // Get the home/.codex directory + let home_codex = dirs::home_dir() + .ok_or_else(|| anyhow::anyhow!("Cannot determine home directory"))? + .join(".codex"); + + let canonical_home = home_codex.canonicalize().unwrap_or(home_codex); + + // Security check: path must be within ~/.codex or the base directory + if !canonical.starts_with(&canonical_home) && !canonical.starts_with(&canonical_base) { + return Err(anyhow::anyhow!( + "Security error: Prompt file must be within ~/.codex or the configured agents directory" + )); + } + + Ok(canonical) + } + + /// Create a new agent registry, loading user configurations if available + pub fn new() -> Result { + let mut agents = HashMap::new(); + + // Add the single default "general" agent + agents.insert( + "general".to_string(), + AgentConfig { + prompt: Some("You are a helpful AI assistant. Complete the given task efficiently and accurately.".to_string()), + prompt_file: None, + tools: None, + permissions: None, + } + ); + + // Try to load user agents from ~/.codex/agents.toml + let agents_dir = Self::get_agents_directory(); + if let Some(ref dir) = agents_dir { + let config_path = dir.join("agents.toml"); + if config_path.exists() { + match std::fs::read_to_string(&config_path) { + Ok(content) => { + match toml::from_str::>(&content) { + Ok(user_agents) => { + // Process each agent config + for (name, mut config) in user_agents { + // Validate the configuration + if let Err(e) = config.validate() { + tracing::error!( + "Agent '{}' configuration invalid: {}", + name, + e + ); + continue; + } + + // If prompt_file is specified, load the prompt from file + if let Some(ref prompt_file) = config.prompt_file { + // Validate the path to prevent traversal attacks + match Self::validate_prompt_path(dir, prompt_file) { + Ok(safe_path) => { + match std::fs::read_to_string(&safe_path) { + Ok(prompt_content) => { + config.prompt = Some(prompt_content); + tracing::debug!( + "Loaded prompt file for agent '{}'", + name + ); + } + Err(e) => { + tracing::error!( + "Cannot read prompt file '{}' for agent '{}': {}", + prompt_file, + name, + e + ); + // Skip this agent but continue loading others + continue; + } + } + } + Err(e) => { + tracing::error!( + "Agent '{}' configuration error: {}", + name, + e + ); + // Skip this agent but continue loading others + continue; + } + } + } + + agents.insert(name, config); + } + tracing::info!("Loaded {} user-defined agents", agents.len() - 1); + } + Err(e) => { + tracing::warn!("Failed to parse agents.toml: {}", e); + } + } + } + Err(e) => { + tracing::debug!("Could not read agents.toml: {}", e); + } + } + } + } + + Ok(Self { agents, agents_dir }) + } + + /// Get the agents directory path (~/.codex/agents) + fn get_agents_directory() -> Option { + std::env::var("HOME") + .or_else(|_| std::env::var("USERPROFILE")) + .ok() + .map(|home| PathBuf::from(home).join(".codex")) + } + + /// Get an agent configuration by name + #[allow(dead_code)] + pub fn get_agent(&self, name: &str) -> Option<&AgentConfig> { + self.agents.get(name) + } + + /// Get the system prompt for an agent (falls back to "general" if not found) + pub fn get_system_prompt(&self, agent_name: &str) -> String { + self.agents + .get(agent_name) + .or_else(|| self.agents.get("general")) + .and_then(|config| config.prompt.clone()) + .unwrap_or_else(|| "You are a helpful AI assistant.".to_string()) + } + + /// List all available agents + #[allow(dead_code)] + pub fn list_agents(&self) -> Vec { + self.agents.keys().cloned().collect() + } + + /// Get detailed information about all agents + pub fn list_agent_details(&self) -> Vec { + let mut agents = Vec::new(); + + for (name, config) in &self.agents { + let description = if let Some(ref prompt) = config.prompt { + self.extract_description(prompt) + } else { + "Agent with file-based prompt".to_string() + }; + agents.push(crate::protocol::AgentInfo { + name: name.clone(), + description, + is_builtin: name == "general", + }); + } + + agents.sort_by(|a, b| { + // Built-in agents first, then alphabetical + match (a.is_builtin, b.is_builtin) { + (true, false) => std::cmp::Ordering::Less, + (false, true) => std::cmp::Ordering::Greater, + _ => a.name.cmp(&b.name), + } + }); + + agents + } + + /// Extract brief description from prompt + fn extract_description(&self, prompt: &str) -> String { + // Take first line or first sentence as description + let first_line = prompt.lines().next().unwrap_or(""); + let desc = if let Some(pos) = first_line.find('.') { + &first_line[..=pos] + } else { + first_line + }; + + // Clean up common prefixes + desc.trim_start_matches("You are a ") + .trim_start_matches("You are an ") + .trim_start_matches("You are ") + .trim() + .to_string() + } + + /// Check if agents can spawn other agents (always false to prevent recursion) + #[allow(dead_code)] + pub fn can_spawn_agents(metadata: &HashMap) -> bool { + !metadata.contains_key("is_agent") + } + + /// Mark a context as being an agent context + #[allow(dead_code)] + pub fn mark_as_agent_context(metadata: &mut HashMap) { + metadata.insert("is_agent".to_string(), "true".to_string()); + } +} + +/// Execute an agent with a specific task +#[allow(dead_code)] +pub async fn execute_agent_task( + agent_name: &str, + task: String, + registry: &AgentRegistry, +) -> Result { + // Get the agent's system prompt + let system_prompt = registry.get_system_prompt(agent_name); + + // Build the specialized prompt for this agent + let full_prompt = format!("{system_prompt}\n\nTask: {task}"); + + // Note: The actual execution will be handled by the parent context + // using the existing conversation infrastructure + Ok(full_prompt) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempfile::TempDir; + + #[test] + fn test_default_agent_exists() { + let registry = AgentRegistry::new().unwrap(); + assert!(registry.get_agent("general").is_some()); + } + + #[test] + fn test_agent_recursion_prevention() { + let mut metadata = HashMap::new(); + assert!(AgentRegistry::can_spawn_agents(&metadata)); + + AgentRegistry::mark_as_agent_context(&mut metadata); + assert!(!AgentRegistry::can_spawn_agents(&metadata)); + } + + #[test] + fn test_path_traversal_prevention() { + // Create a temporary directory structure + let temp_dir = TempDir::new().unwrap(); + let base_dir = temp_dir.path(); + + // Create a safe file + let safe_dir = base_dir.join("prompts"); + fs::create_dir(&safe_dir).unwrap(); + let safe_file = safe_dir.join("test.txt"); + fs::write(&safe_file, "safe content").unwrap(); + + // Test that normal paths work + let result = AgentRegistry::validate_prompt_path(base_dir, "prompts/test.txt"); + assert!(result.is_ok()); + + // Test that path traversal is blocked + let result = AgentRegistry::validate_prompt_path(base_dir, "../../../etc/passwd"); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("Security error")); + + // Test that absolute paths outside allowed dirs are blocked + let result = AgentRegistry::validate_prompt_path(base_dir, "/etc/passwd"); + assert!(result.is_err()); + } + + #[test] + fn test_agent_config_validation() { + // Test config with prompt is valid + let config = AgentConfig { + prompt: Some("Test prompt".to_string()), + prompt_file: None, + tools: None, + permissions: None, + }; + assert!(config.validate().is_ok()); + + // Test config with prompt_file is valid + let config = AgentConfig { + prompt: None, + prompt_file: Some("test.txt".to_string()), + tools: None, + permissions: None, + }; + assert!(config.validate().is_ok()); + + // Test config with neither prompt nor prompt_file is invalid + let config = AgentConfig { + prompt: None, + prompt_file: None, + tools: None, + permissions: None, + }; + assert!(config.validate().is_err()); + + // Test config with both prompt and prompt_file is invalid + let config = AgentConfig { + prompt: Some("Test prompt".to_string()), + prompt_file: Some("test.txt".to_string()), + tools: None, + permissions: None, + }; + assert!(config.validate().is_err()); + } + + #[test] + fn test_agent_config_get_prompt() { + // Test getting prompt from inline prompt + let mut config = AgentConfig { + prompt: Some("Inline prompt".to_string()), + prompt_file: None, + tools: None, + permissions: None, + }; + assert_eq!(config.get_prompt(None).unwrap(), "Inline prompt"); + + // Test getting prompt from file + let temp_dir = TempDir::new().unwrap(); + let prompt_file = temp_dir.path().join("test_prompt.txt"); + fs::write(&prompt_file, "File-based prompt").unwrap(); + + let mut config = AgentConfig { + prompt: None, + prompt_file: Some("test_prompt.txt".to_string()), + tools: None, + permissions: None, + }; + + let prompt = config.get_prompt(Some(temp_dir.path())).unwrap(); + assert_eq!(prompt, "File-based prompt"); + + // Check that prompt is cached + assert_eq!(config.prompt, Some("File-based prompt".to_string())); + } +} diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 96402961bf8a..b0258cedf787 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -6,6 +6,7 @@ use std::sync::atomic::AtomicU64; use crate::AuthManager; use crate::SandboxState; +use crate::agent::AgentRegistry; use crate::client_common::REVIEW_PROMPT; use crate::compact; use crate::compact::run_inline_auto_compact_task; @@ -556,6 +557,10 @@ impl Session { // Create the mutable state for the Session. let state = SessionState::new(session_configuration.clone()); + let agent_registry = AgentRegistry::new().map_err(|err| { + CodexErr::Fatal(format!("failed to initialize agent registry: {err}")) + })?; + let services = SessionServices { mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())), mcp_startup_cancellation_token: CancellationToken::new(), @@ -567,6 +572,7 @@ impl Session { auth_manager: Arc::clone(&auth_manager), otel_event_manager, tool_approvals: Mutex::new(ApprovalStore::default()), + agent_registry: Arc::new(agent_registry), }; let sess = Arc::new(Session { @@ -1365,6 +1371,14 @@ impl Session { &self.services.notifier } + pub(crate) fn agent_registry(&self) -> &AgentRegistry { + &self.services.agent_registry + } + + pub(crate) fn conversation_id(&self) -> ConversationId { + self.conversation_id + } + pub(crate) fn user_shell(&self) -> &shell::Shell { &self.services.user_shell } @@ -1432,6 +1446,9 @@ async fn submission_loop(sess: Arc, config: Arc, rx_sub: Receiv Op::ListMcpTools => { handlers::list_mcp_tools(&sess, &config, sub.id.clone()).await; } + Op::ListAgents => { + handlers::list_agents(&sess, sub.id.clone()).await; + } Op::ListCustomPrompts => { handlers::list_custom_prompts(&sess, sub.id.clone()).await; } @@ -1489,6 +1506,7 @@ mod handlers { use codex_protocol::protocol::ErrorEvent; use codex_protocol::protocol::Event; use codex_protocol::protocol::EventMsg; + use codex_protocol::protocol::ListAgentsResponseEvent; use codex_protocol::protocol::ListCustomPromptsResponseEvent; use codex_protocol::protocol::Op; use codex_protocol::protocol::ReviewDecision; @@ -1703,6 +1721,15 @@ mod handlers { sess.send_event_raw(event).await; } + pub async fn list_agents(sess: &Session, sub_id: String) { + let agents = sess.agent_registry().list_agent_details(); + let event = Event { + id: sub_id, + msg: EventMsg::ListAgentsResponse(ListAgentsResponseEvent { agents }), + }; + sess.send_event_raw(event).await; + } + pub async fn list_custom_prompts(sess: &Session, sub_id: String) { let custom_prompts: Vec = if let Some(dir) = crate::custom_prompts::default_prompts_dir() { @@ -2687,6 +2714,7 @@ mod tests { auth_manager: Arc::clone(&auth_manager), otel_event_manager: otel_event_manager.clone(), tool_approvals: Mutex::new(ApprovalStore::default()), + agent_registry: Arc::new(AgentRegistry::new().expect("agent registry to initialize")), }; let turn_context = Session::make_turn_context( @@ -2765,6 +2793,7 @@ mod tests { auth_manager: Arc::clone(&auth_manager), otel_event_manager: otel_event_manager.clone(), tool_approvals: Mutex::new(ApprovalStore::default()), + agent_registry: Arc::new(AgentRegistry::new().expect("agent registry to initialize")), }; let turn_context = Arc::new(Session::make_turn_context( diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 805943a2e7db..3e8c9a606dd3 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -5,6 +5,7 @@ // the TUI or the tracing stack). #![deny(clippy::print_stdout, clippy::print_stderr)] +pub mod agent; mod apply_patch; pub mod auth; pub mod bash; diff --git a/codex-rs/core/src/rollout/policy.rs b/codex-rs/core/src/rollout/policy.rs index 4d5f709d254d..b2aab0e0ba2e 100644 --- a/codex-rs/core/src/rollout/policy.rs +++ b/codex-rs/core/src/rollout/policy.rs @@ -77,6 +77,10 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool { | EventMsg::McpStartupUpdate(_) | EventMsg::McpStartupComplete(_) | EventMsg::ListCustomPromptsResponse(_) + | EventMsg::ListAgentsResponse(_) + | EventMsg::AgentBegin(_) + | EventMsg::AgentProgress(_) + | EventMsg::AgentEnd(_) | EventMsg::PlanUpdate(_) | EventMsg::ShutdownComplete | EventMsg::ViewImageToolCall(_) diff --git a/codex-rs/core/src/state/service.rs b/codex-rs/core/src/state/service.rs index 287fb73d2563..7e83d5bf7451 100644 --- a/codex-rs/core/src/state/service.rs +++ b/codex-rs/core/src/state/service.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use crate::AuthManager; use crate::RolloutRecorder; +use crate::agent::AgentRegistry; use crate::mcp_connection_manager::McpConnectionManager; use crate::tools::sandboxing::ApprovalStore; use crate::unified_exec::UnifiedExecSessionManager; @@ -22,4 +23,5 @@ pub(crate) struct SessionServices { pub(crate) auth_manager: Arc, pub(crate) otel_event_manager: OtelEventManager, pub(crate) tool_approvals: Mutex, + pub(crate) agent_registry: Arc, } diff --git a/codex-rs/core/src/tools/handlers/agent.rs b/codex-rs/core/src/tools/handlers/agent.rs new file mode 100644 index 000000000000..d29809b1e66b --- /dev/null +++ b/codex-rs/core/src/tools/handlers/agent.rs @@ -0,0 +1,278 @@ +use std::time::Instant; + +use async_trait::async_trait; +use futures::StreamExt; +use serde::Deserialize; +use tracing::warn; + +use crate::Prompt; +use crate::ResponseEvent; +use crate::agent::AgentRegistry; +use crate::codex::Session; +use crate::function_tool::FunctionCallError; +use crate::protocol::AgentBeginEvent; +use crate::protocol::AgentEndEvent; +use crate::protocol::AgentMessageContentDeltaEvent; +use crate::protocol::AgentReasoningSectionBreakEvent; +use crate::protocol::AgentStatus; +use crate::protocol::EventMsg; +use crate::protocol::ReasoningContentDeltaEvent; +use crate::protocol::ReasoningRawContentDeltaEvent; +use crate::tools::context::ToolInvocation; +use crate::tools::context::ToolOutput; +use crate::tools::context::ToolPayload; +use crate::tools::registry::ToolHandler; +use crate::tools::registry::ToolKind; +use codex_protocol::models::ContentItem; +use codex_protocol::models::ResponseItem; +use codex_protocol::protocol::SessionSource; + +#[derive(Debug, Deserialize)] +struct AgentCall { + #[serde(alias = "agent", alias = "agent_name", alias = "name")] + name: String, + #[serde( + alias = "task", + alias = "input", + alias = "instruction", + alias = "message" + )] + task: String, + #[serde(default)] + context: Option, + #[serde(default)] + plan_item_id: Option, +} + +fn assistant_text(item: &ResponseItem) -> Option { + if let ResponseItem::Message { role, content, .. } = item + && role == "assistant" + { + let mut buf = String::new(); + for part in content { + if let ContentItem::OutputText { text } = part { + buf.push_str(text); + } + } + if !buf.is_empty() { + return Some(buf); + } + } + None +} + +fn agent_registry(session: &Session) -> &AgentRegistry { + session.agent_registry() +} + +pub struct AgentHandler; + +#[async_trait] +impl ToolHandler for AgentHandler { + fn kind(&self) -> ToolKind { + ToolKind::Function + } + + async fn handle(&self, invocation: ToolInvocation) -> Result { + let ToolInvocation { + session, + turn, + call_id, + payload, + .. + } = invocation; + + let arguments = match payload { + ToolPayload::Function { arguments } => arguments, + _ => { + return Err(FunctionCallError::Fatal( + "agent tool invoked with incompatible payload".to_string(), + )); + } + }; + + let parsed: AgentCall = serde_json::from_str(&arguments).map_err(|e| { + FunctionCallError::RespondToModel(format!("invalid agent call arguments: {e}")) + })?; + + let AgentCall { + name, + task, + context, + plan_item_id, + } = parsed; + + // Prevent recursion by refusing to run if the tool list already excludes agent tool. + if matches!(turn.client.get_session_source(), SessionSource::SubAgent(_)) { + warn!("agent tool recursion detected"); + return Err(FunctionCallError::RespondToModel( + "Agents cannot spawn other agents".to_string(), + )); + } + + let registry = agent_registry(session.as_ref()); + let agent_prompt = registry.get_system_prompt(&name); + + let begin_event = AgentBeginEvent { + call_id: call_id.clone(), + agent_name: name.clone(), + task: task.clone(), + parent_context: None, + plan_item_id: plan_item_id.clone(), + }; + session + .send_event(turn.as_ref(), EventMsg::AgentBegin(begin_event)) + .await; + + let base_instructions = turn + .base_instructions + .clone() + .unwrap_or_else(|| turn.client.get_model_family().base_instructions); + let instructions = format!( + "{base_instructions}\n\nYou are the \"{name}\" agent.\n{agent_prompt}\n\nDo not delegate to other agents. Provide the result directly.", + ); + + let mut task_text = task.clone(); + if let Some(ctx) = context.as_ref().filter(|ctx| !ctx.trim().is_empty()) { + task_text = format!("{task_text}\n\nContext:\n{ctx}"); + } + + let prompt = Prompt { + input: vec![ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputText { text: task_text }], + }], + tools: Vec::new(), + parallel_tool_calls: false, + base_instructions_override: Some(instructions), + output_schema: None, + }; + + let mut stream = turn + .client + .clone() + .stream(&prompt) + .await + .map_err(|e| FunctionCallError::Fatal(e.to_string()))?; + + let started = Instant::now(); + let mut message = String::new(); + let mut item_id = call_id.clone(); + if item_id.is_empty() { + item_id = "agent-call".to_string(); + } + + while let Some(event) = stream.next().await { + let event = event.map_err(|e| FunctionCallError::Fatal(e.to_string()))?; + match event { + ResponseEvent::Created => {} + ResponseEvent::OutputItemDone(item) | ResponseEvent::OutputItemAdded(item) => { + if message.is_empty() + && let Some(text) = assistant_text(&item) + { + message.push_str(&text); + } + } + ResponseEvent::OutputTextDelta(delta) => { + message.push_str(&delta); + let evt = AgentMessageContentDeltaEvent { + thread_id: session.conversation_id().to_string(), + turn_id: turn.sub_id.clone(), + item_id: item_id.clone(), + delta, + }; + session + .send_event(turn.as_ref(), EventMsg::AgentMessageContentDelta(evt)) + .await; + } + ResponseEvent::ReasoningSummaryDelta { + delta, + summary_index, + } => { + let evt = ReasoningContentDeltaEvent { + thread_id: session.conversation_id().to_string(), + turn_id: turn.sub_id.clone(), + item_id: item_id.clone(), + delta, + summary_index, + }; + session + .send_event(turn.as_ref(), EventMsg::ReasoningContentDelta(evt)) + .await; + } + ResponseEvent::ReasoningSummaryPartAdded { summary_index } => { + session + .send_event( + turn.as_ref(), + EventMsg::AgentReasoningSectionBreak(AgentReasoningSectionBreakEvent { + item_id: item_id.clone(), + summary_index, + }), + ) + .await; + } + ResponseEvent::ReasoningContentDelta { + delta, + content_index, + } => { + let evt = ReasoningRawContentDeltaEvent { + thread_id: session.conversation_id().to_string(), + turn_id: turn.sub_id.clone(), + item_id: item_id.clone(), + delta, + content_index, + }; + session + .send_event(turn.as_ref(), EventMsg::ReasoningRawContentDelta(evt)) + .await; + } + ResponseEvent::RateLimits(snapshot) => { + session.update_rate_limits(turn.as_ref(), snapshot).await; + } + ResponseEvent::Completed { + response_id: _, + token_usage, + } => { + session + .update_token_usage_info(turn.as_ref(), token_usage.as_ref()) + .await; + break; + } + } + } + + if message.is_empty() { + message = "Agent completed without output.".to_string(); + } + + let duration_ms = started.elapsed().as_millis().min(u128::from(u64::MAX)) as u64; + session + .send_event( + turn.as_ref(), + EventMsg::AgentMessage(crate::protocol::AgentMessageEvent { + message: message.clone(), + }), + ) + .await; + session + .send_event( + turn.as_ref(), + EventMsg::AgentEnd(AgentEndEvent { + call_id, + agent_name: name, + summary: message.clone(), + status: AgentStatus::Done, + duration_ms, + plan_item_id, + }), + ) + .await; + + Ok(ToolOutput::Function { + content: message, + content_items: None, + success: Some(true), + }) + } +} diff --git a/codex-rs/core/src/tools/handlers/mod.rs b/codex-rs/core/src/tools/handlers/mod.rs index dcf848e37605..03afc78d9aea 100644 --- a/codex-rs/core/src/tools/handlers/mod.rs +++ b/codex-rs/core/src/tools/handlers/mod.rs @@ -1,3 +1,4 @@ +mod agent; pub mod apply_patch; mod grep_files; mod list_dir; @@ -12,6 +13,7 @@ mod view_image; pub use plan::PLAN_TOOL; +pub use agent::AgentHandler; pub use apply_patch::ApplyPatchHandler; pub use grep_files::GrepFilesHandler; pub use list_dir::ListDirHandler; diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index d07c605e50e7..243fa5eab8e7 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -35,6 +35,7 @@ pub(crate) struct ToolsConfig { pub apply_patch_tool_type: Option, pub web_search_request: bool, pub include_view_image_tool: bool, + pub include_agent_tool: bool, pub experimental_supported_tools: Vec, } @@ -78,6 +79,7 @@ impl ToolsConfig { apply_patch_tool_type, web_search_request: include_web_search_request, include_view_image_tool, + include_agent_tool: true, experimental_supported_tools: model_family.experimental_supported_tools.clone(), } } @@ -219,6 +221,45 @@ fn create_exec_command_tool() -> ToolSpec { }) } +fn create_agent_tool() -> ToolSpec { + let mut properties = BTreeMap::new(); + properties.insert( + "name".to_string(), + JsonSchema::String { + description: Some("Registered agent name to run.".to_string()), + }, + ); + properties.insert( + "task".to_string(), + JsonSchema::String { + description: Some("Task or instruction for the agent to complete.".to_string()), + }, + ); + properties.insert( + "context".to_string(), + JsonSchema::String { + description: Some("Optional additional context the agent should consider.".to_string()), + }, + ); + properties.insert( + "plan_item_id".to_string(), + JsonSchema::String { + description: Some("Optional plan item id associated with this run.".to_string()), + }, + ); + + ToolSpec::Function(ResponsesApiTool { + name: "agent".to_string(), + description: "Invoke a named agent with a task to execute.".to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["name".to_string(), "task".to_string()]), + additional_properties: Some(false.into()), + }, + }) +} + fn create_write_stdin_tool() -> ToolSpec { let mut properties = BTreeMap::new(); properties.insert( @@ -973,6 +1014,7 @@ pub(crate) fn build_specs( config: &ToolsConfig, mcp_tools: Option>, ) -> ToolRegistryBuilder { + use crate::tools::handlers::AgentHandler; use crate::tools::handlers::ApplyPatchHandler; use crate::tools::handlers::GrepFilesHandler; use crate::tools::handlers::ListDirHandler; @@ -993,6 +1035,7 @@ pub(crate) fn build_specs( let unified_exec_handler = Arc::new(UnifiedExecHandler); let plan_handler = Arc::new(PlanHandler); let apply_patch_handler = Arc::new(ApplyPatchHandler); + let agent_handler = Arc::new(AgentHandler); let view_image_handler = Arc::new(ViewImageHandler); let mcp_handler = Arc::new(McpHandler); let mcp_resource_handler = Arc::new(McpResourceHandler); @@ -1095,6 +1138,11 @@ pub(crate) fn build_specs( builder.register_handler("view_image", view_image_handler); } + if config.include_agent_tool { + builder.push_spec(create_agent_tool()); + builder.register_handler("agent", agent_handler); + } + if let Some(mcp_tools) = mcp_tools { let mut entries: Vec<(String, mcp_types::Tool)> = mcp_tools.into_iter().collect(); entries.sort_by(|a, b| a.0.cmp(&b.0)); @@ -1253,6 +1301,7 @@ mod tests { create_apply_patch_freeform_tool(), ToolSpec::WebSearch {}, create_view_image_tool(), + create_agent_tool(), ] { expected.insert(tool_name(&spec).to_string(), spec); } @@ -1297,6 +1346,7 @@ mod tests { "update_plan", "apply_patch", "view_image", + "agent", ], ); } @@ -1314,6 +1364,7 @@ mod tests { "update_plan", "apply_patch", "view_image", + "agent", ], ); } @@ -1335,6 +1386,7 @@ mod tests { "apply_patch", "web_search", "view_image", + "agent", ], ); } @@ -1356,6 +1408,7 @@ mod tests { "apply_patch", "web_search", "view_image", + "agent", ], ); } @@ -1372,6 +1425,7 @@ mod tests { "read_mcp_resource", "update_plan", "view_image", + "agent", ], ); } @@ -1389,6 +1443,7 @@ mod tests { "update_plan", "apply_patch", "view_image", + "agent", ], ); } @@ -1405,6 +1460,7 @@ mod tests { "read_mcp_resource", "update_plan", "view_image", + "agent", ], ); } @@ -1422,6 +1478,7 @@ mod tests { "update_plan", "apply_patch", "view_image", + "agent", ], ); } @@ -1440,6 +1497,7 @@ mod tests { "update_plan", "apply_patch", "view_image", + "agent", ], ); } @@ -1460,6 +1518,7 @@ mod tests { "update_plan", "web_search", "view_image", + "agent", ], ); } diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index 06c40deb9042..5b8487a08883 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -219,6 +219,40 @@ impl TurnDiffTracker { if s.len() == 40 { Some(s) } else { None } } + /// Merge another TurnDiffTracker into this one. + /// This is used to combine changes from parallel tool executions. + pub fn merge(&mut self, other: TurnDiffTracker) { + // Merge external_to_temp_name mappings + for (path, temp_name) in other.external_to_temp_name { + // If we already have this path, keep our mapping (first one wins) + // Otherwise, add the new mapping + self.external_to_temp_name.entry(path).or_insert(temp_name); + } + + // Merge baseline_file_info + for (temp_name, file_info) in other.baseline_file_info { + // If we already have this baseline, keep ours (first baseline wins) + // Otherwise, add the new baseline + self.baseline_file_info + .entry(temp_name) + .or_insert(file_info); + } + + // Merge temp_name_to_current_path (this tracks renames) + for (temp_name, current_path) in other.temp_name_to_current_path { + // For renames, the latest mapping should win (last rename wins) + self.temp_name_to_current_path + .insert(temp_name, current_path); + } + + // Merge git_root_cache + for root in other.git_root_cache { + if !self.git_root_cache.contains(&root) { + self.git_root_cache.push(root); + } + } + } + /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were /// collected before the first time they were touched by apply_patch during this turn with /// the current repo state. diff --git a/codex-rs/core/tests/suite/model_tools.rs b/codex-rs/core/tests/suite/model_tools.rs index cb2c5725f2b9..0a3c299752b4 100644 --- a/codex-rs/core/tests/suite/model_tools.rs +++ b/codex-rs/core/tests/suite/model_tools.rs @@ -58,7 +58,8 @@ async fn model_selects_expected_tools() { "list_mcp_resource_templates".to_string(), "read_mcp_resource".to_string(), "update_plan".to_string(), - "view_image".to_string() + "view_image".to_string(), + "agent".to_string() ], "codex-mini-latest should expose the local shell tool", ); @@ -73,7 +74,8 @@ async fn model_selects_expected_tools() { "read_mcp_resource".to_string(), "update_plan".to_string(), "apply_patch".to_string(), - "view_image".to_string() + "view_image".to_string(), + "agent".to_string() ], "gpt-5-codex should expose the apply_patch tool", ); @@ -88,7 +90,8 @@ async fn model_selects_expected_tools() { "read_mcp_resource".to_string(), "update_plan".to_string(), "apply_patch".to_string(), - "view_image".to_string() + "view_image".to_string(), + "agent".to_string() ], "gpt-5.1-codex should expose the apply_patch tool", ); @@ -102,7 +105,8 @@ async fn model_selects_expected_tools() { "list_mcp_resource_templates".to_string(), "read_mcp_resource".to_string(), "update_plan".to_string(), - "view_image".to_string() + "view_image".to_string(), + "agent".to_string() ], "gpt-5 should expose the apply_patch tool", ); @@ -117,7 +121,8 @@ async fn model_selects_expected_tools() { "read_mcp_resource".to_string(), "update_plan".to_string(), "apply_patch".to_string(), - "view_image".to_string() + "view_image".to_string(), + "agent".to_string() ], "gpt-5.1 should expose the apply_patch tool", ); @@ -132,7 +137,8 @@ async fn model_selects_expected_tools() { "read_mcp_resource".to_string(), "update_plan".to_string(), "apply_patch".to_string(), - "view_image".to_string() + "view_image".to_string(), + "agent".to_string() ], "exp-5.1 should expose the apply_patch tool", ); diff --git a/codex-rs/core/tests/suite/prompt_caching.rs b/codex-rs/core/tests/suite/prompt_caching.rs index f4455fd022ad..8575163c9d6c 100644 --- a/codex-rs/core/tests/suite/prompt_caching.rs +++ b/codex-rs/core/tests/suite/prompt_caching.rs @@ -126,13 +126,12 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> { let req1 = mount_sse_once(&server, sse_completed("resp-1")).await; let req2 = mount_sse_once(&server, sse_completed("resp-2")).await; - let TestCodex { codex, config, .. } = test_codex() + let TestCodex { codex, .. } = test_codex() .with_config(|config| { config.user_instructions = Some("be consistent and helpful".to_string()); }) .build(&server) .await?; - let base_instructions = config.model_family.base_instructions.clone(); codex .submit(Op::UserInput { @@ -152,7 +151,10 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> { .await?; wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; - let expected_tools_names = vec![ + let expected_instructions: &str = include_str!("../../gpt_5_codex_prompt.md"); + // our internal implementation is responsible for keeping tools in sync + // with the OpenAI schema, so we just verify the tool presence here + let expected_tools_names: &[&str] = &[ "shell_command", "list_mcp_resources", "list_mcp_resource_templates", @@ -160,31 +162,21 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> { "update_plan", "apply_patch", "view_image", + "agent", ]; let body0 = req1.single_request().body_json(); - - let expected_instructions = if expected_tools_names.contains(&"apply_patch") { - base_instructions - } else { - [ - base_instructions.clone(), - include_str!("../../../apply-patch/apply_patch_tool_instructions.md").to_string(), - ] - .join("\n") - }; - assert_eq!( body0["instructions"], serde_json::json!(expected_instructions), ); - assert_tool_names(&body0, &expected_tools_names); + assert_tool_names(&body0, expected_tools_names); let body1 = req2.single_request().body_json(); assert_eq!( body1["instructions"], serde_json::json!(expected_instructions), ); - assert_tool_names(&body1, &expected_tools_names); + assert_tool_names(&body1, expected_tools_names); Ok(()) } diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs index fabbabe659d8..8e4e8b0c0ec2 100644 --- a/codex-rs/exec/src/event_processor_with_human_output.rs +++ b/codex-rs/exec/src/event_processor_with_human_output.rs @@ -571,6 +571,10 @@ impl EventProcessor for EventProcessorWithHumanOutput { | EventMsg::UserMessage(_) | EventMsg::EnteredReviewMode(_) | EventMsg::ExitedReviewMode(_) + | EventMsg::ListAgentsResponse(_) + | EventMsg::AgentBegin(_) + | EventMsg::AgentProgress(_) + | EventMsg::AgentEnd(_) | EventMsg::AgentMessageDelta(_) | EventMsg::AgentReasoningDelta(_) | EventMsg::AgentReasoningRawContentDelta(_) diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs index cceb8efc5d8b..1775b1f01733 100644 --- a/codex-rs/mcp-server/src/codex_tool_runner.rs +++ b/codex-rs/mcp-server/src/codex_tool_runner.rs @@ -306,7 +306,11 @@ async fn run_codex_tool_session_inner( | EventMsg::UndoStarted(_) | EventMsg::UndoCompleted(_) | EventMsg::ExitedReviewMode(_) - | EventMsg::DeprecationNotice(_) => { + | EventMsg::DeprecationNotice(_) + | EventMsg::ListAgentsResponse(_) + | EventMsg::AgentBegin(_) + | EventMsg::AgentProgress(_) + | EventMsg::AgentEnd(_) => { // For now, we do not do anything extra for these // events. Note that // send(codex_event_to_notification(&event)) above has diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index 27267de439b5..48421b2014b4 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -182,6 +182,10 @@ pub enum Op { /// Reply is delivered via `EventMsg::McpListToolsResponse`. ListMcpTools, + /// Request the list of available AI agents. + /// Reply is delivered via `EventMsg::ListAgentsResponse`. + ListAgents, + /// Request the list of available custom prompts. ListCustomPrompts, @@ -554,6 +558,18 @@ pub enum EventMsg { /// List of custom prompts available to the agent. ListCustomPromptsResponse(ListCustomPromptsResponseEvent), + /// List of available AI agents. + ListAgentsResponse(ListAgentsResponseEvent), + + /// Agent invocation started. + AgentBegin(AgentBeginEvent), + + /// Agent execution progress. + AgentProgress(AgentProgressEvent), + + /// Agent completed. + AgentEnd(AgentEndEvent), + PlanUpdate(UpdatePlanArgs), TurnAborted(TurnAbortedEvent), @@ -1536,6 +1552,63 @@ pub struct ListCustomPromptsResponseEvent { pub custom_prompts: Vec, } +#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] +pub struct ListAgentsResponseEvent { + pub agents: Vec, +} + +#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] +pub struct AgentInfo { + pub name: String, + pub description: String, + pub is_builtin: bool, +} + +#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] +pub struct AgentBeginEvent { + pub call_id: String, + pub agent_name: String, + pub task: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub parent_context: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub plan_item_id: Option, +} + +#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] +pub struct AgentProgressEvent { + pub call_id: String, + pub agent_name: String, + pub step: String, + pub progress_type: AgentProgressType, +} + +#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] +pub enum AgentProgressType { + Loop(String), + FileChange(PathBuf, FileChange), + Output(String), + ToolCall(String), +} + +#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] +pub struct AgentEndEvent { + pub call_id: String, + pub agent_name: String, + pub summary: String, + pub status: AgentStatus, + pub duration_ms: u64, + #[serde(skip_serializing_if = "Option::is_none")] + pub plan_item_id: Option, +} + +#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] +pub enum AgentStatus { + Running, + Done, + Failed, +} + #[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] pub struct SessionConfiguredEvent { /// Name left as session_id instead of conversation_id for backwards compatibility. From 0311f0856bf4d070065d6d81d0f3283e62ffb4d0 Mon Sep 17 00:00:00 2001 From: Jose <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 01:45:09 +0100 Subject: [PATCH 02/22] tui: add /agents list and @mentions --- codex-rs/tui/Cargo.toml | 1 + codex-rs/tui/src/agent_mention.rs | 108 +++++++++++++++++++++++ codex-rs/tui/src/chatwidget.rs | 44 +++++++++- codex-rs/tui/src/history_cell.rs | 138 ++++++++++++++++++++++++++++++ codex-rs/tui/src/lib.rs | 1 + codex-rs/tui/src/slash_command.rs | 5 ++ 6 files changed, 296 insertions(+), 1 deletion(-) create mode 100644 codex-rs/tui/src/agent_mention.rs diff --git a/codex-rs/tui/Cargo.toml b/codex-rs/tui/Cargo.toml index 01fef9cc7403..d15bdb77e3da 100644 --- a/codex-rs/tui/Cargo.toml +++ b/codex-rs/tui/Cargo.toml @@ -49,6 +49,7 @@ dunce = { workspace = true } image = { workspace = true, features = ["jpeg", "png"] } itertools = { workspace = true } lazy_static = { workspace = true } +once_cell = "1.19" mcp-types = { workspace = true } opentelemetry-appender-tracing = { workspace = true } pathdiff = { workspace = true } diff --git a/codex-rs/tui/src/agent_mention.rs b/codex-rs/tui/src/agent_mention.rs new file mode 100644 index 000000000000..5af1f0d5ae1b --- /dev/null +++ b/codex-rs/tui/src/agent_mention.rs @@ -0,0 +1,108 @@ +use once_cell::sync::Lazy; +use regex_lite::Regex; + +// Compile regex once at startup +#[allow(clippy::expect_used)] +static AGENT_MENTION_RE: Lazy = Lazy::new(|| { + Regex::new(r"@(\w+):?\s+([^\n@]+)") + .expect("Failed to compile agent mention regex - this is a bug") +}); + +#[derive(Debug, Clone)] +pub struct AgentMention { + pub agent_name: String, + pub task: String, + #[allow(dead_code)] + pub raw_text: String, + pub start_pos: usize, + pub end_pos: usize, +} + +/// Parse @agent mentions in text +/// Formats supported: +/// - @agent_name: task description +/// - @agent_name task description +pub fn parse_agent_mentions(text: &str) -> Vec { + let mut mentions = Vec::new(); + + for cap in AGENT_MENTION_RE.captures_iter(text) { + if let (Some(name), Some(task), Some(full_match)) = (cap.get(1), cap.get(2), cap.get(0)) { + mentions.push(AgentMention { + agent_name: name.as_str().to_string(), + task: task.as_str().trim().to_string(), + raw_text: full_match.as_str().to_string(), + start_pos: full_match.start(), + end_pos: full_match.end(), + }); + } + } + mentions +} + +/// Convert agent mention to tool call format +pub fn convert_to_agent_call(mention: &AgentMention) -> String { + format!("Use the {} agent to {}", mention.agent_name, mention.task) +} + +/// Replace mentions in text with converted calls +pub fn replace_mentions_with_calls(text: &str) -> String { + let mentions = parse_agent_mentions(text); + if mentions.is_empty() { + return text.to_string(); + } + + let mut result = text.to_string(); + + // Replace from end to start to preserve positions + for mention in mentions.iter().rev() { + let call = convert_to_agent_call(mention); + result.replace_range(mention.start_pos..mention.end_pos, &call); + } + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_single_mention() { + let text = "@researcher: find information about Rust"; + let mentions = parse_agent_mentions(text); + assert_eq!(mentions.len(), 1); + assert_eq!(mentions[0].agent_name, "researcher"); + assert_eq!(mentions[0].task, "find information about Rust"); + } + + #[test] + fn test_parse_multiple_mentions() { + let text = "@researcher: find docs @reviewer: check the code"; + let mentions = parse_agent_mentions(text); + assert_eq!(mentions.len(), 2); + assert_eq!(mentions[0].agent_name, "researcher"); + assert_eq!(mentions[1].agent_name, "reviewer"); + } + + #[test] + fn test_convert_to_agent_call() { + let mention = AgentMention { + agent_name: "researcher".to_string(), + task: "find information".to_string(), + raw_text: "@researcher: find information".to_string(), + start_pos: 0, + end_pos: 29, + }; + let call = convert_to_agent_call(&mention); + assert_eq!(call, "Use the researcher agent to find information"); + } + + #[test] + fn test_replace_mentions() { + let text = "@researcher: find docs about async"; + let replaced = replace_mentions_with_calls(text); + assert_eq!( + replaced, + "Use the researcher agent to find docs about async" + ); + } +} diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 1e9fbd39fb40..2453cf417b5e 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -1481,6 +1481,10 @@ impl ChatWidget { self.add_info_message("Rollout path is not available yet.".to_string(), None); } } + SlashCommand::Agents => { + self.add_agents_output(); + } + #[cfg(debug_assertions)] SlashCommand::TestApproval => { use codex_core::protocol::EventMsg; use std::collections::HashMap; @@ -1574,7 +1578,10 @@ impl ChatWidget { } fn submit_user_message(&mut self, user_message: UserMessage) { - let UserMessage { text, image_paths } = user_message; + let UserMessage { + mut text, + image_paths, + } = user_message; if text.is_empty() && image_paths.is_empty() { return; } @@ -1599,6 +1606,25 @@ impl ChatWidget { return; } + // Parse and convert @agent mentions into explicit agent tool calls so the + // core can run the requested sub-agent. Show a small history marker for + // visibility before queuing the transformed message. + if !text.is_empty() && text.contains('@') { + use crate::agent_mention::parse_agent_mentions; + use crate::agent_mention::replace_mentions_with_calls; + + let mentions = parse_agent_mentions(&text); + if !mentions.is_empty() { + for mention in &mentions { + self.add_to_history(history_cell::new_agent_invocation( + &mention.agent_name, + &mention.task, + )); + } + text = replace_mentions_with_calls(&text); + } + } + if !text.is_empty() { items.push(UserInput::Text { text: text.clone() }); } @@ -1727,6 +1753,18 @@ impl ChatWidget { EventMsg::GetHistoryEntryResponse(ev) => self.on_get_history_entry_response(ev), EventMsg::McpListToolsResponse(ev) => self.on_list_mcp_tools(ev), EventMsg::ListCustomPromptsResponse(ev) => self.on_list_custom_prompts(ev), + EventMsg::ListAgentsResponse(ev) => { + self.add_to_history(history_cell::new_agents_list(ev.agents)); + } + EventMsg::AgentBegin(ev) => { + self.add_to_history(history_cell::new_agent_begin(&ev)); + } + EventMsg::AgentProgress(ev) => { + self.add_to_history(history_cell::new_agent_progress(&ev)); + } + EventMsg::AgentEnd(ev) => { + self.add_to_history(history_cell::new_agent_end(&ev)); + } EventMsg::ShutdownComplete => self.on_shutdown_complete(), EventMsg::TurnDiff(TurnDiffEvent { unified_diff }) => self.on_turn_diff(unified_diff), EventMsg::DeprecationNotice(ev) => self.on_deprecation_notice(ev), @@ -2719,6 +2757,10 @@ impl ChatWidget { } } + pub(crate) fn add_agents_output(&mut self) { + self.submit_op(Op::ListAgents); + } + /// Forward file-search results to the bottom pane. pub(crate) fn apply_file_search_result(&mut self, query: String, matches: Vec) { self.bottom_pane.on_file_search_result(query, matches); diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs index 02ab0d243be2..1b560836b5f4 100644 --- a/codex-rs/tui/src/history_cell.rs +++ b/codex-rs/tui/src/history_cell.rs @@ -22,6 +22,7 @@ use crate::wrapping::RtOptions; use crate::wrapping::word_wrap_line; use crate::wrapping::word_wrap_lines; use base64::Engine; +use codex_common::elapsed::format_duration; use codex_common::format_env_display::format_env_display; use codex_core::config::Config; use codex_core::config::types::McpServerTransportConfig; @@ -1259,6 +1260,143 @@ pub(crate) fn new_error_event(message: String) -> PlainHistoryCell { } /// Render a user‑friendly plan update styled like a checkbox todo list. +/// Visual cell for agent invocation +pub(crate) fn new_agent_invocation(agent_name: &str, task: &str) -> PlainHistoryCell { + let lines = vec![ + Line::from(vec![ + "🤖 ".into(), + "Agent: ".dim(), + Span::styled( + agent_name.to_string(), + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ), + ]), + Line::from(vec![" Task: ".dim(), task.to_string().into()]), + ]; + PlainHistoryCell { lines } +} + +/// Visual cell for agents list +pub(crate) fn new_agents_list(agents: Vec) -> PlainHistoryCell { + let mut lines = vec![ + Line::from(vec![Span::styled( + "Available Agents", + Style::default().add_modifier(Modifier::BOLD | Modifier::UNDERLINED), + )]), + Line::default(), // Empty line + ]; + + if agents.is_empty() { + lines.push(Line::from("No agents configured".dim())); + } else { + for agent in agents { + let bullet = if agent.is_builtin { "•" } else { "◦" }; + lines.push(Line::from(vec![ + format!(" {bullet} ").into(), + Span::styled( + agent.name.clone(), + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ), + " - ".dim(), + agent.description.clone().into(), + ])); + } + } + + lines.push(Line::default()); // Empty line + lines.push(Line::from(vec![ + "Usage: ".dim(), + Span::styled( + "@agent_name: task description", + Style::default().add_modifier(Modifier::ITALIC), + ), + ])); + + PlainHistoryCell { lines } +} + +/// Visual cell for agent execution begin +pub(crate) fn new_agent_begin(event: &codex_core::protocol::AgentBeginEvent) -> PlainHistoryCell { + let lines = vec![ + Line::from(vec![ + Span::styled("⚡ ", Style::default().fg(Color::Cyan)), + Span::styled("Running ", Style::default().fg(Color::Cyan)), + Span::styled( + event.agent_name.clone(), + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ), + ]), + Line::from(vec![" ".into(), format!("Task: {}", event.task).dim()]), + ]; + PlainHistoryCell { lines } +} + +/// Visual cell for agent progress +pub(crate) fn new_agent_progress( + event: &codex_core::protocol::AgentProgressEvent, +) -> PlainHistoryCell { + use codex_core::protocol::AgentProgressType; + + let prefix = match &event.progress_type { + AgentProgressType::Loop(_) => "⟳ ", + AgentProgressType::FileChange(_, _) => "📝 ", + AgentProgressType::Output(_) => "💬 ", + AgentProgressType::ToolCall(_) => "🔧 ", + }; + + let content = match &event.progress_type { + AgentProgressType::Loop(step) => step.clone(), + AgentProgressType::FileChange(path, _) => format!("Modified: {}", path.display()), + AgentProgressType::Output(text) => text.clone(), + AgentProgressType::ToolCall(tool) => format!("Using tool: {tool}"), + }; + + let lines = vec![Line::from(vec![ + Span::styled(format!(" {prefix} "), Style::default().fg(Color::Yellow)), + Span::styled(format!("[{}] ", event.agent_name), Style::default().dim()), + Span::styled(content, Style::default().fg(Color::White)), + ])]; + PlainHistoryCell { lines } +} + +/// Visual cell for agent completion +pub(crate) fn new_agent_end(event: &codex_core::protocol::AgentEndEvent) -> PlainHistoryCell { + use codex_core::protocol::AgentStatus; + + let duration = format_duration(Duration::from_millis(event.duration_ms)); + + let (status_icon, status_text) = match event.status { + AgentStatus::Done => ("✓".green(), " Done ".green()), + AgentStatus::Failed => ("✗".red(), " Failed ".red()), + AgentStatus::Running => ("⟲".cyan(), " Running ".cyan()), // Shouldn't happen here but handle it + }; + + let mut lines = vec![Line::from(vec![ + status_icon, + status_text, + Span::styled( + event.agent_name.clone(), + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ), + format!(" ({duration})").dim(), + ])]; + + // Add summary with proper indentation + for line in event.summary.lines() { + lines.push(Line::from(vec![" ".into(), line.to_string().into()])); + } + + PlainHistoryCell { lines } +} + pub(crate) fn new_plan_update(update: UpdatePlanArgs) -> PlanUpdateCell { let UpdatePlanArgs { explanation, plan } = update; PlanUpdateCell { explanation, plan } diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index 33bd18c4379c..fafb593d5902 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -33,6 +33,7 @@ use tracing_subscriber::filter::Targets; use tracing_subscriber::prelude::*; mod additional_dirs; +mod agent_mention; mod app; mod app_backtrack; mod app_event; diff --git a/codex-rs/tui/src/slash_command.rs b/codex-rs/tui/src/slash_command.rs index 969d279b07b3..2ab562ff86d9 100644 --- a/codex-rs/tui/src/slash_command.rs +++ b/codex-rs/tui/src/slash_command.rs @@ -23,6 +23,7 @@ pub enum SlashCommand { Mention, Status, Mcp, + Agents, Logout, Quit, Exit, @@ -48,6 +49,7 @@ impl SlashCommand { SlashCommand::Model => "choose what model and reasoning effort to use", SlashCommand::Approvals => "choose what Codex can do without approval", SlashCommand::Mcp => "list configured MCP tools", + SlashCommand::Agents => "list available AI agents and their descriptions", SlashCommand::Logout => "log out of Codex", SlashCommand::Rollout => "print the rollout file path", SlashCommand::TestApproval => "test approval request", @@ -76,9 +78,12 @@ impl SlashCommand { | SlashCommand::Status | SlashCommand::Mcp | SlashCommand::Feedback + | SlashCommand::Agents | SlashCommand::Quit | SlashCommand::Exit => true, SlashCommand::Rollout => true, + + #[cfg(debug_assertions)] SlashCommand::TestApproval => true, } } From 168d94bffa8fbc66fe80607cac17acebc08b0904 Mon Sep 17 00:00:00 2001 From: Jose <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 01:45:21 +0100 Subject: [PATCH 03/22] docs: document multi-agent system and config --- README.md | 6 +- docs/config.md | 75 ++++++- docs/getting-started.md | 21 +- docs/subagents.md | 428 ++++++++++++++++++++++++++++++++++++++++ example-agents.toml | 202 +++++++++++++++++++ 5 files changed, 727 insertions(+), 5 deletions(-) create mode 100644 docs/subagents.md create mode 100644 example-agents.toml diff --git a/README.md b/README.md index 78eaf9eb3567..51e35a6c89a6 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ You can also use Codex with an API key, but this requires [additional setup](./d ### Model Context Protocol (MCP) -Codex can access MCP servers. To configure them, refer to the [config docs](./docs/config.md#mcp_servers). +Codex can access MCP servers. See the [config docs](./docs/config.md#mcp_servers) and the [advanced guide](./docs/advanced.md#model-context-protocol-mcp) for setup details; add an `mcp_servers` section to your `~/.codex/config.toml` to enable. ### Configuration @@ -96,6 +96,10 @@ See the [Execpolicy quickstart](./docs/execpolicy.md) to set up rules that gover - [**Advanced**](./docs/advanced.md) - [Tracing / verbose logging](./docs/advanced.md#tracing--verbose-logging) - [Model Context Protocol (MCP)](./docs/advanced.md#model-context-protocol-mcp) +- [**Multi-Agent System**](./docs/subagents.md) + - [Custom agent configuration](./docs/subagents.md#custom-agent-configuration) + - [Agent behavior](./docs/subagents.md#agent-behavior) + - [Best practices](./docs/subagents.md#best-practices) - [**Zero data retention (ZDR)**](./docs/zdr.md) - [**Contributing**](./docs/contributing.md) - [**Install & build**](./docs/install.md) diff --git a/docs/config.md b/docs/config.md index d14f7482f25d..d219efd87859 100644 --- a/docs/config.md +++ b/docs/config.md @@ -11,7 +11,6 @@ Codex configuration gives you fine-grained control over the model, execution env - [Observability and telemetry](#observability-and-telemetry) - [Profiles and overrides](#profiles-and-overrides) - [Reference table](#config-reference) - Codex supports several mechanisms for setting config values: - Config-specific command-line flags, such as `--model o3` (highest precedence). @@ -858,7 +857,75 @@ Note this is **not** a general editor setting (like `$EDITOR`), as it only accep Currently, `"vscode"` is the default, though Codex does not verify VS Code is installed. As such, `file_opener` may default to `"none"` or something else in the future. -### project_doc_max_bytes +## Agent Configuration + +### Custom Agents File + +Create custom specialized agents by adding a configuration file at `~/.codex/agents.toml`: + +```toml +# ~/.codex/agents.toml + +[researcher] +prompt = """ +You are a research specialist. Focus on gathering comprehensive information, +verifying facts, and providing detailed citations. +""" + +[code-reviewer] +prompt = """ +You are an expert code reviewer. Identify bugs, suggest improvements, +and ensure best practices are followed. +""" + +[test-writer] +prompt_file = "prompts/test-writer.md" # Load from external file +``` + +Each agent inherits tools and permissions from the parent context. Agents can be invoked using: + +- `@agent_name: task description` - Natural mention syntax +- `/agents` command - View all available agents +- Agent tool - Programmatic invocation + +For detailed agent configuration options, see [Multi-Agent System](./subagents.md). + +## hide_agent_reasoning + +Codex intermittently emits "reasoning" events that show the model's internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output. + +Setting `hide_agent_reasoning` to `true` suppresses these events in **both** the TUI as well as the headless `exec` sub-command: + +```toml +hide_agent_reasoning = true # defaults to false +``` + +## show_raw_agent_reasoning + +Surfaces the model’s raw chain-of-thought ("raw reasoning content") when available. + +Notes: + +- Only takes effect if the selected model/provider actually emits raw reasoning content. Many models do not. When unsupported, this option has no visible effect. +- Raw reasoning may include intermediate thoughts or sensitive context. Enable only if acceptable for your workflow. + +Example: + +```toml +show_raw_agent_reasoning = true # defaults to false +``` + +## model_context_window + +The size of the context window for the model, in tokens. + +In general, Codex knows the context window for the most common OpenAI models, but if you are using a new model with an old version of the Codex CLI, then you can use `model_context_window` to tell Codex what value to use to determine how much context is left during a conversation. + +## model_max_output_tokens + +This is analogous to `model_context_window`, but for the maximum number of output tokens for the model. + +## project_doc_max_bytes Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB. @@ -939,6 +1006,7 @@ Valid values: | `model` | string | Model to use (e.g., `gpt-5.1-codex-max`). | | `model_provider` | string | Provider id from `model_providers` (default: `openai`). | | `model_context_window` | number | Context window tokens. | +| `model_max_output_tokens` | number | Max output tokens the model may return. | | `tool_output_token_limit` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). | | `approval_policy` | `untrusted` \| `on-failure` \| `on-request` \| `never` | When to prompt for approval. | | `sandbox_mode` | `read-only` \| `workspace-write` \| `danger-full-access` | OS sandbox policy. | @@ -947,6 +1015,7 @@ Valid values: | `sandbox_workspace_write.exclude_tmpdir_env_var` | boolean | Exclude `$TMPDIR` from writable roots (default: false). | | `sandbox_workspace_write.exclude_slash_tmp` | boolean | Exclude `/tmp` from writable roots (default: false). | | `notify` | array | External program for notifications. | +| `disable_response_storage` | boolean | Required for zero-data-retention orgs; skip saving responses. | | `tui.animations` | boolean | Enable terminal animations (welcome screen, shimmer, spinner). Defaults to true; set to `false` to disable visual motion. | | `instructions` | string | Currently ignored; use `experimental_instructions_file` or `AGENTS.md`. | | `features.` | boolean | See [feature flags](#feature-flags) for details | @@ -986,6 +1055,8 @@ Valid values: | `model_supports_reasoning_summaries` | boolean | Force‑enable reasoning summaries. | | `model_reasoning_summary_format` | `none` \| `experimental` | Force reasoning summary format. | | `chatgpt_base_url` | string | Base URL for ChatGPT auth flow. | +| `experimental_resume` | string (path) | Resume JSONL path (internal/experimental). | +| `responses_originator_header_internal_override` | string | Override `originator` header value (internal/experimental). | | `experimental_instructions_file` | string (path) | Replace built‑in instructions (experimental). | | `experimental_use_exec_command_tool` | boolean | Use experimental exec command tool. | | `projects..trust_level` | string | Mark project/worktree as trusted (only `"trusted"` is recognized). | diff --git a/docs/getting-started.md b/docs/getting-started.md index 83c66c572487..a92e4d220dcc 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -3,6 +3,7 @@ Looking for something specific? Jump ahead: - [Tips & shortcuts](#tips--shortcuts) – hotkeys, resume flow, prompts +- [Multi-agent quickstart](#multi-agent-quickstart) – @mentions, /agents - [Non-interactive runs](./exec.md) – automate with `codex exec` - Ready for deeper customization? Head to [`advanced.md`](./advanced.md) @@ -57,9 +58,16 @@ Below are a few bite-size examples you can copy-paste. Replace the text in quote | 5 | `codex "Explain what this regex does: ^(?=.*[A-Z]).{8,}$"` | Outputs a step-by-step human explanation. | | 6 | `codex "Carefully review this repo, and propose 3 high impact well-scoped PRs"` | Suggests impactful PRs in the current codebase. | | 7 | `codex "Look for vulnerabilities and create a security review report"` | Finds and explains security bugs. | +| 8 | `@code-reviewer: audit src/auth` | Delegates to the `code-reviewer` agent; progress is tracked in the plan. | Looking to reuse your own instructions? Create slash commands with [custom prompts](./prompts.md). +### Multi-agent quickstart + +- List available agents with `/agents` in the TUI (built-in agents are marked with •, custom ones with ◦). +- Delegate work using `@agent-name: task`—Codex streams progress and updates the plan item automatically. +- Configure custom agents in `~/.codex/agents.toml`; see [Multi-Agent System](./subagents.md) for the full schema and examples. + ### Memory with AGENTS.md You can give Codex extra instructions and guidance using `AGENTS.md` files. Codex looks for them in the following places, and merges them top-down: @@ -71,9 +79,18 @@ For more information on how to use AGENTS.md, see the [official AGENTS.md docume ### Tips & shortcuts -#### Use `@` for file search +#### Use `@` for file search and agent invocation + +Typing `@` has two powerful uses: + +1. **File search**: When followed by a path-like pattern, it triggers a fuzzy-filename search over the workspace root. Use up/down to select among the results and Tab or Enter to replace the `@` with the selected path. You can use Esc to cancel the search. + +2. **Agent invocation**: When followed by an agent name and colon, it invokes a specialized AI agent. For example: + - `@researcher: find documentation about async Rust` + - `@code-reviewer: review the changes in src/` + - `@test-writer: create unit tests for utils.js` -Typing `@` triggers a fuzzy-filename search over the workspace root. Use up/down to select among the results and Tab or Enter to replace the `@` with the selected path. You can use Esc to cancel the search. + Use `/agents` to see all available agents. Agent tasks are automatically tracked in the plan system. #### Esc–Esc to edit a previous message diff --git a/docs/subagents.md b/docs/subagents.md new file mode 100644 index 000000000000..0632d0a0016e --- /dev/null +++ b/docs/subagents.md @@ -0,0 +1,428 @@ +# Multi-Agent Orchestration System + +Codex CLI includes a powerful multi-agent orchestration system that allows you to invoke specialized AI agents for complex tasks. Each agent can be customized with specific system prompts while inheriting tools and permissions from the parent context. + +## Overview + +The agent system enables you to: + +- Define specialized agents with custom system prompts +- Invoke agents using natural `@agent` mention syntax +- Automatically track agent tasks in the plan system +- View real-time agent execution progress +- Maintain context isolation between agent executions +- Prevent recursive agent spawning for safety +- Get comprehensive summaries of agent work + +## Quick Start + +### Using @Agent Mentions (Recommended) + +The most natural way to invoke agents is using the `@agent` mention syntax: + +``` +@researcher: find information about React hooks + +@code-reviewer: review the changes in src/ + +@test-writer: create unit tests for the new functions +``` + +When you use `@agent` mentions: + +- The agent task is automatically added to the plan with "in_progress" status +- You see real-time progress updates during execution +- The plan updates to "completed" when the agent finishes + +### Using the Agent Tool + +You can also invoke agents programmatically with the `agent` tool: + +``` +Please use the researcher agent to find information about React hooks + +Use the code-reviewer agent to review the changes in src/ + +Have the test-writer agent create unit tests for the new functions +``` + +### Viewing Available Agents + +Use the `/agents` command to see all available agents: + +``` +/agents +``` + +This displays: + +- Built-in agents (marked with •) +- Custom agents from your configuration (marked with ◦) +- Brief descriptions of each agent's purpose + +## Built-in Agents + +Codex comes with one built-in agent: + +- **`general`** - A general-purpose AI assistant for completing tasks efficiently and accurately + +## Custom Agent Configuration + +Create custom agents by adding a configuration file at `~/.codex/agents.toml`: + +```toml +# ~/.codex/agents.toml + +[researcher] +prompt = """ +You are a research specialist. Your role is to: +- Gather comprehensive information from multiple sources +- Verify facts and cross-reference findings +- Provide detailed citations and sources +- Summarize findings in a structured format +Focus on accuracy and thoroughness over speed. +""" + +[code-reviewer] +prompt = """ +You are an expert code reviewer. Your responsibilities: +- Identify potential bugs and security issues +- Suggest performance improvements +- Ensure code follows best practices +- Check for proper error handling +- Verify test coverage +Provide constructive feedback with specific examples. +""" + +[test-writer] +prompt_file = "prompts/test-writer.md" # Load from external file + +[refactorer] +prompt = """ +You are a refactoring specialist. Focus on: +- Improving code readability and maintainability +- Reducing complexity and duplication +- Applying design patterns appropriately +- Ensuring backward compatibility +Always explain the reasoning behind refactoring decisions. +""" +tools = ["read", "write", "grep"] # Optional: override available tools + +[documenter] +prompt = """ +You are a documentation expert. Your tasks: +- Write clear, comprehensive documentation +- Create useful code examples +- Maintain consistent formatting +- Include API references +- Add helpful diagrams when appropriate +""" +permissions = "readonly" # Optional: override permissions +``` + +## Configuration Options + +Each agent supports the following configuration options: + +| Field | Type | Description | +| ------------- | ------ | --------------------------------------------------------------------- | +| `prompt` | String | The system prompt that defines the agent's behavior | +| `prompt_file` | String | Path to a file containing the prompt (alternative to inline `prompt`) | +| `tools` | Array | Optional: Override the available tools for this agent | +| `permissions` | String | Optional: Override the permission level for this agent | + +### Prompt Files + +For longer prompts, you can store them in separate files: + +```toml +[complex-agent] +prompt_file = "prompts/complex-agent.md" # Relative to ~/.codex/ +``` + +Or use absolute paths: + +```toml +[complex-agent] +prompt_file = "/home/user/my-prompts/complex-agent.md" +``` + +## Visual Feedback and Plan Integration + +### Real-Time Status Indicators + +When agents execute, you'll see visual feedback: + +- **⚡ Running** (yellow) - Agent is currently executing +- **⟳** Progress loops - Iterative steps the agent is performing +- **📝** File changes - Files being modified +- **💬** Outputs - Agent responses and analysis +- **🔧** Tool usage - Tools being invoked +- **✓ Done** (green) - Agent completed successfully + +### Automatic Plan Tracking + +Every agent invocation automatically creates a plan item: + +1. **Plan Creation**: When you use `@agent: task`, a plan item is created +2. **Status Tracking**: Plan shows "in_progress" during execution +3. **Completion Update**: Plan updates to "completed" when done +4. **Linked Execution**: Plan items are linked to agent events via `plan_item_id` + +Example workflow: + +``` +User: @researcher: find async Rust patterns +System: [Creates plan item: "@researcher: find async Rust patterns" - in_progress] +System: ⚡ Running researcher +System: ⟳ [researcher] Analyzing task requirements +System: ⟳ [researcher] Searching for documentation +System: ✓ Done researcher (3.2s) +System: [Updates plan item to completed] +``` + +## Agent Behavior + +### Context Inheritance + +By default, agents inherit: + +- All available tools from the parent context +- Permission levels from the parent context +- Working directory and environment variables + +This ensures agents have the same capabilities as the main conversation while maintaining isolation. + +### Recursion Prevention + +To prevent infinite loops and resource exhaustion, agents **cannot spawn other agents**. If an agent attempts to use the `agent` tool, it will receive an error message. + +### Execution Isolation + +Each agent execution: + +- Runs in an isolated conversation context +- Cannot access the parent conversation history +- Returns a comprehensive summary to the parent +- Tracks all file changes and execution loops + +## Agent Summaries + +When an agent completes its task, it provides a structured summary including: + +1. **Execution Loops** - Key steps and iterations performed +2. **File Changes** - All files created, modified, or deleted +3. **Key Outputs** - Important results (auto-compacted for long outputs) +4. **Final Summary** - Overall accomplishment and any recommendations + +Long outputs are automatically compacted to show the first 5 and last 3 lines with a truncation indicator. + +## Best Practices + +### 1. Specialized Prompts + +Create focused agents with clear responsibilities: + +```toml +[security-auditor] +prompt = """ +You are a security specialist focused exclusively on: +- Identifying vulnerabilities (SQL injection, XSS, etc.) +- Checking authentication and authorization +- Reviewing encryption and data protection +- Analyzing dependencies for known CVEs +Do not fix issues, only identify and report them. +""" +``` + +### 2. Tool Restrictions + +Limit tools for safety when appropriate: + +```toml +[analyzer] +tools = ["read", "grep", "glob"] # Read-only analysis +prompt = "You are a code analyzer. Examine code without making changes..." +``` + +### 3. Composable Agents + +Design agents that work well together: + +```toml +[planner] +prompt = "Create detailed implementation plans with clear steps..." + +[implementer] +prompt = "Execute implementation plans step by step..." + +[validator] +prompt = "Verify implementations meet requirements..." +``` + +### 4. Prompt Engineering + +Structure prompts for clarity: + +```toml +[api-designer] +prompt = """ +Role: API Design Specialist + +Responsibilities: +- Design RESTful APIs following OpenAPI specification +- Ensure consistent naming conventions +- Include proper error responses +- Document all endpoints thoroughly + +Constraints: +- Follow REST best practices +- Use semantic versioning +- Include rate limiting considerations + +Output Format: +- OpenAPI 3.0 specification +- Implementation examples +- Testing strategies +""" +``` + +## Examples + +### Research Agent + +```toml +[researcher] +prompt = """ +You are a meticulous researcher. For any topic: +1. Start with a broad overview +2. Identify authoritative sources +3. Deep dive into specific aspects +4. Cross-reference claims +5. Summarize with citations +Always distinguish between facts and opinions. +""" +``` + +Usage: "Use the researcher agent to investigate WebAssembly performance characteristics" + +### Migration Agent + +```toml +[migrator] +prompt = """ +You are a migration specialist. When migrating code: +1. Analyze the current implementation +2. Identify breaking changes +3. Create a migration plan +4. Implement incrementally +5. Verify backward compatibility +6. Update documentation +Prioritize safety and reversibility. +""" +``` + +Usage: "Have the migrator agent help upgrade our React 17 code to React 18" + +### Performance Agent + +```toml +[performance-optimizer] +prompt = """ +You are a performance optimization expert: +- Profile code to identify bottlenecks +- Suggest algorithmic improvements +- Optimize resource usage +- Reduce unnecessary computations +- Implement caching strategies +Always measure before and after changes. +""" +tools = ["read", "write", "exec"] +``` + +Usage: "Use the performance-optimizer agent to improve the data processing pipeline" + +## Troubleshooting + +### Agent Not Found + +If an agent isn't recognized: + +1. Check that `~/.codex/agents.toml` exists +2. Verify the agent name matches exactly (case-sensitive) +3. Ensure the TOML syntax is valid +4. Check file permissions + +### Prompt File Not Loading + +If using `prompt_file`: + +1. Verify the file path is correct +2. Check file permissions +3. Use absolute paths if relative paths aren't working +4. Ensure the file contains valid text + +### Agent Recursion Error + +If you see "Agents cannot spawn other agents": + +- This is by design to prevent infinite loops +- Restructure your task to avoid nested agent calls +- Use a single agent for the entire task + +## Advanced Configuration + +### Environment-Specific Agents + +Create different agent sets for different environments: + +```bash +# Development agents +cp ~/.codex/agents.toml ~/.codex/agents.dev.toml + +# Production agents +cp ~/.codex/agents.toml ~/.codex/agents.prod.toml + +# Symlink based on environment +ln -sf ~/.codex/agents.dev.toml ~/.codex/agents.toml +``` + +### Team Sharing + +Share agent configurations with your team: + +```bash +# Add to version control +git add .codex/agents.toml +git commit -m "Add team agent configurations" + +# Team members can then: +cp project/.codex/agents.toml ~/.codex/agents.toml +``` + +### Dynamic Loading + +Agents are loaded at runtime, so you can modify `~/.codex/agents.toml` without restarting Codex. Changes take effect on the next agent invocation. + +## Limitations + +- Agents cannot spawn other agents (recursion prevention) +- Agent context is isolated from parent conversation +- Maximum execution time follows parent timeout settings +- Tool availability depends on parent configuration + +## Future Enhancements + +Planned improvements for the agent system: + +- Agent templates and inheritance +- Conditional agent selection based on task analysis +- Agent performance metrics and analytics +- Collaborative multi-agent workflows +- Agent versioning and rollback capabilities + +## See Also + +- [Configuration Guide](./config.md) - General Codex configuration +- [Model Context Protocol](./advanced.md#model-context-protocol-mcp) - MCP server integration +- [Custom Prompts](./prompts.md) - System prompt customization diff --git a/example-agents.toml b/example-agents.toml new file mode 100644 index 000000000000..737c002a2ebf --- /dev/null +++ b/example-agents.toml @@ -0,0 +1,202 @@ +# Example Agent Configuration for Codex Multi-Agent System +# Place this file at ~/.codex/agents.toml to enable custom agents +# +# Usage: +# - @agent_name: task description (e.g., @code_reviewer: review main.rs) +# - /agents (lists all available agents) +# - Agent tasks are automatically tracked in the plan system +# +# Visual indicators during execution: +# - ⚡ Running (yellow) - agent is executing +# - ✓ Done (green) - agent completed successfully + +[code_reviewer] +prompt = """ +You are an expert code reviewer with deep knowledge of software engineering best practices. + +When reviewing code, focus on: +1. Security vulnerabilities (SQL injection, XSS, authentication flaws, race conditions) +2. Performance bottlenecks and algorithmic complexity +3. Code maintainability and readability +4. Design patterns and architectural decisions +5. Error handling and edge cases +6. Memory safety and resource management + +Provide actionable feedback with specific code examples when suggesting improvements. +Be constructive but thorough - don't overlook issues to be polite. +Point out both problems and good practices you observe. +""" + +[test_designer] +prompt = """ +You are a test engineering specialist who designs comprehensive test suites. + +Your approach: +- Write tests that cover happy paths, edge cases, and error conditions +- Use appropriate testing patterns (unit, integration, e2e) for the context +- Ensure tests are maintainable and well-documented +- Follow the testing conventions already established in the codebase +- Aim for high coverage but prioritize critical paths +- Consider property-based testing where appropriate + +Always check existing test patterns in the codebase first and follow them. +Generate test cases that are both thorough and practical. +""" + +[refactorer] +prompt = """ +You are a refactoring expert focused on improving code quality without changing behavior. + +Priorities: +1. Reduce complexity and improve readability +2. Extract reusable components and eliminate duplication +3. Apply appropriate design patterns +4. Improve naming and code organization +5. Optimize performance where possible +6. Ensure backward compatibility + +Always ensure the refactored code passes existing tests. +Explain the reasoning behind each refactoring decision. +Consider the impact on the broader codebase. +""" + +[performance_analyst] +prompt = """ +You are a performance optimization specialist. + +Analyze code for: +- Time complexity and algorithmic efficiency +- Memory usage and allocation patterns +- I/O bottlenecks and network latency +- Caching opportunities +- Parallelization and concurrency potential +- Database query optimization + +Provide specific metrics and benchmarks where possible. +Suggest optimizations with expected performance gains. +Consider trade-offs between performance and maintainability. +Use profiling data to guide recommendations when available. +""" + +[security_auditor] +prompt = """ +You are a security expert specializing in application security. + +Focus areas: +- Authentication and authorization vulnerabilities +- Input validation and sanitization +- Cryptographic implementations +- OWASP Top 10 vulnerabilities +- Supply chain security +- Secrets management +- Network security configurations + +Provide severity ratings for identified issues. +Suggest specific remediation steps with code examples. +Reference relevant security standards and best practices. +Consider both technical and business impact of vulnerabilities. +""" + +[documenter] +prompt = """ +You are an expert technical writer who creates clear, comprehensive documentation. + +Your approach: +1. Understand the audience and write appropriately +2. Structure documentation logically with clear sections +3. Provide practical, runnable examples +4. Cover all important aspects without being verbose +5. Ensure documentation stays synchronized with code +6. Include diagrams and visual aids where helpful + +Documentation types to create: +- API documentation with clear parameter descriptions +- Architecture documentation explaining design decisions +- User guides with step-by-step instructions +- Developer documentation for contributors + +Always examine existing documentation style and match it. +Use clear, concise language and active voice. +""" + +[architect] +prompt = """ +You are a software architect with expertise in system design. + +Responsibilities: +- Evaluate architectural patterns and their trade-offs +- Design scalable and maintainable systems +- Identify technical debt and propose solutions +- Plan migration strategies for legacy systems +- Ensure consistency across the codebase +- Consider non-functional requirements (performance, security, reliability) + +Provide architectural decision records (ADRs) when proposing changes. +Consider both immediate needs and long-term evolution. +Balance ideal solutions with practical constraints. +""" + +[debugger] +prompt = """ +You are a debugging specialist who excels at finding and fixing complex bugs. + +Your methodology: +1. Reproduce the issue consistently +2. Isolate the problem to specific components +3. Use systematic debugging techniques +4. Consider race conditions and edge cases +5. Verify fixes don't introduce new issues +6. Document the root cause and solution + +Tools and techniques: +- Add strategic logging and instrumentation +- Use debuggers and profilers effectively +- Analyze stack traces and error messages +- Consider environmental factors +- Check for common pitfalls in the technology stack + +Always provide a clear explanation of the bug and why the fix works. +""" + +[rust_specialist] +prompt = """ +You are a Rust expert with deep knowledge of the language and ecosystem. + +Expertise areas: +- Ownership, borrowing, and lifetime management +- Safe concurrency patterns and async programming +- Performance optimization and zero-cost abstractions +- FFI and unsafe code guidelines +- Error handling with Result and Option +- Trait design and generics +- Macro development + +Follow Rust idioms and best practices. +Leverage the type system for correctness. +Consider using popular crates from the ecosystem when appropriate. +Ensure code is clippy-clean and follows rustfmt standards. +""" + +[api_designer] +prompt = """ +You are an API design expert specializing in REST, GraphQL, and RPC interfaces. + +Focus on: +- RESTful principles and resource modeling +- API versioning strategies +- Authentication and authorization patterns +- Rate limiting and throttling +- Error handling and status codes +- Request/response schema design +- API documentation and OpenAPI specs + +Design APIs that are: +- Intuitive and consistent +- Backward compatible +- Well-documented +- Performant and scalable +- Secure by default + +Consider API consumers' needs and use cases. +Follow industry standards and best practices. +""" \ No newline at end of file From 1dfb5a0f7769da4ee0d22b2def1ae179628926aa Mon Sep 17 00:00:00 2001 From: Jose <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 01:45:34 +0100 Subject: [PATCH 04/22] chore: add session notes and merge plan --- NEXT_SESSION_NOTES.md | 21 +++++++++++++++++++++ agent-merge-plan.md | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 NEXT_SESSION_NOTES.md create mode 100644 agent-merge-plan.md diff --git a/NEXT_SESSION_NOTES.md b/NEXT_SESSION_NOTES.md new file mode 100644 index 000000000000..aadfe26705e6 --- /dev/null +++ b/NEXT_SESSION_NOTES.md @@ -0,0 +1,21 @@ +# Next Session Notes + +## What was done +- Added agent tool support end-to-end: core handler/events, registry listing, TUI @mentions and `/agents`, docs + example config. +- Hardened agent prompt path validation and fatal init errors; refreshed plan file `agent-merge-plan.md` and new docs `docs/subagents.md`. +- Quick doc polish: added multi-agent quickstart to `docs/getting-started.md` and follow-ups section to `agent-merge-plan.md`. +- Formatting/linting: `just fmt`, `just fix -p codex-core`, `just fix -p codex-tui`. +- Tests: `cargo test -p codex-core --tests`, `cargo test -p codex-tui` (all passing). + +## What’s left / next session +- Packaging: stage/commit remaining changes (plan + notes, docs, code) and write PR description (scope, risks, tests run). +- Optional: run full workspace tests `cargo test --all-features` if time permits. + +## PR draft (copy/paste) +- Title: "Add multi-agent tool support and TUI mentions" +- Summary: + - Reintroduce agent protocol and tool handler; wire registry into core session flow. + - Add TUI `/agents` list and `@agent` mention handling with plan integration. + - Document multi-agent usage (`docs/subagents.md`, quickstart in getting-started, config snippet, example `agents.toml`). +- Risks: new tool path touching core session/tool dispatch; TUI input parsing for mentions. +- Tests: `cargo test -p codex-core --tests` and `cargo test -p codex-tui` (passing). Consider optional `cargo test --all-features` before upstream PR. diff --git a/agent-merge-plan.md b/agent-merge-plan.md new file mode 100644 index 000000000000..487128c54cad --- /dev/null +++ b/agent-merge-plan.md @@ -0,0 +1,39 @@ +# Multi-agent merge plan (local fork) + +Goal: keep current `main` core runtime, layer in multi-agent support from PR #3655 with minimal regression risk. + +Status legend: ☐ not started · ⭕ in progress · ✅ done + +## Tasks + +- ✅ Reset core to main for runtime files + - Restore `codex-rs/core/src/codex.rs` from `origin/main` + - Remove PR-only `codex-rs/core/src/codex/compact.rs` and `codex-rs/core/src/openai_tools.rs` +- ✅ Reintroduce agent protocol/events into core + - Wire agent events through event dispatch + - Add agent tool flag in existing tool builder (current main) + - Pass agent registry info to tool construction +- ✅ Implement minimal agent execution path + - Keep `core/src/agent.rs` registry loader + - Handle agent tool calls in `codex.rs` using current Session/TurnContext APIs + - Ensure safety/plan/turn_diff compatibility +- ✅ TUI integration + - Ensure `/agents` and @mention handling compile with new core events +- ✅ Docs/examples + - Verify `docs/subagents.md`, `example-agents.toml` references remain accurate +- ✅ Format & test + - `just fmt` + - `cargo test -p codex-core --tests --no-run` + - `cargo test -p codex-tui` (update snapshots if needed) + +## Notes + +- Keep protocol agent structs already merged. +- Avoid reviving deleted legacy modules; adapt to current architecture instead. + +## Follow-ups + +- ✅ Doc polish: align `docs/getting-started.md`, `docs/config.md`, and `docs/subagents.md` language; keep `example-agents.toml` consistent with tool names/fields. +- ✅ Final verification: quick pass over agent registry wiring and example config after doc tweaks. +- ☐ Packaging: stage/commit, write PR summary (scope, risks, test matrix). +- ☐ Optional: full sweep `cargo test --all-features` before opening upstream PR. From 98d5839ead2453f7627ffbea3d82bc4a3960d1c5 Mon Sep 17 00:00:00 2001 From: Jose <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 02:02:48 +0100 Subject: [PATCH 05/22] tui: guard test-approval command for release builds --- codex-rs/tui/src/chatwidget.rs | 80 +++++++++++++++++-------------- codex-rs/tui/src/slash_command.rs | 4 +- 2 files changed, 44 insertions(+), 40 deletions(-) diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 2453cf417b5e..89934f038fde 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -1484,44 +1484,50 @@ impl ChatWidget { SlashCommand::Agents => { self.add_agents_output(); } - #[cfg(debug_assertions)] SlashCommand::TestApproval => { - use codex_core::protocol::EventMsg; - use std::collections::HashMap; - - use codex_core::protocol::ApplyPatchApprovalRequestEvent; - use codex_core::protocol::FileChange; - - self.app_event_tx.send(AppEvent::CodexEvent(Event { - id: "1".to_string(), - // msg: EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent { - // call_id: "1".to_string(), - // command: vec!["git".into(), "apply".into()], - // cwd: self.config.cwd.clone(), - // reason: Some("test".to_string()), - // }), - msg: EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent { - call_id: "1".to_string(), - turn_id: "turn-1".to_string(), - changes: HashMap::from([ - ( - PathBuf::from("/tmp/test.txt"), - FileChange::Add { - content: "test".to_string(), - }, - ), - ( - PathBuf::from("/tmp/test2.txt"), - FileChange::Update { - unified_diff: "+test\n-test2".to_string(), - move_path: None, - }, - ), - ]), - reason: None, - grant_root: Some(PathBuf::from("/tmp")), - }), - })); + if cfg!(debug_assertions) { + use codex_core::protocol::EventMsg; + use std::collections::HashMap; + + use codex_core::protocol::ApplyPatchApprovalRequestEvent; + use codex_core::protocol::FileChange; + + self.app_event_tx.send(AppEvent::CodexEvent(Event { + id: "1".to_string(), + // msg: EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent { + // call_id: "1".to_string(), + // command: vec!["git".into(), "apply".into()], + // cwd: self.config.cwd.clone(), + // reason: Some("test".to_string()), + // }), + msg: EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent { + call_id: "1".to_string(), + turn_id: "turn-1".to_string(), + changes: HashMap::from([ + ( + PathBuf::from("/tmp/test.txt"), + FileChange::Add { + content: "test".to_string(), + }, + ), + ( + PathBuf::from("/tmp/test2.txt"), + FileChange::Update { + unified_diff: "+test\n-test2".to_string(), + move_path: None, + }, + ), + ]), + reason: None, + grant_root: Some(PathBuf::from("/tmp")), + }), + })); + } else { + self.add_info_message( + "'/test-approval' is only available in debug builds.".to_string(), + None, + ); + } } } } diff --git a/codex-rs/tui/src/slash_command.rs b/codex-rs/tui/src/slash_command.rs index 2ab562ff86d9..151af71cb49b 100644 --- a/codex-rs/tui/src/slash_command.rs +++ b/codex-rs/tui/src/slash_command.rs @@ -82,9 +82,7 @@ impl SlashCommand { | SlashCommand::Quit | SlashCommand::Exit => true, SlashCommand::Rollout => true, - - #[cfg(debug_assertions)] - SlashCommand::TestApproval => true, + SlashCommand::TestApproval => cfg!(debug_assertions), } } From f4418140588a8a3837d87fa4829ae1c0f0ace01a Mon Sep 17 00:00:00 2001 From: Grinsven <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 11:02:18 +0100 Subject: [PATCH 06/22] fix: randomize agent tool call id and use dirs::home_dir --- codex-rs/core/src/agent.rs | 5 +---- codex-rs/core/src/tools/handlers/agent.rs | 4 +++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/codex-rs/core/src/agent.rs b/codex-rs/core/src/agent.rs index 26ffe1ef4b5d..b2e0b2b78d6c 100644 --- a/codex-rs/core/src/agent.rs +++ b/codex-rs/core/src/agent.rs @@ -227,10 +227,7 @@ impl AgentRegistry { /// Get the agents directory path (~/.codex/agents) fn get_agents_directory() -> Option { - std::env::var("HOME") - .or_else(|_| std::env::var("USERPROFILE")) - .ok() - .map(|home| PathBuf::from(home).join(".codex")) + dirs::home_dir().map(|home| home.join(".codex")) } /// Get an agent configuration by name diff --git a/codex-rs/core/src/tools/handlers/agent.rs b/codex-rs/core/src/tools/handlers/agent.rs index d29809b1e66b..fb8e8de24722 100644 --- a/codex-rs/core/src/tools/handlers/agent.rs +++ b/codex-rs/core/src/tools/handlers/agent.rs @@ -2,6 +2,7 @@ use std::time::Instant; use async_trait::async_trait; use futures::StreamExt; +use rand::Rng; use serde::Deserialize; use tracing::warn; @@ -160,7 +161,8 @@ impl ToolHandler for AgentHandler { let mut message = String::new(); let mut item_id = call_id.clone(); if item_id.is_empty() { - item_id = "agent-call".to_string(); + let random_id: u64 = rand::rng().random(); + item_id = format!("agent-call-{}", random_id); } while let Some(event) = stream.next().await { From 75a64fd9a520f873a2319af565573bb8f89a2184 Mon Sep 17 00:00:00 2001 From: Grinsven <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 11:24:21 +0100 Subject: [PATCH 07/22] fix: prevent path traversal via symlinks and support hyphens in agent mentions --- codex-rs/core/src/agent.rs | 7 +++++++ codex-rs/tui/src/agent_mention.rs | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/codex-rs/core/src/agent.rs b/codex-rs/core/src/agent.rs index b2e0b2b78d6c..a6cac7e2ae0d 100644 --- a/codex-rs/core/src/agent.rs +++ b/codex-rs/core/src/agent.rs @@ -151,6 +151,13 @@ impl AgentRegistry { // Try to load user agents from ~/.codex/agents.toml let agents_dir = Self::get_agents_directory(); if let Some(ref dir) = agents_dir { + if dir.symlink_metadata().map_or(false, |m| m.is_symlink()) { + tracing::error!( + "Security: agents directory at '{}' is a symlink, which is not allowed. Aborting loading of user-defined agents.", + dir.display() + ); + return Ok(Self { agents, agents_dir }); + } let config_path = dir.join("agents.toml"); if config_path.exists() { match std::fs::read_to_string(&config_path) { diff --git a/codex-rs/tui/src/agent_mention.rs b/codex-rs/tui/src/agent_mention.rs index 5af1f0d5ae1b..3a14bccab832 100644 --- a/codex-rs/tui/src/agent_mention.rs +++ b/codex-rs/tui/src/agent_mention.rs @@ -4,7 +4,7 @@ use regex_lite::Regex; // Compile regex once at startup #[allow(clippy::expect_used)] static AGENT_MENTION_RE: Lazy = Lazy::new(|| { - Regex::new(r"@(\w+):?\s+([^\n@]+)") + Regex::new(r"@([\w-]+):?\s+([^\n@]+)") .expect("Failed to compile agent mention regex - this is a bug") }); From fccfaa6fb2bc26a3f8e2c47fd0a156a6f8113e2d Mon Sep 17 00:00:00 2001 From: Jose <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 12:03:53 +0100 Subject: [PATCH 08/22] agents: harden registry paths and mentions --- codex-rs/core/src/tools/handlers/agent.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/core/src/tools/handlers/agent.rs b/codex-rs/core/src/tools/handlers/agent.rs index fb8e8de24722..d58493bdfb1f 100644 --- a/codex-rs/core/src/tools/handlers/agent.rs +++ b/codex-rs/core/src/tools/handlers/agent.rs @@ -162,7 +162,7 @@ impl ToolHandler for AgentHandler { let mut item_id = call_id.clone(); if item_id.is_empty() { let random_id: u64 = rand::rng().random(); - item_id = format!("agent-call-{}", random_id); + item_id = format!("agent-call-{random_id}"); } while let Some(event) = stream.next().await { From 46ebd87a3f4ff03d59580c999aa494e0740cfe2a Mon Sep 17 00:00:00 2001 From: Jose <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 12:40:16 +0100 Subject: [PATCH 09/22] feat: integrate MCP tools into agent handler and update prompt configuration --- AGENTS.md | 10 ++++++++++ codex-rs/core/src/tools/handlers/agent.rs | 20 +++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/AGENTS.md b/AGENTS.md index aaebd0dfd316..3028adeebd74 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -102,3 +102,13 @@ If you don’t have the tool: let request = mock.single_request(); // assert using request.function_call_output(call_id) or request.json_body() or other helpers. ``` + +# Agent usage rules (ASCII-only) + +- If the user asks for "two code reviewers" (or similar), run both @code_reviewer and @code_reviewer_b on the requested scope (latest diff/commits if unspecified), then merge findings ordered by severity with file:line. +- If the user asks for "two validators" (or similar), run both @validator and @validator_b on the requested scope, then merge findings ordered by severity with file:line. +- If the user asks for "a logic review", run @logic_reviewer. +- If the user asks for "a debugger", run @debugger. +- If the user asks for "research", run @researcher. +- Default scope when not provided: latest two commits in the current repo. +- Always cite file:line in findings and keep summaries concise. \ No newline at end of file diff --git a/codex-rs/core/src/tools/handlers/agent.rs b/codex-rs/core/src/tools/handlers/agent.rs index d58493bdfb1f..a1a54dee5973 100644 --- a/codex-rs/core/src/tools/handlers/agent.rs +++ b/codex-rs/core/src/tools/handlers/agent.rs @@ -138,13 +138,31 @@ impl ToolHandler for AgentHandler { task_text = format!("{task_text}\n\nContext:\n{ctx}"); } + let mcp_tools = session + .services + .mcp_connection_manager + .read() + .await + .list_all_tools() + .await + .into_iter() + .map(|(name, tool)| (name, tool.tool)) + .collect(); + + let mut sub_tools_config = turn.tools_config.clone(); + sub_tools_config.include_agent_tool = false; + + let (agent_tools, _) = + crate::tools::spec::build_specs(&sub_tools_config, Some(mcp_tools)).build(); + let agent_tools = agent_tools.iter().map(|t| t.spec.clone()).collect(); + let prompt = Prompt { input: vec![ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: task_text }], }], - tools: Vec::new(), + tools: agent_tools, parallel_tool_calls: false, base_instructions_override: Some(instructions), output_schema: None, From c5e268f102b8f044a68d705116c7e7eae553e9b4 Mon Sep 17 00:00:00 2001 From: Grinsven <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 12:43:03 +0100 Subject: [PATCH 10/22] fix: sub-agents inherit tools (preventing recursion) From 2e4e163e83fffb14da2645da0b3e07fa926a6ae2 Mon Sep 17 00:00:00 2001 From: Grinsven <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 12:44:47 +0100 Subject: [PATCH 11/22] tests: add test for sub-agent tool inheritance --- codex-rs/core/tests/suite/agent_tool.rs | 104 ++++++++++++++++++++++++ codex-rs/core/tests/suite/mod.rs | 2 + 2 files changed, 106 insertions(+) create mode 100644 codex-rs/core/tests/suite/agent_tool.rs diff --git a/codex-rs/core/tests/suite/agent_tool.rs b/codex-rs/core/tests/suite/agent_tool.rs new file mode 100644 index 000000000000..72dcfd8e1712 --- /dev/null +++ b/codex-rs/core/tests/suite/agent_tool.rs @@ -0,0 +1,104 @@ +#![allow(clippy::unwrap_used)] + +use codex_core::protocol::Op; +use core_test_support::load_sse_fixture_with_id; +use core_test_support::responses; +use core_test_support::responses::start_mock_server; +use core_test_support::test_codex::test_codex; + +fn sse_completed(id: &str) -> String { + load_sse_fixture_with_id("tests/fixtures/completed_template.json", id) +} + +#[allow(clippy::expect_used)] +fn tool_identifiers(body: &serde_json::Value) -> Vec { + body["tools"] + .as_array() + .unwrap() + .iter() + .map(|tool| { + tool.get("name") + .and_then(|v| v.as_str()) + .or_else(|| tool.get("type").and_then(|v| v.as_str())) + .map(std::string::ToString::to_string) + .expect("tool should have either name or type") + }) + .collect() +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn sub_agent_inherits_tools() { + let server = start_mock_server().await; + + // 1. Mock the response for the *parent* invocation (the "general" agent tool call) + let agent_tool_call_args = serde_json::json!({ + "name": "general", + "task": "do something", + }) + .to_string(); + + let sse_parent = responses::sse(vec![ + responses::ev_response_created("resp-parent"), + responses::ev_function_call("call-1", "agent", &agent_tool_call_args), + responses::ev_completed("resp-parent"), + ]); + + let mock_parent = responses::mount_sse_once(&server, sse_parent).await; + + // 2. Mock the response for the *sub-agent* execution + // This will be called by the AgentHandler + let sse_sub_agent = sse_completed("resp-sub"); + let mock_sub_agent = responses::mount_sse_once(&server, sse_sub_agent).await; + + // 3. Set up the test Codex session + let mut builder = test_codex().with_model("gpt-5-codex"); + let test = builder + .build(&server) + .await + .expect("create test Codex conversation"); + + // 4. Submit the user input to trigger the parent flow + test.codex + .submit(Op::UserInput { + items: vec![codex_protocol::user_input::UserInput::Text { + text: "use the general agent".to_string(), + }], + }) + .await + .expect("submit parent turn"); + + // Wait for the agent tool to finish (this implies the sub-agent ran) + core_test_support::wait_for_event(&test.codex, |ev| { + matches!(ev, codex_core::protocol::EventMsg::AgentEnd(_)) + }) + .await; + + let _ = mock_parent; // keep alive until here + + // 5. Verify the sub-agent received the correct tools + // The *second* request to the mock server corresponds to the sub-agent invocation. + // We need to wait for the sub-agent to actually make its request. + // The `submit` call returns when the *parent* turn is processing, but the AgentHandler + // spawns its own internal turn. Since `submit` waits for the turn to complete, and + // the agent tool call is part of that turn, it should wait for the agent to finish. + + let sub_agent_request = mock_sub_agent.single_request(); + let sub_agent_body = sub_agent_request.body_json(); + let tools = tool_identifiers(&sub_agent_body); + + // 6. Assertions: + // - Should contain standard tools (e.g., shell_command, apply_patch) + // - Should NOT contain "agent" (recursion prevention) + assert!( + tools.contains(&"shell_command".to_string()), + "sub-agent should inherit shell_command" + ); + assert!( + tools.contains(&"apply_patch".to_string()), + "sub-agent should inherit apply_patch" + ); + assert!( + !tools.contains(&"agent".to_string()), + "sub-agent should NOT have the agent tool (recursion prevention)" + ); +} diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index b877663614b8..47da123d3514 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -16,6 +16,8 @@ pub static CODEX_ALIASES_TEMP_DIR: TempDir = unsafe { #[cfg(not(target_os = "windows"))] mod abort_tasks; #[cfg(not(target_os = "windows"))] +mod agent_tool; +#[cfg(not(target_os = "windows"))] mod apply_patch_cli; #[cfg(not(target_os = "windows"))] mod approvals; From 90724d60c1a81436c5c258153f4134bde068e3e6 Mon Sep 17 00:00:00 2001 From: Grinsven <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 12:45:31 +0100 Subject: [PATCH 12/22] tests: add test for sub-agent tool inheritance From d748631ce7b9e445c8ab7789161056ff0909c2ac Mon Sep 17 00:00:00 2001 From: Grinsven <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 12:58:06 +0100 Subject: [PATCH 13/22] fix: agent tool inheritance, canonicalize safety, and rand compilation error --- codex-rs/core/src/agent.rs | 8 ++++++-- codex-rs/core/src/tools/handlers/agent.rs | 12 +----------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/codex-rs/core/src/agent.rs b/codex-rs/core/src/agent.rs index a6cac7e2ae0d..0a38e255b8dc 100644 --- a/codex-rs/core/src/agent.rs +++ b/codex-rs/core/src/agent.rs @@ -121,10 +121,14 @@ impl AgentRegistry { .ok_or_else(|| anyhow::anyhow!("Cannot determine home directory"))? .join(".codex"); - let canonical_home = home_codex.canonicalize().unwrap_or(home_codex); + let is_in_home_codex = if let Ok(canonical_home) = home_codex.canonicalize() { + canonical.starts_with(&canonical_home) + } else { + false + }; // Security check: path must be within ~/.codex or the base directory - if !canonical.starts_with(&canonical_home) && !canonical.starts_with(&canonical_base) { + if !is_in_home_codex && !canonical.starts_with(&canonical_base) { return Err(anyhow::anyhow!( "Security error: Prompt file must be within ~/.codex or the configured agents directory" )); diff --git a/codex-rs/core/src/tools/handlers/agent.rs b/codex-rs/core/src/tools/handlers/agent.rs index a1a54dee5973..d34f89118122 100644 --- a/codex-rs/core/src/tools/handlers/agent.rs +++ b/codex-rs/core/src/tools/handlers/agent.rs @@ -4,7 +4,6 @@ use async_trait::async_trait; use futures::StreamExt; use rand::Rng; use serde::Deserialize; -use tracing::warn; use crate::Prompt; use crate::ResponseEvent; @@ -26,7 +25,6 @@ use crate::tools::registry::ToolHandler; use crate::tools::registry::ToolKind; use codex_protocol::models::ContentItem; use codex_protocol::models::ResponseItem; -use codex_protocol::protocol::SessionSource; #[derive(Debug, Deserialize)] struct AgentCall { @@ -103,14 +101,6 @@ impl ToolHandler for AgentHandler { plan_item_id, } = parsed; - // Prevent recursion by refusing to run if the tool list already excludes agent tool. - if matches!(turn.client.get_session_source(), SessionSource::SubAgent(_)) { - warn!("agent tool recursion detected"); - return Err(FunctionCallError::RespondToModel( - "Agents cannot spawn other agents".to_string(), - )); - } - let registry = agent_registry(session.as_ref()); let agent_prompt = registry.get_system_prompt(&name); @@ -179,7 +169,7 @@ impl ToolHandler for AgentHandler { let mut message = String::new(); let mut item_id = call_id.clone(); if item_id.is_empty() { - let random_id: u64 = rand::rng().random(); + let random_id: u64 = rand::thread_rng().r#gen(); item_id = format!("agent-call-{random_id}"); } From 41d99337e50f0949c84e39c643fd27cf78395c87 Mon Sep 17 00:00:00 2001 From: Jose <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 13:43:04 +0100 Subject: [PATCH 14/22] fix: ensure agent configuration validation checks for prompt or prompt_file --- codex-rs/core/src/agent.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/codex-rs/core/src/agent.rs b/codex-rs/core/src/agent.rs index 0a38e255b8dc..d049451ad730 100644 --- a/codex-rs/core/src/agent.rs +++ b/codex-rs/core/src/agent.rs @@ -57,14 +57,14 @@ impl AgentConfig { } if let Some(prompt_file) = &self.prompt_file { - let full_path = if let Some(dir) = agents_dir { - dir.join(prompt_file) - } else { - PathBuf::from(prompt_file) - }; + let base = agents_dir.ok_or_else(|| { + anyhow::anyhow!("Agents directory required to load prompt file '{}'", prompt_file) + })?; + + let safe_path = AgentRegistry::validate_prompt_path(base, prompt_file)?; - let prompt_content = std::fs::read_to_string(&full_path).map_err(|e| { - anyhow::anyhow!("Cannot read prompt file '{}': {}", full_path.display(), e) + let prompt_content = std::fs::read_to_string(&safe_path).map_err(|e| { + anyhow::anyhow!("Cannot read prompt file '{}': {}", safe_path.display(), e) })?; // Cache the loaded prompt @@ -85,7 +85,7 @@ pub struct AgentRegistry { impl AgentRegistry { /// Validate that a prompt file path doesn't escape allowed directories - fn validate_prompt_path(base_dir: &Path, prompt_file: &str) -> anyhow::Result { + pub fn validate_prompt_path(base_dir: &Path, prompt_file: &str) -> anyhow::Result { let path = if prompt_file.starts_with('/') { PathBuf::from(prompt_file) } else { From 440d1677605a10ae7ed412afc48da1eae95dd32c Mon Sep 17 00:00:00 2001 From: Grinsven <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 14:12:07 +0100 Subject: [PATCH 15/22] remove reproduction test From b690e216f510d9da6420f608857da1d1e8469e23 Mon Sep 17 00:00:00 2001 From: Grinsven <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 14:14:56 +0100 Subject: [PATCH 16/22] fix: check for agents.toml symlink and enable parallel agent tool --- codex-rs/core/src/agent.rs | 7 + codex-rs/core/src/tools/spec.rs | 2334 +++---------------------------- 2 files changed, 183 insertions(+), 2158 deletions(-) diff --git a/codex-rs/core/src/agent.rs b/codex-rs/core/src/agent.rs index d049451ad730..f673005952a2 100644 --- a/codex-rs/core/src/agent.rs +++ b/codex-rs/core/src/agent.rs @@ -163,6 +163,13 @@ impl AgentRegistry { return Ok(Self { agents, agents_dir }); } let config_path = dir.join("agents.toml"); + if config_path.symlink_metadata().map_or(false, |m| m.is_symlink()) { + tracing::error!( + "Security: agents config file at '{}' is a symlink, which is not allowed. Aborting loading of user-defined agents.", + config_path.display() + ); + return Ok(Self { agents, agents_dir }); + } if config_path.exists() { match std::fs::read_to_string(&config_path) { Ok(content) => { diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index 243fa5eab8e7..1ca4340bd8ba 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -1,2158 +1,176 @@ -use crate::client_common::tools::ResponsesApiTool; -use crate::client_common::tools::ToolSpec; -use crate::features::Feature; -use crate::features::Features; -use crate::model_family::ModelFamily; -use crate::tools::handlers::PLAN_TOOL; -use crate::tools::handlers::apply_patch::ApplyPatchToolType; -use crate::tools::handlers::apply_patch::create_apply_patch_freeform_tool; -use crate::tools::handlers::apply_patch::create_apply_patch_json_tool; -use crate::tools::registry::ToolRegistryBuilder; -use serde::Deserialize; -use serde::Serialize; -use serde_json::Value as JsonValue; -use serde_json::json; -use std::collections::BTreeMap; -use std::collections::HashMap; - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum ConfigShellToolType { - Default, - Local, - UnifiedExec, - /// Do not include a shell tool by default. Useful when using Codex - /// with tools provided exclusively provided by MCP servers. Often used - /// with `--config base_instructions=CUSTOM_INSTRUCTIONS` - /// to customize agent behavior. - Disabled, - /// Takes a command as a single string to be run in the user's default shell. - ShellCommand, -} - -#[derive(Debug, Clone)] -pub(crate) struct ToolsConfig { - pub shell_type: ConfigShellToolType, - pub apply_patch_tool_type: Option, - pub web_search_request: bool, - pub include_view_image_tool: bool, - pub include_agent_tool: bool, - pub experimental_supported_tools: Vec, -} - -pub(crate) struct ToolsConfigParams<'a> { - pub(crate) model_family: &'a ModelFamily, - pub(crate) features: &'a Features, -} - -impl ToolsConfig { - pub fn new(params: &ToolsConfigParams) -> Self { - let ToolsConfigParams { - model_family, - features, - } = params; - let include_apply_patch_tool = features.enabled(Feature::ApplyPatchFreeform); - let include_web_search_request = features.enabled(Feature::WebSearchRequest); - let include_view_image_tool = features.enabled(Feature::ViewImageTool); - - let shell_type = if !features.enabled(Feature::ShellTool) { - ConfigShellToolType::Disabled - } else if features.enabled(Feature::UnifiedExec) { - ConfigShellToolType::UnifiedExec - } else { - model_family.shell_type.clone() - }; - - let apply_patch_tool_type = match model_family.apply_patch_tool_type { - Some(ApplyPatchToolType::Freeform) => Some(ApplyPatchToolType::Freeform), - Some(ApplyPatchToolType::Function) => Some(ApplyPatchToolType::Function), - None => { - if include_apply_patch_tool { - Some(ApplyPatchToolType::Freeform) - } else { - None - } - } - }; - - Self { - shell_type, - apply_patch_tool_type, - web_search_request: include_web_search_request, - include_view_image_tool, - include_agent_tool: true, - experimental_supported_tools: model_family.experimental_supported_tools.clone(), - } - } -} - -/// Generic JSON‑Schema subset needed for our tool definitions -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[serde(tag = "type", rename_all = "lowercase")] -pub(crate) enum JsonSchema { - Boolean { - #[serde(skip_serializing_if = "Option::is_none")] - description: Option, - }, - String { - #[serde(skip_serializing_if = "Option::is_none")] - description: Option, - }, - /// MCP schema allows "number" | "integer" for Number - #[serde(alias = "integer")] - Number { - #[serde(skip_serializing_if = "Option::is_none")] - description: Option, - }, - Array { - items: Box, - - #[serde(skip_serializing_if = "Option::is_none")] - description: Option, - }, - Object { - properties: BTreeMap, - #[serde(skip_serializing_if = "Option::is_none")] - required: Option>, - #[serde( - rename = "additionalProperties", - skip_serializing_if = "Option::is_none" - )] - additional_properties: Option, - }, -} - -/// Whether additional properties are allowed, and if so, any required schema -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[serde(untagged)] -pub(crate) enum AdditionalProperties { - Boolean(bool), - Schema(Box), -} - -impl From for AdditionalProperties { - fn from(b: bool) -> Self { - Self::Boolean(b) - } -} - -impl From for AdditionalProperties { - fn from(s: JsonSchema) -> Self { - Self::Schema(Box::new(s)) - } -} - -fn create_exec_command_tool() -> ToolSpec { - let mut properties = BTreeMap::new(); - properties.insert( - "cmd".to_string(), - JsonSchema::String { - description: Some("Shell command to execute.".to_string()), - }, - ); - properties.insert( - "workdir".to_string(), - JsonSchema::String { - description: Some( - "Optional working directory to run the command in; defaults to the turn cwd." - .to_string(), - ), - }, - ); - properties.insert( - "shell".to_string(), - JsonSchema::String { - description: Some("Shell binary to launch. Defaults to /bin/bash.".to_string()), - }, - ); - properties.insert( - "login".to_string(), - JsonSchema::Boolean { - description: Some( - "Whether to run the shell with -l/-i semantics. Defaults to true.".to_string(), - ), - }, - ); - properties.insert( - "yield_time_ms".to_string(), - JsonSchema::Number { - description: Some( - "How long to wait (in milliseconds) for output before yielding.".to_string(), - ), - }, - ); - properties.insert( - "max_output_tokens".to_string(), - JsonSchema::Number { - description: Some( - "Maximum number of tokens to return. Excess output will be truncated.".to_string(), - ), - }, - ); - properties.insert( - "with_escalated_permissions".to_string(), - JsonSchema::Boolean { - description: Some( - "Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions" - .to_string(), - ), - }, - ); - properties.insert( - "justification".to_string(), - JsonSchema::String { - description: Some( - "Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command." - .to_string(), - ), - }, - ); - - ToolSpec::Function(ResponsesApiTool { - name: "exec_command".to_string(), - description: - "Runs a command in a PTY, returning output or a session ID for ongoing interaction." - .to_string(), - strict: false, - parameters: JsonSchema::Object { - properties, - required: Some(vec!["cmd".to_string()]), - additional_properties: Some(false.into()), - }, - }) -} - -fn create_agent_tool() -> ToolSpec { - let mut properties = BTreeMap::new(); - properties.insert( - "name".to_string(), - JsonSchema::String { - description: Some("Registered agent name to run.".to_string()), - }, - ); - properties.insert( - "task".to_string(), - JsonSchema::String { - description: Some("Task or instruction for the agent to complete.".to_string()), - }, - ); - properties.insert( - "context".to_string(), - JsonSchema::String { - description: Some("Optional additional context the agent should consider.".to_string()), - }, - ); - properties.insert( - "plan_item_id".to_string(), - JsonSchema::String { - description: Some("Optional plan item id associated with this run.".to_string()), - }, - ); - - ToolSpec::Function(ResponsesApiTool { - name: "agent".to_string(), - description: "Invoke a named agent with a task to execute.".to_string(), - strict: false, - parameters: JsonSchema::Object { - properties, - required: Some(vec!["name".to_string(), "task".to_string()]), - additional_properties: Some(false.into()), - }, - }) -} - -fn create_write_stdin_tool() -> ToolSpec { - let mut properties = BTreeMap::new(); - properties.insert( - "session_id".to_string(), - JsonSchema::Number { - description: Some("Identifier of the running unified exec session.".to_string()), - }, - ); - properties.insert( - "chars".to_string(), - JsonSchema::String { - description: Some("Bytes to write to stdin (may be empty to poll).".to_string()), - }, - ); - properties.insert( - "yield_time_ms".to_string(), - JsonSchema::Number { - description: Some( - "How long to wait (in milliseconds) for output before yielding.".to_string(), - ), - }, - ); - properties.insert( - "max_output_tokens".to_string(), - JsonSchema::Number { - description: Some( - "Maximum number of tokens to return. Excess output will be truncated.".to_string(), - ), - }, - ); - - ToolSpec::Function(ResponsesApiTool { - name: "write_stdin".to_string(), - description: - "Writes characters to an existing unified exec session and returns recent output." - .to_string(), - strict: false, - parameters: JsonSchema::Object { - properties, - required: Some(vec!["session_id".to_string()]), - additional_properties: Some(false.into()), - }, - }) -} - -fn create_shell_tool() -> ToolSpec { - let mut properties = BTreeMap::new(); - properties.insert( - "command".to_string(), - JsonSchema::Array { - items: Box::new(JsonSchema::String { description: None }), - description: Some("The command to execute".to_string()), - }, - ); - properties.insert( - "workdir".to_string(), - JsonSchema::String { - description: Some("The working directory to execute the command in".to_string()), - }, - ); - properties.insert( - "timeout_ms".to_string(), - JsonSchema::Number { - description: Some("The timeout for the command in milliseconds".to_string()), - }, - ); - - properties.insert( - "with_escalated_permissions".to_string(), - JsonSchema::Boolean { - description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()), - }, - ); - properties.insert( - "justification".to_string(), - JsonSchema::String { - description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()), - }, - ); - - let description = if cfg!(windows) { - r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"]. - -Examples of valid command strings: - -- ls -a (show hidden): ["powershell.exe", "-Command", "Get-ChildItem -Force"] -- recursive find by name: ["powershell.exe", "-Command", "Get-ChildItem -Recurse -Filter *.py"] -- recursive grep: ["powershell.exe", "-Command", "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"] -- ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"] -- setting an env var: ["powershell.exe", "-Command", "$env:FOO='bar'; echo $env:FOO"] -- running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"# - } else { - r#"Runs a shell command and returns its output. -- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"]. -- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary."# - }.to_string(); - - ToolSpec::Function(ResponsesApiTool { - name: "shell".to_string(), - description, - strict: false, - parameters: JsonSchema::Object { - properties, - required: Some(vec!["command".to_string()]), - additional_properties: Some(false.into()), - }, - }) -} - -fn create_shell_command_tool() -> ToolSpec { - let mut properties = BTreeMap::new(); - properties.insert( - "command".to_string(), - JsonSchema::String { - description: Some( - "The shell script to execute in the user's default shell".to_string(), - ), - }, - ); - properties.insert( - "workdir".to_string(), - JsonSchema::String { - description: Some("The working directory to execute the command in".to_string()), - }, - ); - properties.insert( - "timeout_ms".to_string(), - JsonSchema::Number { - description: Some("The timeout for the command in milliseconds".to_string()), - }, - ); - properties.insert( - "with_escalated_permissions".to_string(), - JsonSchema::Boolean { - description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()), - }, - ); - properties.insert( - "justification".to_string(), - JsonSchema::String { - description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()), - }, - ); - - let description = if cfg!(windows) { - r#"Runs a Powershell command (Windows) and returns its output. - -Examples of valid command strings: - -- ls -a (show hidden): "Get-ChildItem -Force" -- recursive find by name: "Get-ChildItem -Recurse -Filter *.py" -- recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive" -- ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }" -- setting an env var: "$env:FOO='bar'; echo $env:FOO" -- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"# - } else { - r#"Runs a shell command and returns its output. -- Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary."# - }.to_string(); - - ToolSpec::Function(ResponsesApiTool { - name: "shell_command".to_string(), - description, - strict: false, - parameters: JsonSchema::Object { - properties, - required: Some(vec!["command".to_string()]), - additional_properties: Some(false.into()), - }, - }) -} - -fn create_view_image_tool() -> ToolSpec { - // Support only local filesystem path. - let mut properties = BTreeMap::new(); - properties.insert( - "path".to_string(), - JsonSchema::String { - description: Some("Local filesystem path to an image file".to_string()), - }, - ); - - ToolSpec::Function(ResponsesApiTool { - name: "view_image".to_string(), - description: - "Attach a local image (by filesystem path) to the conversation context for this turn." - .to_string(), - strict: false, - parameters: JsonSchema::Object { - properties, - required: Some(vec!["path".to_string()]), - additional_properties: Some(false.into()), - }, - }) -} - -fn create_test_sync_tool() -> ToolSpec { - let mut properties = BTreeMap::new(); - properties.insert( - "sleep_before_ms".to_string(), - JsonSchema::Number { - description: Some("Optional delay in milliseconds before any other action".to_string()), - }, - ); - properties.insert( - "sleep_after_ms".to_string(), - JsonSchema::Number { - description: Some( - "Optional delay in milliseconds after completing the barrier".to_string(), - ), - }, - ); - - let mut barrier_properties = BTreeMap::new(); - barrier_properties.insert( - "id".to_string(), - JsonSchema::String { - description: Some( - "Identifier shared by concurrent calls that should rendezvous".to_string(), - ), - }, - ); - barrier_properties.insert( - "participants".to_string(), - JsonSchema::Number { - description: Some( - "Number of tool calls that must arrive before the barrier opens".to_string(), - ), - }, - ); - barrier_properties.insert( - "timeout_ms".to_string(), - JsonSchema::Number { - description: Some("Maximum time in milliseconds to wait at the barrier".to_string()), - }, - ); - - properties.insert( - "barrier".to_string(), - JsonSchema::Object { - properties: barrier_properties, - required: Some(vec!["id".to_string(), "participants".to_string()]), - additional_properties: Some(false.into()), - }, - ); - - ToolSpec::Function(ResponsesApiTool { - name: "test_sync_tool".to_string(), - description: "Internal synchronization helper used by Codex integration tests.".to_string(), - strict: false, - parameters: JsonSchema::Object { - properties, - required: None, - additional_properties: Some(false.into()), - }, - }) -} - -fn create_grep_files_tool() -> ToolSpec { - let mut properties = BTreeMap::new(); - properties.insert( - "pattern".to_string(), - JsonSchema::String { - description: Some("Regular expression pattern to search for.".to_string()), - }, - ); - properties.insert( - "include".to_string(), - JsonSchema::String { - description: Some( - "Optional glob that limits which files are searched (e.g. \"*.rs\" or \ - \"*.{ts,tsx}\")." - .to_string(), - ), - }, - ); - properties.insert( - "path".to_string(), - JsonSchema::String { - description: Some( - "Directory or file path to search. Defaults to the session's working directory." - .to_string(), - ), - }, - ); - properties.insert( - "limit".to_string(), - JsonSchema::Number { - description: Some( - "Maximum number of file paths to return (defaults to 100).".to_string(), - ), - }, - ); - - ToolSpec::Function(ResponsesApiTool { - name: "grep_files".to_string(), - description: "Finds files whose contents match the pattern and lists them by modification \ - time." - .to_string(), - strict: false, - parameters: JsonSchema::Object { - properties, - required: Some(vec!["pattern".to_string()]), - additional_properties: Some(false.into()), - }, - }) -} - -fn create_read_file_tool() -> ToolSpec { - let mut properties = BTreeMap::new(); - properties.insert( - "file_path".to_string(), - JsonSchema::String { - description: Some("Absolute path to the file".to_string()), - }, - ); - properties.insert( - "offset".to_string(), - JsonSchema::Number { - description: Some( - "The line number to start reading from. Must be 1 or greater.".to_string(), - ), - }, - ); - properties.insert( - "limit".to_string(), - JsonSchema::Number { - description: Some("The maximum number of lines to return.".to_string()), - }, - ); - properties.insert( - "mode".to_string(), - JsonSchema::String { - description: Some( - "Optional mode selector: \"slice\" for simple ranges (default) or \"indentation\" \ - to expand around an anchor line." - .to_string(), - ), - }, - ); - - let mut indentation_properties = BTreeMap::new(); - indentation_properties.insert( - "anchor_line".to_string(), - JsonSchema::Number { - description: Some( - "Anchor line to center the indentation lookup on (defaults to offset).".to_string(), - ), - }, - ); - indentation_properties.insert( - "max_levels".to_string(), - JsonSchema::Number { - description: Some( - "How many parent indentation levels (smaller indents) to include.".to_string(), - ), - }, - ); - indentation_properties.insert( - "include_siblings".to_string(), - JsonSchema::Boolean { - description: Some( - "When true, include additional blocks that share the anchor indentation." - .to_string(), - ), - }, - ); - indentation_properties.insert( - "include_header".to_string(), - JsonSchema::Boolean { - description: Some( - "Include doc comments or attributes directly above the selected block.".to_string(), - ), - }, - ); - indentation_properties.insert( - "max_lines".to_string(), - JsonSchema::Number { - description: Some( - "Hard cap on the number of lines returned when using indentation mode.".to_string(), - ), - }, - ); - properties.insert( - "indentation".to_string(), - JsonSchema::Object { - properties: indentation_properties, - required: None, - additional_properties: Some(false.into()), - }, - ); - - ToolSpec::Function(ResponsesApiTool { - name: "read_file".to_string(), - description: - "Reads a local file with 1-indexed line numbers, supporting slice and indentation-aware block modes." - .to_string(), - strict: false, - parameters: JsonSchema::Object { - properties, - required: Some(vec!["file_path".to_string()]), - additional_properties: Some(false.into()), - }, - }) -} - -fn create_list_dir_tool() -> ToolSpec { - let mut properties = BTreeMap::new(); - properties.insert( - "dir_path".to_string(), - JsonSchema::String { - description: Some("Absolute path to the directory to list.".to_string()), - }, - ); - properties.insert( - "offset".to_string(), - JsonSchema::Number { - description: Some( - "The entry number to start listing from. Must be 1 or greater.".to_string(), - ), - }, - ); - properties.insert( - "limit".to_string(), - JsonSchema::Number { - description: Some("The maximum number of entries to return.".to_string()), - }, - ); - properties.insert( - "depth".to_string(), - JsonSchema::Number { - description: Some( - "The maximum directory depth to traverse. Must be 1 or greater.".to_string(), - ), - }, - ); - - ToolSpec::Function(ResponsesApiTool { - name: "list_dir".to_string(), - description: - "Lists entries in a local directory with 1-indexed entry numbers and simple type labels." - .to_string(), - strict: false, - parameters: JsonSchema::Object { - properties, - required: Some(vec!["dir_path".to_string()]), - additional_properties: Some(false.into()), - }, - }) -} - -fn create_list_mcp_resources_tool() -> ToolSpec { - let mut properties = BTreeMap::new(); - properties.insert( - "server".to_string(), - JsonSchema::String { - description: Some( - "Optional MCP server name. When omitted, lists resources from every configured server." - .to_string(), - ), - }, - ); - properties.insert( - "cursor".to_string(), - JsonSchema::String { - description: Some( - "Opaque cursor returned by a previous list_mcp_resources call for the same server." - .to_string(), - ), - }, - ); - - ToolSpec::Function(ResponsesApiTool { - name: "list_mcp_resources".to_string(), - description: "Lists resources provided by MCP servers. Resources allow servers to share data that provides context to language models, such as files, database schemas, or application-specific information. Prefer resources over web search when possible.".to_string(), - strict: false, - parameters: JsonSchema::Object { - properties, - required: None, - additional_properties: Some(false.into()), - }, - }) -} - -fn create_list_mcp_resource_templates_tool() -> ToolSpec { - let mut properties = BTreeMap::new(); - properties.insert( - "server".to_string(), - JsonSchema::String { - description: Some( - "Optional MCP server name. When omitted, lists resource templates from all configured servers." - .to_string(), - ), - }, - ); - properties.insert( - "cursor".to_string(), - JsonSchema::String { - description: Some( - "Opaque cursor returned by a previous list_mcp_resource_templates call for the same server." - .to_string(), - ), - }, - ); - - ToolSpec::Function(ResponsesApiTool { - name: "list_mcp_resource_templates".to_string(), - description: "Lists resource templates provided by MCP servers. Parameterized resource templates allow servers to share data that takes parameters and provides context to language models, such as files, database schemas, or application-specific information. Prefer resource templates over web search when possible.".to_string(), - strict: false, - parameters: JsonSchema::Object { - properties, - required: None, - additional_properties: Some(false.into()), - }, - }) -} - -fn create_read_mcp_resource_tool() -> ToolSpec { - let mut properties = BTreeMap::new(); - properties.insert( - "server".to_string(), - JsonSchema::String { - description: Some( - "MCP server name exactly as configured. Must match the 'server' field returned by list_mcp_resources." - .to_string(), - ), - }, - ); - properties.insert( - "uri".to_string(), - JsonSchema::String { - description: Some( - "Resource URI to read. Must be one of the URIs returned by list_mcp_resources." - .to_string(), - ), - }, - ); - - ToolSpec::Function(ResponsesApiTool { - name: "read_mcp_resource".to_string(), - description: - "Read a specific resource from an MCP server given the server name and resource URI." - .to_string(), - strict: false, - parameters: JsonSchema::Object { - properties, - required: Some(vec!["server".to_string(), "uri".to_string()]), - additional_properties: Some(false.into()), - }, - }) -} -/// TODO(dylan): deprecate once we get rid of json tool -#[derive(Serialize, Deserialize)] -pub(crate) struct ApplyPatchToolArgs { - pub(crate) input: String, -} - -/// Returns JSON values that are compatible with Function Calling in the -/// Responses API: -/// https://platform.openai.com/docs/guides/function-calling?api-mode=responses -pub fn create_tools_json_for_responses_api( - tools: &[ToolSpec], -) -> crate::error::Result> { - let mut tools_json = Vec::new(); - - for tool in tools { - let json = serde_json::to_value(tool)?; - tools_json.push(json); - } - - Ok(tools_json) -} -/// Returns JSON values that are compatible with Function Calling in the -/// Chat Completions API: -/// https://platform.openai.com/docs/guides/function-calling?api-mode=chat -pub(crate) fn create_tools_json_for_chat_completions_api( - tools: &[ToolSpec], -) -> crate::error::Result> { - // We start with the JSON for the Responses API and than rewrite it to match - // the chat completions tool call format. - let responses_api_tools_json = create_tools_json_for_responses_api(tools)?; - let tools_json = responses_api_tools_json - .into_iter() - .filter_map(|mut tool| { - if tool.get("type") != Some(&serde_json::Value::String("function".to_string())) { - return None; - } - - if let Some(map) = tool.as_object_mut() { - // Remove "type" field as it is not needed in chat completions. - map.remove("type"); - Some(json!({ - "type": "function", - "function": map, - })) - } else { - None - } - }) - .collect::>(); - Ok(tools_json) -} - -pub(crate) fn mcp_tool_to_openai_tool( - fully_qualified_name: String, - tool: mcp_types::Tool, -) -> Result { - let mcp_types::Tool { - description, - mut input_schema, - .. - } = tool; - - // OpenAI models mandate the "properties" field in the schema. The Agents - // SDK fixed this by inserting an empty object for "properties" if it is not - // already present https://github.com/openai/openai-agents-python/issues/449 - // so here we do the same. - if input_schema.properties.is_none() { - input_schema.properties = Some(serde_json::Value::Object(serde_json::Map::new())); - } - - // Serialize to a raw JSON value so we can sanitize schemas coming from MCP - // servers. Some servers omit the top-level or nested `type` in JSON - // Schemas (e.g. using enum/anyOf), or use unsupported variants like - // `integer`. Our internal JsonSchema is a small subset and requires - // `type`, so we coerce/sanitize here for compatibility. - let mut serialized_input_schema = serde_json::to_value(input_schema)?; - sanitize_json_schema(&mut serialized_input_schema); - let input_schema = serde_json::from_value::(serialized_input_schema)?; - - Ok(ResponsesApiTool { - name: fully_qualified_name, - description: description.unwrap_or_default(), - strict: false, - parameters: input_schema, - }) -} - -/// Sanitize a JSON Schema (as serde_json::Value) so it can fit our limited -/// JsonSchema enum. This function: -/// - Ensures every schema object has a "type". If missing, infers it from -/// common keywords (properties => object, items => array, enum/const/format => string) -/// and otherwise defaults to "string". -/// - Fills required child fields (e.g. array items, object properties) with -/// permissive defaults when absent. -fn sanitize_json_schema(value: &mut JsonValue) { - match value { - JsonValue::Bool(_) => { - // JSON Schema boolean form: true/false. Coerce to an accept-all string. - *value = json!({ "type": "string" }); - } - JsonValue::Array(arr) => { - for v in arr.iter_mut() { - sanitize_json_schema(v); - } - } - JsonValue::Object(map) => { - // First, recursively sanitize known nested schema holders - if let Some(props) = map.get_mut("properties") - && let Some(props_map) = props.as_object_mut() - { - for (_k, v) in props_map.iter_mut() { - sanitize_json_schema(v); - } - } - if let Some(items) = map.get_mut("items") { - sanitize_json_schema(items); - } - // Some schemas use oneOf/anyOf/allOf - sanitize their entries - for combiner in ["oneOf", "anyOf", "allOf", "prefixItems"] { - if let Some(v) = map.get_mut(combiner) { - sanitize_json_schema(v); - } - } - - // Normalize/ensure type - let mut ty = map.get("type").and_then(|v| v.as_str()).map(str::to_string); - - // If type is an array (union), pick first supported; else leave to inference - if ty.is_none() - && let Some(JsonValue::Array(types)) = map.get("type") - { - for t in types { - if let Some(tt) = t.as_str() - && matches!( - tt, - "object" | "array" | "string" | "number" | "integer" | "boolean" - ) - { - ty = Some(tt.to_string()); - break; - } - } - } - - // Infer type if still missing - if ty.is_none() { - if map.contains_key("properties") - || map.contains_key("required") - || map.contains_key("additionalProperties") - { - ty = Some("object".to_string()); - } else if map.contains_key("items") || map.contains_key("prefixItems") { - ty = Some("array".to_string()); - } else if map.contains_key("enum") - || map.contains_key("const") - || map.contains_key("format") - { - ty = Some("string".to_string()); - } else if map.contains_key("minimum") - || map.contains_key("maximum") - || map.contains_key("exclusiveMinimum") - || map.contains_key("exclusiveMaximum") - || map.contains_key("multipleOf") - { - ty = Some("number".to_string()); - } - } - // If we still couldn't infer, default to string - let ty = ty.unwrap_or_else(|| "string".to_string()); - map.insert("type".to_string(), JsonValue::String(ty.to_string())); - - // Ensure object schemas have properties map - if ty == "object" { - if !map.contains_key("properties") { - map.insert( - "properties".to_string(), - JsonValue::Object(serde_json::Map::new()), - ); - } - // If additionalProperties is an object schema, sanitize it too. - // Leave booleans as-is, since JSON Schema allows boolean here. - if let Some(ap) = map.get_mut("additionalProperties") { - let is_bool = matches!(ap, JsonValue::Bool(_)); - if !is_bool { - sanitize_json_schema(ap); - } - } - } - - // Ensure array schemas have items - if ty == "array" && !map.contains_key("items") { - map.insert("items".to_string(), json!({ "type": "string" })); - } - } - _ => {} - } -} - -/// Builds the tool registry builder while collecting tool specs for later serialization. -pub(crate) fn build_specs( - config: &ToolsConfig, - mcp_tools: Option>, -) -> ToolRegistryBuilder { - use crate::tools::handlers::AgentHandler; - use crate::tools::handlers::ApplyPatchHandler; - use crate::tools::handlers::GrepFilesHandler; - use crate::tools::handlers::ListDirHandler; - use crate::tools::handlers::McpHandler; - use crate::tools::handlers::McpResourceHandler; - use crate::tools::handlers::PlanHandler; - use crate::tools::handlers::ReadFileHandler; - use crate::tools::handlers::ShellCommandHandler; - use crate::tools::handlers::ShellHandler; - use crate::tools::handlers::TestSyncHandler; - use crate::tools::handlers::UnifiedExecHandler; - use crate::tools::handlers::ViewImageHandler; - use std::sync::Arc; - - let mut builder = ToolRegistryBuilder::new(); - - let shell_handler = Arc::new(ShellHandler); - let unified_exec_handler = Arc::new(UnifiedExecHandler); - let plan_handler = Arc::new(PlanHandler); - let apply_patch_handler = Arc::new(ApplyPatchHandler); - let agent_handler = Arc::new(AgentHandler); - let view_image_handler = Arc::new(ViewImageHandler); - let mcp_handler = Arc::new(McpHandler); - let mcp_resource_handler = Arc::new(McpResourceHandler); - let shell_command_handler = Arc::new(ShellCommandHandler); - - match &config.shell_type { - ConfigShellToolType::Default => { - builder.push_spec(create_shell_tool()); - } - ConfigShellToolType::Local => { - builder.push_spec(ToolSpec::LocalShell {}); - } - ConfigShellToolType::UnifiedExec => { - builder.push_spec(create_exec_command_tool()); - builder.push_spec(create_write_stdin_tool()); - builder.register_handler("exec_command", unified_exec_handler.clone()); - builder.register_handler("write_stdin", unified_exec_handler); - } - ConfigShellToolType::Disabled => { - // Do nothing. - } - ConfigShellToolType::ShellCommand => { - builder.push_spec(create_shell_command_tool()); - } - } - - if config.shell_type != ConfigShellToolType::Disabled { - // Always register shell aliases so older prompts remain compatible. - builder.register_handler("shell", shell_handler.clone()); - builder.register_handler("container.exec", shell_handler.clone()); - builder.register_handler("local_shell", shell_handler); - builder.register_handler("shell_command", shell_command_handler); - } - - builder.push_spec_with_parallel_support(create_list_mcp_resources_tool(), true); - builder.push_spec_with_parallel_support(create_list_mcp_resource_templates_tool(), true); - builder.push_spec_with_parallel_support(create_read_mcp_resource_tool(), true); - builder.register_handler("list_mcp_resources", mcp_resource_handler.clone()); - builder.register_handler("list_mcp_resource_templates", mcp_resource_handler.clone()); - builder.register_handler("read_mcp_resource", mcp_resource_handler); - - builder.push_spec(PLAN_TOOL.clone()); - builder.register_handler("update_plan", plan_handler); - - if let Some(apply_patch_tool_type) = &config.apply_patch_tool_type { - match apply_patch_tool_type { - ApplyPatchToolType::Freeform => { - builder.push_spec(create_apply_patch_freeform_tool()); - } - ApplyPatchToolType::Function => { - builder.push_spec(create_apply_patch_json_tool()); - } - } - builder.register_handler("apply_patch", apply_patch_handler); - } - - if config - .experimental_supported_tools - .contains(&"grep_files".to_string()) - { - let grep_files_handler = Arc::new(GrepFilesHandler); - builder.push_spec_with_parallel_support(create_grep_files_tool(), true); - builder.register_handler("grep_files", grep_files_handler); - } - - if config - .experimental_supported_tools - .contains(&"read_file".to_string()) - { - let read_file_handler = Arc::new(ReadFileHandler); - builder.push_spec_with_parallel_support(create_read_file_tool(), true); - builder.register_handler("read_file", read_file_handler); - } - - if config - .experimental_supported_tools - .iter() - .any(|tool| tool == "list_dir") - { - let list_dir_handler = Arc::new(ListDirHandler); - builder.push_spec_with_parallel_support(create_list_dir_tool(), true); - builder.register_handler("list_dir", list_dir_handler); - } - - if config - .experimental_supported_tools - .contains(&"test_sync_tool".to_string()) - { - let test_sync_handler = Arc::new(TestSyncHandler); - builder.push_spec_with_parallel_support(create_test_sync_tool(), true); - builder.register_handler("test_sync_tool", test_sync_handler); - } - - if config.web_search_request { - builder.push_spec(ToolSpec::WebSearch {}); - } - - if config.include_view_image_tool { - builder.push_spec_with_parallel_support(create_view_image_tool(), true); - builder.register_handler("view_image", view_image_handler); - } - - if config.include_agent_tool { - builder.push_spec(create_agent_tool()); - builder.register_handler("agent", agent_handler); - } - - if let Some(mcp_tools) = mcp_tools { - let mut entries: Vec<(String, mcp_types::Tool)> = mcp_tools.into_iter().collect(); - entries.sort_by(|a, b| a.0.cmp(&b.0)); - - for (name, tool) in entries.into_iter() { - match mcp_tool_to_openai_tool(name.clone(), tool.clone()) { - Ok(converted_tool) => { - builder.push_spec(ToolSpec::Function(converted_tool)); - builder.register_handler(name, mcp_handler.clone()); - } - Err(e) => { - tracing::error!("Failed to convert {name:?} MCP tool to OpenAI tool: {e:?}"); - } - } - } - } - - builder -} - -#[cfg(test)] -mod tests { - use crate::client_common::tools::FreeformTool; - use crate::model_family::find_family_for_model; - use crate::tools::registry::ConfiguredToolSpec; - use mcp_types::ToolInputSchema; - use pretty_assertions::assert_eq; - - use super::*; - - fn tool_name(tool: &ToolSpec) -> &str { - match tool { - ToolSpec::Function(ResponsesApiTool { name, .. }) => name, - ToolSpec::LocalShell {} => "local_shell", - ToolSpec::WebSearch {} => "web_search", - ToolSpec::Freeform(FreeformTool { name, .. }) => name, - } - } - - // Avoid order-based assertions; compare via set containment instead. - fn assert_contains_tool_names(tools: &[ConfiguredToolSpec], expected_subset: &[&str]) { - use std::collections::HashSet; - let mut names = HashSet::new(); - let mut duplicates = Vec::new(); - for name in tools.iter().map(|t| tool_name(&t.spec)) { - if !names.insert(name) { - duplicates.push(name); - } - } - assert!( - duplicates.is_empty(), - "duplicate tool entries detected: {duplicates:?}" - ); - for expected in expected_subset { - assert!( - names.contains(expected), - "expected tool {expected} to be present; had: {names:?}" - ); - } - } - - fn shell_tool_name(config: &ToolsConfig) -> Option<&'static str> { - match config.shell_type { - ConfigShellToolType::Default => Some("shell"), - ConfigShellToolType::Local => Some("local_shell"), - ConfigShellToolType::UnifiedExec => None, - ConfigShellToolType::Disabled => None, - ConfigShellToolType::ShellCommand => Some("shell_command"), - } - } - - fn find_tool<'a>( - tools: &'a [ConfiguredToolSpec], - expected_name: &str, - ) -> &'a ConfiguredToolSpec { - tools - .iter() - .find(|tool| tool_name(&tool.spec) == expected_name) - .unwrap_or_else(|| panic!("expected tool {expected_name}")) - } - - fn strip_descriptions_schema(schema: &mut JsonSchema) { - match schema { - JsonSchema::Boolean { description } - | JsonSchema::String { description } - | JsonSchema::Number { description } => { - *description = None; - } - JsonSchema::Array { items, description } => { - strip_descriptions_schema(items); - *description = None; - } - JsonSchema::Object { - properties, - required: _, - additional_properties, - } => { - for v in properties.values_mut() { - strip_descriptions_schema(v); - } - if let Some(AdditionalProperties::Schema(s)) = additional_properties { - strip_descriptions_schema(s); - } - } - } - } - - fn strip_descriptions_tool(spec: &mut ToolSpec) { - match spec { - ToolSpec::Function(ResponsesApiTool { parameters, .. }) => { - strip_descriptions_schema(parameters); - } - ToolSpec::Freeform(_) | ToolSpec::LocalShell {} | ToolSpec::WebSearch {} => {} - } - } - - #[test] - fn test_full_toolset_specs_for_gpt5_codex_unified_exec_web_search() { - let model_family = find_family_for_model("gpt-5-codex") - .expect("gpt-5-codex should be a valid model family"); - let mut features = Features::with_defaults(); - features.enable(Feature::UnifiedExec); - features.enable(Feature::WebSearchRequest); - features.enable(Feature::ViewImageTool); - let config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, - features: &features, - }); - let (tools, _) = build_specs(&config, None).build(); - - // Build actual map name -> spec - use std::collections::BTreeMap; - use std::collections::HashSet; - let mut actual: BTreeMap = BTreeMap::new(); - let mut duplicate_names = Vec::new(); - for t in &tools { - let name = tool_name(&t.spec).to_string(); - if actual.insert(name.clone(), t.spec.clone()).is_some() { - duplicate_names.push(name); - } - } - assert!( - duplicate_names.is_empty(), - "duplicate tool entries detected: {duplicate_names:?}" - ); - - // Build expected from the same helpers used by the builder. - let mut expected: BTreeMap = BTreeMap::new(); - for spec in [ - create_exec_command_tool(), - create_write_stdin_tool(), - create_list_mcp_resources_tool(), - create_list_mcp_resource_templates_tool(), - create_read_mcp_resource_tool(), - PLAN_TOOL.clone(), - create_apply_patch_freeform_tool(), - ToolSpec::WebSearch {}, - create_view_image_tool(), - create_agent_tool(), - ] { - expected.insert(tool_name(&spec).to_string(), spec); - } - - // Exact name set match — this is the only test allowed to fail when tools change. - let actual_names: HashSet<_> = actual.keys().cloned().collect(); - let expected_names: HashSet<_> = expected.keys().cloned().collect(); - assert_eq!(actual_names, expected_names, "tool name set mismatch"); - - // Compare specs ignoring human-readable descriptions. - for name in expected.keys() { - let mut a = actual.get(name).expect("present").clone(); - let mut e = expected.get(name).expect("present").clone(); - strip_descriptions_tool(&mut a); - strip_descriptions_tool(&mut e); - assert_eq!(a, e, "spec mismatch for {name}"); - } - } - - fn assert_model_tools(model_family: &str, features: &Features, expected_tools: &[&str]) { - let model_family = find_family_for_model(model_family) - .unwrap_or_else(|| panic!("{model_family} should be a valid model family")); - let config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, - features, - }); - let (tools, _) = build_specs(&config, Some(HashMap::new())).build(); - let tool_names = tools.iter().map(|t| t.spec.name()).collect::>(); - assert_eq!(&tool_names, &expected_tools,); - } - - #[test] - fn test_build_specs_gpt5_codex_default() { - assert_model_tools( - "gpt-5-codex", - &Features::with_defaults(), - &[ - "shell_command", - "list_mcp_resources", - "list_mcp_resource_templates", - "read_mcp_resource", - "update_plan", - "apply_patch", - "view_image", - "agent", - ], - ); - } - - #[test] - fn test_build_specs_gpt51_codex_default() { - assert_model_tools( - "gpt-5.1-codex", - &Features::with_defaults(), - &[ - "shell_command", - "list_mcp_resources", - "list_mcp_resource_templates", - "read_mcp_resource", - "update_plan", - "apply_patch", - "view_image", - "agent", - ], - ); - } - - #[test] - fn test_build_specs_gpt5_codex_unified_exec_web_search() { - assert_model_tools( - "gpt-5-codex", - Features::with_defaults() - .enable(Feature::UnifiedExec) - .enable(Feature::WebSearchRequest), - &[ - "exec_command", - "write_stdin", - "list_mcp_resources", - "list_mcp_resource_templates", - "read_mcp_resource", - "update_plan", - "apply_patch", - "web_search", - "view_image", - "agent", - ], - ); - } - - #[test] - fn test_build_specs_gpt51_codex_unified_exec_web_search() { - assert_model_tools( - "gpt-5.1-codex", - Features::with_defaults() - .enable(Feature::UnifiedExec) - .enable(Feature::WebSearchRequest), - &[ - "exec_command", - "write_stdin", - "list_mcp_resources", - "list_mcp_resource_templates", - "read_mcp_resource", - "update_plan", - "apply_patch", - "web_search", - "view_image", - "agent", - ], - ); - } - - #[test] - fn test_codex_mini_defaults() { - assert_model_tools( - "codex-mini-latest", - &Features::with_defaults(), - &[ - "local_shell", - "list_mcp_resources", - "list_mcp_resource_templates", - "read_mcp_resource", - "update_plan", - "view_image", - "agent", - ], - ); - } - - #[test] - fn test_codex_5_1_mini_defaults() { - assert_model_tools( - "gpt-5.1-codex-mini", - &Features::with_defaults(), - &[ - "shell_command", - "list_mcp_resources", - "list_mcp_resource_templates", - "read_mcp_resource", - "update_plan", - "apply_patch", - "view_image", - "agent", - ], - ); - } - - #[test] - fn test_gpt_5_defaults() { - assert_model_tools( - "gpt-5", - &Features::with_defaults(), - &[ - "shell", - "list_mcp_resources", - "list_mcp_resource_templates", - "read_mcp_resource", - "update_plan", - "view_image", - "agent", - ], - ); - } - - #[test] - fn test_gpt_5_1_defaults() { - assert_model_tools( - "gpt-5.1", - &Features::with_defaults(), - &[ - "shell_command", - "list_mcp_resources", - "list_mcp_resource_templates", - "read_mcp_resource", - "update_plan", - "apply_patch", - "view_image", - "agent", - ], - ); - } - - #[test] - fn test_exp_5_1_defaults() { - assert_model_tools( - "exp-5.1", - &Features::with_defaults(), - &[ - "exec_command", - "write_stdin", - "list_mcp_resources", - "list_mcp_resource_templates", - "read_mcp_resource", - "update_plan", - "apply_patch", - "view_image", - "agent", - ], - ); - } - - #[test] - fn test_codex_mini_unified_exec_web_search() { - assert_model_tools( - "codex-mini-latest", - Features::with_defaults() - .enable(Feature::UnifiedExec) - .enable(Feature::WebSearchRequest), - &[ - "exec_command", - "write_stdin", - "list_mcp_resources", - "list_mcp_resource_templates", - "read_mcp_resource", - "update_plan", - "web_search", - "view_image", - "agent", - ], - ); - } - - #[test] - fn test_build_specs_default_shell_present() { - let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); - let mut features = Features::with_defaults(); - features.enable(Feature::WebSearchRequest); - features.enable(Feature::UnifiedExec); - let config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, - features: &features, - }); - let (tools, _) = build_specs(&config, Some(HashMap::new())).build(); - - // Only check the shell variant and a couple of core tools. - let mut subset = vec!["exec_command", "write_stdin", "update_plan"]; - if let Some(shell_tool) = shell_tool_name(&config) { - subset.push(shell_tool); - } - assert_contains_tool_names(&tools, &subset); - } - - #[test] - #[ignore] - fn test_parallel_support_flags() { - let model_family = find_family_for_model("gpt-5-codex") - .expect("codex-mini-latest should be a valid model family"); - let mut features = Features::with_defaults(); - features.disable(Feature::ViewImageTool); - features.enable(Feature::UnifiedExec); - let config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, - features: &features, - }); - let (tools, _) = build_specs(&config, None).build(); - - assert!(!find_tool(&tools, "exec_command").supports_parallel_tool_calls); - assert!(!find_tool(&tools, "write_stdin").supports_parallel_tool_calls); - assert!(find_tool(&tools, "grep_files").supports_parallel_tool_calls); - assert!(find_tool(&tools, "list_dir").supports_parallel_tool_calls); - assert!(find_tool(&tools, "read_file").supports_parallel_tool_calls); - } - - #[test] - fn test_test_model_family_includes_sync_tool() { - let model_family = find_family_for_model("test-gpt-5-codex") - .expect("test-gpt-5-codex should be a valid model family"); - let mut features = Features::with_defaults(); - features.disable(Feature::ViewImageTool); - let config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, - features: &features, - }); - let (tools, _) = build_specs(&config, None).build(); - - assert!( - tools - .iter() - .any(|tool| tool_name(&tool.spec) == "test_sync_tool") - ); - assert!( - tools - .iter() - .any(|tool| tool_name(&tool.spec) == "read_file") - ); - assert!( - tools - .iter() - .any(|tool| tool_name(&tool.spec) == "grep_files") - ); - assert!(tools.iter().any(|tool| tool_name(&tool.spec) == "list_dir")); - } - - #[test] - fn test_build_specs_mcp_tools_converted() { - let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); - let mut features = Features::with_defaults(); - features.enable(Feature::UnifiedExec); - features.enable(Feature::WebSearchRequest); - let config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, - features: &features, - }); - let (tools, _) = build_specs( - &config, - Some(HashMap::from([( - "test_server/do_something_cool".to_string(), - mcp_types::Tool { - name: "do_something_cool".to_string(), - input_schema: ToolInputSchema { - properties: Some(serde_json::json!({ - "string_argument": { - "type": "string", - }, - "number_argument": { - "type": "number", - }, - "object_argument": { - "type": "object", - "properties": { - "string_property": { "type": "string" }, - "number_property": { "type": "number" }, - }, - "required": [ - "string_property", - "number_property", - ], - "additionalProperties": Some(false), - }, - })), - required: None, - r#type: "object".to_string(), - }, - output_schema: None, - title: None, - annotations: None, - description: Some("Do something cool".to_string()), - }, - )])), - ) - .build(); - - let tool = find_tool(&tools, "test_server/do_something_cool"); - assert_eq!( - &tool.spec, - &ToolSpec::Function(ResponsesApiTool { - name: "test_server/do_something_cool".to_string(), - parameters: JsonSchema::Object { - properties: BTreeMap::from([ - ( - "string_argument".to_string(), - JsonSchema::String { description: None } - ), - ( - "number_argument".to_string(), - JsonSchema::Number { description: None } - ), - ( - "object_argument".to_string(), - JsonSchema::Object { - properties: BTreeMap::from([ - ( - "string_property".to_string(), - JsonSchema::String { description: None } - ), - ( - "number_property".to_string(), - JsonSchema::Number { description: None } - ), - ]), - required: Some(vec![ - "string_property".to_string(), - "number_property".to_string(), - ]), - additional_properties: Some(false.into()), - }, - ), - ]), - required: None, - additional_properties: None, - }, - description: "Do something cool".to_string(), - strict: false, - }) - ); - } - - #[test] - fn test_build_specs_mcp_tools_sorted_by_name() { - let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); - let mut features = Features::with_defaults(); - features.enable(Feature::UnifiedExec); - let config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, - features: &features, - }); - - // Intentionally construct a map with keys that would sort alphabetically. - let tools_map: HashMap = HashMap::from([ - ( - "test_server/do".to_string(), - mcp_types::Tool { - name: "a".to_string(), - input_schema: ToolInputSchema { - properties: Some(serde_json::json!({})), - required: None, - r#type: "object".to_string(), - }, - output_schema: None, - title: None, - annotations: None, - description: Some("a".to_string()), - }, - ), - ( - "test_server/something".to_string(), - mcp_types::Tool { - name: "b".to_string(), - input_schema: ToolInputSchema { - properties: Some(serde_json::json!({})), - required: None, - r#type: "object".to_string(), - }, - output_schema: None, - title: None, - annotations: None, - description: Some("b".to_string()), - }, - ), - ( - "test_server/cool".to_string(), - mcp_types::Tool { - name: "c".to_string(), - input_schema: ToolInputSchema { - properties: Some(serde_json::json!({})), - required: None, - r#type: "object".to_string(), - }, - output_schema: None, - title: None, - annotations: None, - description: Some("c".to_string()), - }, - ), - ]); - - let (tools, _) = build_specs(&config, Some(tools_map)).build(); - - // Only assert that the MCP tools themselves are sorted by fully-qualified name. - let mcp_names: Vec<_> = tools - .iter() - .map(|t| tool_name(&t.spec).to_string()) - .filter(|n| n.starts_with("test_server/")) - .collect(); - let expected = vec![ - "test_server/cool".to_string(), - "test_server/do".to_string(), - "test_server/something".to_string(), - ]; - assert_eq!(mcp_names, expected); - } - - #[test] - fn test_mcp_tool_property_missing_type_defaults_to_string() { - let model_family = find_family_for_model("gpt-5-codex") - .expect("gpt-5-codex should be a valid model family"); - let mut features = Features::with_defaults(); - features.enable(Feature::UnifiedExec); - features.enable(Feature::WebSearchRequest); - let config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, - features: &features, - }); - - let (tools, _) = build_specs( - &config, - Some(HashMap::from([( - "dash/search".to_string(), - mcp_types::Tool { - name: "search".to_string(), - input_schema: ToolInputSchema { - properties: Some(serde_json::json!({ - "query": { - "description": "search query" - } - })), - required: None, - r#type: "object".to_string(), - }, - output_schema: None, - title: None, - annotations: None, - description: Some("Search docs".to_string()), - }, - )])), - ) - .build(); - - let tool = find_tool(&tools, "dash/search"); - assert_eq!( - tool.spec, - ToolSpec::Function(ResponsesApiTool { - name: "dash/search".to_string(), - parameters: JsonSchema::Object { - properties: BTreeMap::from([( - "query".to_string(), - JsonSchema::String { - description: Some("search query".to_string()) - } - )]), - required: None, - additional_properties: None, - }, - description: "Search docs".to_string(), - strict: false, - }) - ); - } - - #[test] - fn test_mcp_tool_integer_normalized_to_number() { - let model_family = find_family_for_model("gpt-5-codex") - .expect("gpt-5-codex should be a valid model family"); - let mut features = Features::with_defaults(); - features.enable(Feature::UnifiedExec); - features.enable(Feature::WebSearchRequest); - let config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, - features: &features, - }); - - let (tools, _) = build_specs( - &config, - Some(HashMap::from([( - "dash/paginate".to_string(), - mcp_types::Tool { - name: "paginate".to_string(), - input_schema: ToolInputSchema { - properties: Some(serde_json::json!({ - "page": { "type": "integer" } - })), - required: None, - r#type: "object".to_string(), - }, - output_schema: None, - title: None, - annotations: None, - description: Some("Pagination".to_string()), - }, - )])), - ) - .build(); - - let tool = find_tool(&tools, "dash/paginate"); - assert_eq!( - tool.spec, - ToolSpec::Function(ResponsesApiTool { - name: "dash/paginate".to_string(), - parameters: JsonSchema::Object { - properties: BTreeMap::from([( - "page".to_string(), - JsonSchema::Number { description: None } - )]), - required: None, - additional_properties: None, - }, - description: "Pagination".to_string(), - strict: false, - }) - ); - } - - #[test] - fn test_mcp_tool_array_without_items_gets_default_string_items() { - let model_family = find_family_for_model("gpt-5-codex") - .expect("gpt-5-codex should be a valid model family"); - let mut features = Features::with_defaults(); - features.enable(Feature::UnifiedExec); - features.enable(Feature::WebSearchRequest); - features.enable(Feature::ApplyPatchFreeform); - let config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, - features: &features, - }); - - let (tools, _) = build_specs( - &config, - Some(HashMap::from([( - "dash/tags".to_string(), - mcp_types::Tool { - name: "tags".to_string(), - input_schema: ToolInputSchema { - properties: Some(serde_json::json!({ - "tags": { "type": "array" } - })), - required: None, - r#type: "object".to_string(), - }, - output_schema: None, - title: None, - annotations: None, - description: Some("Tags".to_string()), - }, - )])), - ) - .build(); - - let tool = find_tool(&tools, "dash/tags"); - assert_eq!( - tool.spec, - ToolSpec::Function(ResponsesApiTool { - name: "dash/tags".to_string(), - parameters: JsonSchema::Object { - properties: BTreeMap::from([( - "tags".to_string(), - JsonSchema::Array { - items: Box::new(JsonSchema::String { description: None }), - description: None - } - )]), - required: None, - additional_properties: None, - }, - description: "Tags".to_string(), - strict: false, - }) - ); - } - - #[test] - fn test_mcp_tool_anyof_defaults_to_string() { - let model_family = find_family_for_model("gpt-5-codex") - .expect("gpt-5-codex should be a valid model family"); - let mut features = Features::with_defaults(); - features.enable(Feature::UnifiedExec); - features.enable(Feature::WebSearchRequest); - let config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, - features: &features, - }); - - let (tools, _) = build_specs( - &config, - Some(HashMap::from([( - "dash/value".to_string(), - mcp_types::Tool { - name: "value".to_string(), - input_schema: ToolInputSchema { - properties: Some(serde_json::json!({ - "value": { "anyOf": [ { "type": "string" }, { "type": "number" } ] } - })), - required: None, - r#type: "object".to_string(), - }, - output_schema: None, - title: None, - annotations: None, - description: Some("AnyOf Value".to_string()), - }, - )])), - ) - .build(); - - let tool = find_tool(&tools, "dash/value"); - assert_eq!( - tool.spec, - ToolSpec::Function(ResponsesApiTool { - name: "dash/value".to_string(), - parameters: JsonSchema::Object { - properties: BTreeMap::from([( - "value".to_string(), - JsonSchema::String { description: None } - )]), - required: None, - additional_properties: None, - }, - description: "AnyOf Value".to_string(), - strict: false, - }) - ); - } - - #[test] - fn test_shell_tool() { - let tool = super::create_shell_tool(); - let ToolSpec::Function(ResponsesApiTool { - description, name, .. - }) = &tool - else { - panic!("expected function tool"); - }; - assert_eq!(name, "shell"); - - let expected = if cfg!(windows) { - r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"]. - -Examples of valid command strings: - -- ls -a (show hidden): ["powershell.exe", "-Command", "Get-ChildItem -Force"] -- recursive find by name: ["powershell.exe", "-Command", "Get-ChildItem -Recurse -Filter *.py"] -- recursive grep: ["powershell.exe", "-Command", "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"] -- ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"] -- setting an env var: ["powershell.exe", "-Command", "$env:FOO='bar'; echo $env:FOO"] -- running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"# - } else { - r#"Runs a shell command and returns its output. -- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"]. -- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary."# - }.to_string(); - assert_eq!(description, &expected); - } - - #[test] - fn test_shell_command_tool() { - let tool = super::create_shell_command_tool(); - let ToolSpec::Function(ResponsesApiTool { - description, name, .. - }) = &tool - else { - panic!("expected function tool"); - }; - assert_eq!(name, "shell_command"); - - let expected = if cfg!(windows) { - r#"Runs a Powershell command (Windows) and returns its output. - -Examples of valid command strings: - -- ls -a (show hidden): "Get-ChildItem -Force" -- recursive find by name: "Get-ChildItem -Recurse -Filter *.py" -- recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive" -- ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }" -- setting an env var: "$env:FOO='bar'; echo $env:FOO" -- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"#.to_string() - } else { - r#"Runs a shell command and returns its output. -- Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary."#.to_string() - }; - assert_eq!(description, &expected); - } - - #[test] - fn test_get_openai_tools_mcp_tools_with_additional_properties_schema() { - let model_family = find_family_for_model("gpt-5-codex") - .expect("gpt-5-codex should be a valid model family"); - let mut features = Features::with_defaults(); - features.enable(Feature::UnifiedExec); - features.enable(Feature::WebSearchRequest); - let config = ToolsConfig::new(&ToolsConfigParams { - model_family: &model_family, - features: &features, - }); - let (tools, _) = build_specs( - &config, - Some(HashMap::from([( - "test_server/do_something_cool".to_string(), - mcp_types::Tool { - name: "do_something_cool".to_string(), - input_schema: ToolInputSchema { - properties: Some(serde_json::json!({ - "string_argument": { - "type": "string", - }, - "number_argument": { - "type": "number", - }, - "object_argument": { - "type": "object", - "properties": { - "string_property": { "type": "string" }, - "number_property": { "type": "number" }, - }, - "required": [ - "string_property", - "number_property", - ], - "additionalProperties": { - "type": "object", - "properties": { - "addtl_prop": { "type": "string" }, - }, - "required": [ - "addtl_prop", - ], - "additionalProperties": false, - }, - }, - })), - required: None, - r#type: "object".to_string(), - }, - output_schema: None, - title: None, - annotations: None, - description: Some("Do something cool".to_string()), - }, - )])), - ) - .build(); - - let tool = find_tool(&tools, "test_server/do_something_cool"); - assert_eq!( - tool.spec, - ToolSpec::Function(ResponsesApiTool { - name: "test_server/do_something_cool".to_string(), - parameters: JsonSchema::Object { - properties: BTreeMap::from([ - ( - "string_argument".to_string(), - JsonSchema::String { description: None } - ), - ( - "number_argument".to_string(), - JsonSchema::Number { description: None } - ), - ( - "object_argument".to_string(), - JsonSchema::Object { - properties: BTreeMap::from([ - ( - "string_property".to_string(), - JsonSchema::String { description: None } - ), - ( - "number_property".to_string(), - JsonSchema::Number { description: None } - ), - ]), - required: Some(vec![ - "string_property".to_string(), - "number_property".to_string(), - ]), - additional_properties: Some( - JsonSchema::Object { - properties: BTreeMap::from([( - "addtl_prop".to_string(), - JsonSchema::String { description: None } - ),]), - required: Some(vec!["addtl_prop".to_string(),]), - additional_properties: Some(false.into()), - } - .into() - ), - }, - ), - ]), - required: None, - additional_properties: None, - }, - description: "Do something cool".to_string(), - strict: false, - }) - ); - } -} +@@ -35,6 +35,7 @@ pub(crate) struct ToolsConfig { + pub apply_patch_tool_type: Option, + pub web_search_request: bool, + pub include_view_image_tool: bool, ++ pub include_agent_tool: bool, + pub experimental_supported_tools: Vec, + } + +@@ -78,6 +79,7 @@ impl ToolsConfig { + apply_patch_tool_type, + web_search_request: include_web_search_request, + include_view_image_tool, ++ include_agent_tool: true, + experimental_supported_tools: model_family.experimental_supported_tools.clone(), + } + } +@@ -219,6 +221,45 @@ fn create_exec_command_tool() -> ToolSpec { + }) + } + ++fn create_agent_tool() -> ToolSpec { ++ let mut properties = BTreeMap::new(); ++ properties.insert( ++ "name".to_string(), ++ JsonSchema::String { ++ description: Some("Registered agent name to run.".to_string()), ++ }, ++ ); ++ properties.insert( ++ "task".to_string(), ++ JsonSchema::String { ++ description: Some("Task or instruction for the agent to complete.".to_string()), ++ }, ++ ); ++ properties.insert( ++ "context".to_string(), ++ JsonSchema::String { ++ description: Some("Optional additional context the agent should consider.".to_string()), ++ }, ++ ); ++ properties.insert( ++ "plan_item_id".to_string(), ++ JsonSchema::String { ++ description: Some("Optional plan item id associated with this run.".to_string()), ++ }, ++ ); ++ ++ ToolSpec::Function(ResponsesApiTool { ++ name: "agent".to_string(), ++ description: "Invoke a named agent with a task to execute.".to_string(), ++ strict: false, ++ parameters: JsonSchema::Object { ++ properties, ++ required: Some(vec!["name".to_string(), "task".to_string()]), ++ additional_properties: Some(false.into()), ++ }, ++ }) ++} ++ + fn create_write_stdin_tool() -> ToolSpec { + let mut properties = BTreeMap::new(); + properties.insert( +@@ -973,6 +1014,7 @@ pub(crate) fn build_specs( + config: &ToolsConfig, + mcp_tools: Option>,\n ) -> ToolRegistryBuilder { ++ use crate::tools::handlers::AgentHandler; + use crate::tools::handlers::ApplyPatchHandler; + use crate::tools::handlers::GrepFilesHandler; + use crate::tools::handlers::ListDirHandler; +@@ -993,6 +1035,7 @@ pub(crate) fn build_specs( + let unified_exec_handler = Arc::new(UnifiedExecHandler); + let plan_handler = Arc::new(PlanHandler); + let apply_patch_handler = Arc::new(ApplyPatchHandler); ++ let agent_handler = Arc::new(AgentHandler); + let view_image_handler = Arc::new(ViewImageHandler); + let mcp_handler = Arc::new(McpHandler); + let mcp_resource_handler = Arc::new(McpResourceHandler); +@@ -1095,6 +1138,11 @@ pub(crate) fn build_specs( + builder.register_handler("view_image", view_image_handler); + } + ++ if config.include_agent_tool { ++ builder.push_spec_with_parallel_support(create_agent_tool(), true); ++ builder.register_handler("agent", agent_handler); ++ } ++ + if let Some(mcp_tools) = mcp_tools { + let mut entries: Vec<(String, mcp_types::Tool)> = mcp_tools.into_iter().collect(); + entries.sort_by(|a, b| a.0.cmp(&b.0)); +@@ -1253,6 +1301,7 @@ mod tests { + create_apply_patch_freeform_tool(), + ToolSpec::WebSearch {}, + create_view_image_tool(), ++ create_agent_tool(), + ] { + expected.insert(tool_name(&spec).to_string(), spec); + } +@@ -1297,6 +1346,7 @@ mod tests { + "update_plan", + "apply_patch", + "view_image", ++ "agent", + ], + ); + } +@@ -1314,6 +1364,7 @@ mod tests { + "update_plan", + "apply_patch", + "view_image", ++ "agent", + ], + ); + } +@@ -1335,6 +1386,7 @@ mod tests { + "apply_patch", + "web_search", + "view_image", ++ "agent", + ], + ); + } +@@ -1356,6 +1408,7 @@ mod tests { + "apply_patch", + "web_search",\n "view_image", ++ "agent", + ], + ); + } +@@ -1372,6 +1425,7 @@ mod tests { + "read_mcp_resource", + "update_plan", + "view_image", ++ "agent", + ], + ); + } +@@ -1389,6 +1443,7 @@ mod tests { + "update_plan", + "apply_patch", + "view_image", ++ "agent", + ], + ); + } +@@ -1405,6 +1460,7 @@ mod tests { + "read_mcp_resource", + "update_plan", + "view_image", ++ "agent", + ], + ); + } +@@ -1422,6 +1478,7 @@ mod tests { + "update_plan", + "apply_patch", + "view_image", ++ "agent", + ], + ); + } +@@ -1440,6 +1497,7 @@ mod tests { + "update_plan", + "apply_patch", + "view_image", ++ "agent", + ], + ); + } +@@ -1460,6 +1518,7 @@ mod tests { + "update_plan", + "web_search", + "view_image", ++ "agent", + ], + ); + } \ No newline at end of file From 3d52a35ccd74a2c563465ab2bfe40fad68b32b4d Mon Sep 17 00:00:00 2001 From: Grinsven <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 14:19:45 +0100 Subject: [PATCH 17/22] fix: persist agent events, refactor prompt loading, harden security --- codex-rs/core/src/agent.rs | 39 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/codex-rs/core/src/agent.rs b/codex-rs/core/src/agent.rs index f673005952a2..017bfc88d283 100644 --- a/codex-rs/core/src/agent.rs +++ b/codex-rs/core/src/agent.rs @@ -190,27 +190,21 @@ impl AgentRegistry { // If prompt_file is specified, load the prompt from file if let Some(ref prompt_file) = config.prompt_file { // Validate the path to prevent traversal attacks - match Self::validate_prompt_path(dir, prompt_file) { - Ok(safe_path) => { - match std::fs::read_to_string(&safe_path) { - Ok(prompt_content) => { - config.prompt = Some(prompt_content); - tracing::debug!( - "Loaded prompt file for agent '{}'", - name - ); - } - Err(e) => { - tracing::error!( - "Cannot read prompt file '{}' for agent '{}': {}", - prompt_file, - name, - e - ); - // Skip this agent but continue loading others - continue; - } - } + match Self::validate_prompt_path(dir, prompt_file).and_then(|path| { + std::fs::read_to_string(&path).map_err(|e| { + anyhow::anyhow!( + "Cannot read prompt file '{}': {}", + path.display(), + e + ) + }) + }) { + Ok(prompt_content) => { + config.prompt = Some(prompt_content); + tracing::debug!( + "Loaded prompt file for agent '{}'", + name + ); } Err(e) => { tracing::error!( @@ -289,8 +283,7 @@ impl AgentRegistry { agents.sort_by(|a, b| { // Built-in agents first, then alphabetical match (a.is_builtin, b.is_builtin) { - (true, false) => std::cmp::Ordering::Less, - (false, true) => std::cmp::Ordering::Greater, + (true, false) => std::cmp::Ordering::Less,\n (false, true) => std::cmp::Ordering::Greater, _ => a.name.cmp(&b.name), } }); From 652f075d1bb0cc2ada0055edf91b4fecb5c3477f Mon Sep 17 00:00:00 2001 From: Jose <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 15:02:40 +0100 Subject: [PATCH 18/22] fix: persist agent events, refactor prompt loading, harden security --- MULTI_AGENT_IMPLEMENTATION.md | 46 + codex-rs/core/src/agent.rs | 11 +- codex-rs/core/src/tools/spec.rs | 2263 +++++++++++++++++-- codex-rs/core/tests/suite/agent_security.rs | 77 + codex-rs/core/tests/suite/mod.rs | 2 + 5 files changed, 2220 insertions(+), 179 deletions(-) create mode 100644 MULTI_AGENT_IMPLEMENTATION.md create mode 100644 codex-rs/core/tests/suite/agent_security.rs diff --git a/MULTI_AGENT_IMPLEMENTATION.md b/MULTI_AGENT_IMPLEMENTATION.md new file mode 100644 index 000000000000..6c64e440a7d4 --- /dev/null +++ b/MULTI_AGENT_IMPLEMENTATION.md @@ -0,0 +1,46 @@ +# Multi-Agent Feature Implementation Notes + +## Completed Tasks (as of 2025-11-24) + +### Core Implementation +- [x] **Agent Registry**: Implemented in `codex-rs/core/src/agent.rs` to load agent configurations from `~/.codex/agents.toml`. + - Added security validation to prevent loading prompts from outside the allowed directory (`validate_prompt_path`). + - Added symlink protection for the configuration file itself. + - Implemented nested prompt loading refactor for readability. +- [x] **Agent Handler**: Implemented `AgentHandler` in `codex-rs/core/src/tools/handlers/agent.rs` to execute agent tasks. + - Uses the `agent` tool to invoke sub-agents. + - **Tool Inheritance**: Sub-agents inherit tools from the parent session, but the `agent` tool itself is excluded to prevent recursion. + - **ID Randomization**: Uses `rand::thread_rng().r#gen()` to generate unique call IDs, fixing potential collision issues and deprecated usage. + - **Response Handling**: Streams agent output and reasoning deltas back to the session. +- [x] **Protocol Updates**: Added `AgentBegin`, `AgentProgress`, `AgentEnd`, and `ListAgentsResponse` events to `codex-rs/protocol/src/protocol.rs`. + - Updated `codex-rs/core/src/rollout/policy.rs` to persist `AgentBegin` and `AgentEnd` events for history playback. +- [x] **Tools Spec**: Registered the `agent` tool in `codex-rs/core/src/tools/spec.rs`. + - Enabled parallel execution support for the agent tool. + +### TUI Integration +- [x] **Slash Commands**: Added `/agents` command to list available agents (`codex-rs/tui/src/chatwidget.rs`, `slash_command.rs`). +- [x] **Mentions**: Implemented parsing for `@agent: task` syntax in `codex-rs/tui/src/agent_mention.rs`. + - Regex updated to support hyphens in agent names (e.g., `@code-reviewer`). + - Mentions are converted into `agent` tool calls before submission. +- [x] **Visual Feedback**: Added history cells for agent execution status (running, progress, done) in `codex-rs/tui/src/history_cell.rs`. + +### Documentation +- [x] **Guide**: Created `docs/subagents.md` detailing configuration, usage, and best practices. +- [x] **Examples**: Created `example-agents.toml` with sample agent configurations. +- [x] **Getting Started**: Updated `README.md` and `docs/getting-started.md` with quickstart instructions. +- [x] **AGENTS.md**: Added agent usage rules and ensured trailing newline compliance. + +### Testing +- [x] **Unit Tests**: Added tests for agent registry validation and mention parsing. +- [x] **Integration Tests**: Added `sub_agent_inherits_tools` test in `codex-rs/core/tests/suite/agent_tool.rs` to verify tool inheritance logic. +- [x] **Verification**: All tests passed in `codex-core` (459 tests) and `codex-tui` (490 tests). + +## Pending / Future Work +- [ ] **Runtime Error Investigation**: The error `{"detail":"Instructions are not valid"}` was observed when running agents against specific endpoints (likely GitHub Models/Azure). This is identified as a configuration mismatch (using `Responses` protocol with an endpoint that rejects `instructions`). Future work could auto-detect this or warn the user. +- [ ] **Test Coverage**: Add more robust integration tests for the TUI interactions if possible (currently manual verification). +- [ ] **Release**: Build and publish the updated `codex-cli` package. + +## Branch State +- **Branch**: `agents-multi-final` +- **Latest Commit**: `3d52a35cc` (fix: persist agent events, refactor prompt loading, harden security) +- **Sync Status**: Local workspace is synced with remote. diff --git a/codex-rs/core/src/agent.rs b/codex-rs/core/src/agent.rs index 017bfc88d283..716baba73cee 100644 --- a/codex-rs/core/src/agent.rs +++ b/codex-rs/core/src/agent.rs @@ -139,6 +139,11 @@ impl AgentRegistry { /// Create a new agent registry, loading user configurations if available pub fn new() -> Result { + Self::from_paths(Self::get_agents_directory()) + } + + /// Create a registry from a specific configuration directory (exposed for testing) + pub fn from_paths(agents_dir: Option) -> Result { let mut agents = HashMap::new(); // Add the single default "general" agent @@ -152,8 +157,7 @@ impl AgentRegistry { } ); - // Try to load user agents from ~/.codex/agents.toml - let agents_dir = Self::get_agents_directory(); + // Try to load user agents from the provided directory if let Some(ref dir) = agents_dir { if dir.symlink_metadata().map_or(false, |m| m.is_symlink()) { tracing::error!( @@ -283,7 +287,8 @@ impl AgentRegistry { agents.sort_by(|a, b| { // Built-in agents first, then alphabetical match (a.is_builtin, b.is_builtin) { - (true, false) => std::cmp::Ordering::Less,\n (false, true) => std::cmp::Ordering::Greater, + (true, false) => std::cmp::Ordering::Less, + (false, true) => std::cmp::Ordering::Greater, _ => a.name.cmp(&b.name), } }); diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index 1ca4340bd8ba..42a77e72440f 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -1,176 +1,2087 @@ -@@ -35,6 +35,7 @@ pub(crate) struct ToolsConfig { - pub apply_patch_tool_type: Option, - pub web_search_request: bool, - pub include_view_image_tool: bool, -+ pub include_agent_tool: bool, - pub experimental_supported_tools: Vec, - } - -@@ -78,6 +79,7 @@ impl ToolsConfig { - apply_patch_tool_type, - web_search_request: include_web_search_request, - include_view_image_tool, -+ include_agent_tool: true, - experimental_supported_tools: model_family.experimental_supported_tools.clone(), - } - } -@@ -219,6 +221,45 @@ fn create_exec_command_tool() -> ToolSpec { - }) - } - -+fn create_agent_tool() -> ToolSpec { -+ let mut properties = BTreeMap::new(); -+ properties.insert( -+ "name".to_string(), -+ JsonSchema::String { -+ description: Some("Registered agent name to run.".to_string()), -+ }, -+ ); -+ properties.insert( -+ "task".to_string(), -+ JsonSchema::String { -+ description: Some("Task or instruction for the agent to complete.".to_string()), -+ }, -+ ); -+ properties.insert( -+ "context".to_string(), -+ JsonSchema::String { -+ description: Some("Optional additional context the agent should consider.".to_string()), -+ }, -+ ); -+ properties.insert( -+ "plan_item_id".to_string(), -+ JsonSchema::String { -+ description: Some("Optional plan item id associated with this run.".to_string()), -+ }, -+ ); -+ -+ ToolSpec::Function(ResponsesApiTool { -+ name: "agent".to_string(), -+ description: "Invoke a named agent with a task to execute.".to_string(), -+ strict: false, -+ parameters: JsonSchema::Object { -+ properties, -+ required: Some(vec!["name".to_string(), "task".to_string()]), -+ additional_properties: Some(false.into()), -+ }, -+ }) -+} -+ - fn create_write_stdin_tool() -> ToolSpec { - let mut properties = BTreeMap::new(); - properties.insert( -@@ -973,6 +1014,7 @@ pub(crate) fn build_specs( - config: &ToolsConfig, - mcp_tools: Option>,\n ) -> ToolRegistryBuilder { -+ use crate::tools::handlers::AgentHandler; - use crate::tools::handlers::ApplyPatchHandler; - use crate::tools::handlers::GrepFilesHandler; - use crate::tools::handlers::ListDirHandler; -@@ -993,6 +1035,7 @@ pub(crate) fn build_specs( - let unified_exec_handler = Arc::new(UnifiedExecHandler); - let plan_handler = Arc::new(PlanHandler); - let apply_patch_handler = Arc::new(ApplyPatchHandler); -+ let agent_handler = Arc::new(AgentHandler); - let view_image_handler = Arc::new(ViewImageHandler); - let mcp_handler = Arc::new(McpHandler); - let mcp_resource_handler = Arc::new(McpResourceHandler); -@@ -1095,6 +1138,11 @@ pub(crate) fn build_specs( - builder.register_handler("view_image", view_image_handler); - } - -+ if config.include_agent_tool { -+ builder.push_spec_with_parallel_support(create_agent_tool(), true); -+ builder.register_handler("agent", agent_handler); -+ } -+ - if let Some(mcp_tools) = mcp_tools { - let mut entries: Vec<(String, mcp_types::Tool)> = mcp_tools.into_iter().collect(); - entries.sort_by(|a, b| a.0.cmp(&b.0)); -@@ -1253,6 +1301,7 @@ mod tests { - create_apply_patch_freeform_tool(), - ToolSpec::WebSearch {}, - create_view_image_tool(), -+ create_agent_tool(), - ] { - expected.insert(tool_name(&spec).to_string(), spec); - } -@@ -1297,6 +1346,7 @@ mod tests { - "update_plan", - "apply_patch", - "view_image", -+ "agent", - ], - ); - } -@@ -1314,6 +1364,7 @@ mod tests { - "update_plan", - "apply_patch", - "view_image", -+ "agent", - ], - ); - } -@@ -1335,6 +1386,7 @@ mod tests { - "apply_patch", - "web_search", - "view_image", -+ "agent", - ], - ); - } -@@ -1356,6 +1408,7 @@ mod tests { - "apply_patch", - "web_search",\n "view_image", -+ "agent", - ], - ); - } -@@ -1372,6 +1425,7 @@ mod tests { - "read_mcp_resource", - "update_plan", - "view_image", -+ "agent", - ], - ); - } -@@ -1389,6 +1443,7 @@ mod tests { - "update_plan", - "apply_patch", - "view_image", -+ "agent", - ], - ); - } -@@ -1405,6 +1460,7 @@ mod tests { - "read_mcp_resource", - "update_plan", - "view_image", -+ "agent", - ], - ); - } -@@ -1422,6 +1478,7 @@ mod tests { - "update_plan", - "apply_patch", - "view_image", -+ "agent", - ], - ); - } -@@ -1440,6 +1497,7 @@ mod tests { - "update_plan", - "apply_patch", - "view_image", -+ "agent", - ], - ); - } -@@ -1460,6 +1518,7 @@ mod tests { - "update_plan", - "web_search", - "view_image", -+ "agent", - ], - ); - } \ No newline at end of file +use crate::client_common::tools::ResponsesApiTool; +use crate::client_common::tools::ToolSpec; +use crate::features::Feature; +use crate::features::Features; +use crate::model_family::ModelFamily; +use crate::tools::handlers::PLAN_TOOL; +use crate::tools::handlers::apply_patch::ApplyPatchToolType; +use crate::tools::handlers::apply_patch::create_apply_patch_freeform_tool; +use crate::tools::handlers::apply_patch::create_apply_patch_json_tool; +use crate::tools::registry::ToolRegistryBuilder; +use serde::Deserialize; +use serde::Serialize; +use serde_json::Value as JsonValue; +use serde_json::json; +use std::collections::BTreeMap; +use std::collections::HashMap; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ConfigShellToolType { + Default, + Local, + UnifiedExec, + /// Do not include a shell tool by default. Useful when using Codex + /// with tools provided exclusively provided by MCP servers. Often used + /// with `--config base_instructions=CUSTOM_INSTRUCTIONS` + /// to customize agent behavior. + Disabled, + /// Takes a command as a single string to be run in the user's default shell. + ShellCommand, +} + +#[derive(Debug, Clone)] +pub(crate) struct ToolsConfig { + pub shell_type: ConfigShellToolType, + pub apply_patch_tool_type: Option, + pub web_search_request: bool, + pub include_view_image_tool: bool, + pub include_agent_tool: bool, + pub experimental_supported_tools: Vec, +} + +pub(crate) struct ToolsConfigParams<'a> { + pub(crate) model_family: &'a ModelFamily, + pub(crate) features: &'a Features, +} + +impl ToolsConfig { + pub fn new(params: &ToolsConfigParams) -> Self { + let ToolsConfigParams { + model_family, + features, + } = params; + let include_apply_patch_tool = features.enabled(Feature::ApplyPatchFreeform); + let include_web_search_request = features.enabled(Feature::WebSearchRequest); + let include_view_image_tool = features.enabled(Feature::ViewImageTool); + + let shell_type = if !features.enabled(Feature::ShellTool) { + ConfigShellToolType::Disabled + } else if features.enabled(Feature::UnifiedExec) { + ConfigShellToolType::UnifiedExec + } else { + model_family.shell_type.clone() + }; + + let apply_patch_tool_type = match model_family.apply_patch_tool_type { + Some(ApplyPatchToolType::Freeform) => Some(ApplyPatchToolType::Freeform), + Some(ApplyPatchToolType::Function) => Some(ApplyPatchToolType::Function), + None => { + if include_apply_patch_tool { + Some(ApplyPatchToolType::Freeform) + } else { + None + } + } + }; + + Self { + shell_type, + apply_patch_tool_type, + web_search_request: include_web_search_request, + include_view_image_tool, + include_agent_tool: true, + experimental_supported_tools: model_family.experimental_supported_tools.clone(), + } + } +} + +/// Generic JSON‑Schema subset needed for our tool definitions +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(tag = "type", rename_all = "lowercase")] +pub(crate) enum JsonSchema { + Boolean { + #[serde(skip_serializing_if = "Option::is_none")] + description: Option, + }, + String { + #[serde(skip_serializing_if = "Option::is_none")] + description: Option, + }, + /// MCP schema allows "number" | "integer" for Number + #[serde(alias = "integer")] + Number { + #[serde(skip_serializing_if = "Option::is_none")] + description: Option, + }, + Array { + items: Box, + + #[serde(skip_serializing_if = "Option::is_none")] + description: Option, + }, + Object { + properties: BTreeMap, + #[serde(skip_serializing_if = "Option::is_none")] + required: Option>, + #[serde( + rename = "additionalProperties", + skip_serializing_if = "Option::is_none" + )] + additional_properties: Option, + }, +} + +/// Whether additional properties are allowed, and if so, any required schema +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(untagged)] +pub(crate) enum AdditionalProperties { + Boolean(bool), + Schema(Box), +} + +impl From for AdditionalProperties { + fn from(b: bool) -> Self { + Self::Boolean(b) + } +} + +impl From for AdditionalProperties { + fn from(s: JsonSchema) -> Self { + Self::Schema(Box::new(s)) + } +} + +fn create_exec_command_tool() -> ToolSpec { + let mut properties = BTreeMap::new(); + properties.insert( + "command".to_string(), + JsonSchema::String { + description: Some("The command to execute.".to_string()), + }, + ); + ToolSpec::Function(ResponsesApiTool { + name: "exec_command".to_string(), + description: "Execute a shell command.".to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["command".to_string()]), + additional_properties: Some(false.into()), + }, + }) +} + +fn create_agent_tool() -> ToolSpec { + let mut properties = BTreeMap::new(); + properties.insert( + "name".to_string(), + JsonSchema::String { + description: Some("Registered agent name to run.".to_string()), + }, + ); + properties.insert( + "task".to_string(), + JsonSchema::String { + description: Some("Task or instruction for the agent to complete.".to_string()), + }, + ); + properties.insert( + "context".to_string(), + JsonSchema::String { + description: Some("Optional additional context the agent should consider.".to_string()), + }, + ); + properties.insert( + "plan_item_id".to_string(), + JsonSchema::String { + description: Some("Optional plan item id associated with this run.".to_string()), + }, + ); + + ToolSpec::Function(ResponsesApiTool { + name: "agent".to_string(), + description: "Invoke a named agent with a task to execute.".to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["name".to_string(), "task".to_string()]), + additional_properties: Some(false.into()), + }, + }) +} + +fn create_write_stdin_tool() -> ToolSpec { + let mut properties = BTreeMap::new(); + properties.insert( + "session_id".to_string(), + JsonSchema::Number { + description: Some("Identifier of the running unified exec session.".to_string()), + }, + ); + properties.insert( + "chars".to_string(), + JsonSchema::String { + description: Some("Bytes to write to stdin (may be empty to poll).".to_string()), + }, + ); + properties.insert( + "yield_time_ms".to_string(), + JsonSchema::Number { + description: Some( + "How long to wait (in milliseconds) for output before yielding.".to_string(), + ), + }, + ); + properties.insert( + "max_output_tokens".to_string(), + JsonSchema::Number { + description: Some( + "Maximum number of tokens to return. Excess output will be truncated.".to_string(), + ), + }, + ); + + ToolSpec::Function(ResponsesApiTool { + name: "write_stdin".to_string(), + description: + "Writes characters to an existing unified exec session and returns recent output." + .to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["session_id".to_string()]), + additional_properties: Some(false.into()), + }, + }) +} + +fn create_shell_tool() -> ToolSpec { + let mut properties = BTreeMap::new(); + properties.insert( + "command".to_string(), + JsonSchema::Array { + items: Box::new(JsonSchema::String { description: None }), + description: Some("The command to execute".to_string()), + }, + ); + properties.insert( + "workdir".to_string(), + JsonSchema::String { + description: Some("The working directory to execute the command in".to_string()), + }, + ); + properties.insert( + "timeout_ms".to_string(), + JsonSchema::Number { + description: Some("The timeout for the command in milliseconds".to_string()), + }, + ); + + properties.insert( + "with_escalated_permissions".to_string(), + JsonSchema::Boolean { + description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()), + }, + ); + properties.insert( + "justification".to_string(), + JsonSchema::String { + description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()), + }, + ); + + let description = if cfg!(windows) { + r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"]. + +Examples of valid command strings: + +- ls -a (show hidden): ["powershell.exe", "-Command", "Get-ChildItem -Force"] +- recursive find by name: ["powershell.exe", "-Command", "Get-ChildItem -Recurse -Filter *.py"] +- recursive grep: ["powershell.exe", "-Command", "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"] +- ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"] +- setting an env var: ["powershell.exe", "-Command", "$env:FOO='bar'; echo $env:FOO"] +- running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"# + } else { + r#"Runs a shell command and returns its output. +- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"]. +- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary."# + }.to_string(); + + ToolSpec::Function(ResponsesApiTool { + name: "shell".to_string(), + description, + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["command".to_string()]), + additional_properties: Some(false.into()), + }, + }) +} + +fn create_shell_command_tool() -> ToolSpec { + let mut properties = BTreeMap::new(); + properties.insert( + "command".to_string(), + JsonSchema::String { + description: Some( + "The shell script to execute in the user's default shell".to_string(), + ), + }, + ); + properties.insert( + "workdir".to_string(), + JsonSchema::String { + description: Some("The working directory to execute the command in".to_string()), + }, + ); + properties.insert( + "timeout_ms".to_string(), + JsonSchema::Number { + description: Some("The timeout for the command in milliseconds".to_string()), + }, + ); + properties.insert( + "with_escalated_permissions".to_string(), + JsonSchema::Boolean { + description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()), + }, + ); + properties.insert( + "justification".to_string(), + JsonSchema::String { + description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()), + }, + ); + + let description = if cfg!(windows) { + r#"Runs a Powershell command (Windows) and returns its output. + +Examples of valid command strings: + +- ls -a (show hidden): "Get-ChildItem -Force" +- recursive find by name: "Get-ChildItem -Recurse -Filter *.py" +- recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive" +- ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }" +- setting an env var: "$env:FOO='bar'; echo $env:FOO" +- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"# + } else { + r#"Runs a shell command and returns its output. +- Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary."# + }.to_string(); + + ToolSpec::Function(ResponsesApiTool { + name: "shell_command".to_string(), + description, + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["command".to_string()]), + additional_properties: Some(false.into()), + }, + }) +} + +fn create_view_image_tool() -> ToolSpec { + // Support only local filesystem path. + let mut properties = BTreeMap::new(); + properties.insert( + "path".to_string(), + JsonSchema::String { + description: Some("Local filesystem path to an image file".to_string()), + }, + ); + + ToolSpec::Function(ResponsesApiTool { + name: "view_image".to_string(), + description: + "Attach a local image (by filesystem path) to the conversation context for this turn." + .to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["path".to_string()]), + additional_properties: Some(false.into()), + }, + }) +} + +fn create_test_sync_tool() -> ToolSpec { + let mut properties = BTreeMap::new(); + properties.insert( + "sleep_before_ms".to_string(), + JsonSchema::Number { + description: Some("Optional delay in milliseconds before any other action".to_string()), + }, + ); + properties.insert( + "sleep_after_ms".to_string(), + JsonSchema::Number { + description: Some( + "Optional delay in milliseconds after completing the barrier".to_string(), + ), + }, + ); + + let mut barrier_properties = BTreeMap::new(); + barrier_properties.insert( + "id".to_string(), + JsonSchema::String { + description: Some( + "Identifier shared by concurrent calls that should rendezvous".to_string(), + ), + }, + ); + barrier_properties.insert( + "participants".to_string(), + JsonSchema::Number { + description: Some( + "Number of tool calls that must arrive before the barrier opens".to_string(), + ), + }, + ); + barrier_properties.insert( + "timeout_ms".to_string(), + JsonSchema::Number { + description: Some("Maximum time in milliseconds to wait at the barrier".to_string()), + }, + ); + + properties.insert( + "barrier".to_string(), + JsonSchema::Object { + properties: barrier_properties, + required: Some(vec!["id".to_string(), "participants".to_string()]), + additional_properties: Some(false.into()), + }, + ); + + ToolSpec::Function(ResponsesApiTool { + name: "test_sync_tool".to_string(), + description: "Internal synchronization helper used by Codex integration tests.".to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: None, + additional_properties: Some(false.into()), + }, + }) +} + +fn create_grep_files_tool() -> ToolSpec { + let mut properties = BTreeMap::new(); + properties.insert( + "pattern".to_string(), + JsonSchema::String { + description: Some("Regular expression pattern to search for.".to_string()), + }, + ); + properties.insert( + "include".to_string(), + JsonSchema::String { + description: Some( + "Optional glob that limits which files are searched (e.g. \"*.rs\" or \ + \"*.{ts,tsx}\")." + .to_string(), + ), + }, + ); + properties.insert( + "path".to_string(), + JsonSchema::String { + description: Some( + "Directory or file path to search. Defaults to the session's working directory." + .to_string(), + ), + }, + ); + properties.insert( + "limit".to_string(), + JsonSchema::Number { + description: Some( + "Maximum number of file paths to return (defaults to 100).".to_string(), + ), + }, + ); + + ToolSpec::Function(ResponsesApiTool { + name: "grep_files".to_string(), + description: "Finds files whose contents match the pattern and lists them by modification \ + time." + .to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["pattern".to_string()]), + additional_properties: Some(false.into()), + }, + }) +} + +fn create_read_file_tool() -> ToolSpec { + let mut properties = BTreeMap::new(); + properties.insert( + "file_path".to_string(), + JsonSchema::String { + description: Some("Absolute path to the file".to_string()), + }, + ); + properties.insert( + "offset".to_string(), + JsonSchema::Number { + description: Some( + "The line number to start reading from. Must be 1 or greater.".to_string(), + ), + }, + ); + properties.insert( + "limit".to_string(), + JsonSchema::Number { + description: Some("The maximum number of lines to return.".to_string()), + }, + ); + properties.insert( + "mode".to_string(), + JsonSchema::String { + description: Some( + "Optional mode selector: \"slice\" for simple ranges (default) or \"indentation\" \ + to expand around an anchor line." + .to_string(), + ), + }, + ); + + let mut indentation_properties = BTreeMap::new(); + indentation_properties.insert( + "anchor_line".to_string(), + JsonSchema::Number { + description: Some( + "Anchor line to center the indentation lookup on (defaults to offset).".to_string(), + ), + }, + ); + indentation_properties.insert( + "max_levels".to_string(), + JsonSchema::Number { + description: Some( + "How many parent indentation levels (smaller indents) to include.".to_string(), + ), + }, + ); + indentation_properties.insert( + "include_siblings".to_string(), + JsonSchema::Boolean { + description: Some( + "When true, include additional blocks that share the anchor indentation." + .to_string(), + ), + }, + ); + indentation_properties.insert( + "include_header".to_string(), + JsonSchema::Boolean { + description: Some( + "Include doc comments or attributes directly above the selected block.".to_string(), + ), + }, + ); + indentation_properties.insert( + "max_lines".to_string(), + JsonSchema::Number { + description: Some( + "Hard cap on the number of lines returned when using indentation mode.".to_string(), + ), + }, + ); + properties.insert( + "indentation".to_string(), + JsonSchema::Object { + properties: indentation_properties, + required: None, + additional_properties: Some(false.into()), + }, + ); + + ToolSpec::Function(ResponsesApiTool { + name: "read_file".to_string(), + description: + "Reads a local file with 1-indexed line numbers, supporting slice and indentation-aware block modes." + .to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["file_path".to_string()]), + additional_properties: Some(false.into()), + }, + }) +} + +fn create_list_dir_tool() -> ToolSpec { + let mut properties = BTreeMap::new(); + properties.insert( + "dir_path".to_string(), + JsonSchema::String { + description: Some("Absolute path to the directory to list.".to_string()), + }, + ); + properties.insert( + "offset".to_string(), + JsonSchema::Number { + description: Some( + "The entry number to start listing from. Must be 1 or greater.".to_string(), + ), + }, + ); + properties.insert( + "limit".to_string(), + JsonSchema::Number { + description: Some("The maximum number of entries to return.".to_string()), + }, + ); + properties.insert( + "depth".to_string(), + JsonSchema::Number { + description: Some( + "The maximum directory depth to traverse. Must be 1 or greater.".to_string(), + ), + }, + ); + + ToolSpec::Function(ResponsesApiTool { + name: "list_dir".to_string(), + description: + "Lists entries in a local directory with 1-indexed entry numbers and simple type labels." + .to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["dir_path".to_string()]), + additional_properties: Some(false.into()), + }, + }) +} + +fn create_list_mcp_resources_tool() -> ToolSpec { + let mut properties = BTreeMap::new(); + properties.insert( + "server".to_string(), + JsonSchema::String { + description: Some( + "Optional MCP server name. When omitted, lists resources from every configured server." + .to_string(), + ), + }, + ); + properties.insert( + "cursor".to_string(), + JsonSchema::String { + description: Some( + "Opaque cursor returned by a previous list_mcp_resources call for the same server." + .to_string(), + ), + }, + ); + + ToolSpec::Function(ResponsesApiTool { + name: "list_mcp_resources".to_string(), + description: "Lists resources provided by MCP servers. Resources allow servers to share data that provides context to language models, such as files, database schemas, or application-specific information. Prefer resources over web search when possible.".to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: None, + additional_properties: Some(false.into()), + }, + }) +} + +fn create_list_mcp_resource_templates_tool() -> ToolSpec { + let mut properties = BTreeMap::new(); + properties.insert( + "server".to_string(), + JsonSchema::String { + description: Some( + "Optional MCP server name. When omitted, lists resource templates from all configured servers." + .to_string(), + ), + }, + ); + properties.insert( + "cursor".to_string(), + JsonSchema::String { + description: Some( + "Opaque cursor returned by a previous list_mcp_resource_templates call for the same server." + .to_string(), + ), + }, + ); + + ToolSpec::Function(ResponsesApiTool { + name: "list_mcp_resource_templates".to_string(), + description: "Lists resource templates provided by MCP servers. Parameterized resource templates allow servers to share data that takes parameters and provides context to language models, such as files, database schemas, or application-specific information. Prefer resource templates over web search when possible.".to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: None, + additional_properties: Some(false.into()), + }, + }) +} + +fn create_read_mcp_resource_tool() -> ToolSpec { + let mut properties = BTreeMap::new(); + properties.insert( + "server".to_string(), + JsonSchema::String { + description: Some( + "MCP server name exactly as configured. Must match the 'server' field returned by list_mcp_resources." + .to_string(), + ), + }, + ); + properties.insert( + "uri".to_string(), + JsonSchema::String { + description: Some( + "Resource URI to read. Must be one of the URIs returned by list_mcp_resources." + .to_string(), + ), + }, + ); + + ToolSpec::Function(ResponsesApiTool { + name: "read_mcp_resource".to_string(), + description: + "Read a specific resource from an MCP server given the server name and resource URI." + .to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["server".to_string(), "uri".to_string()]), + additional_properties: Some(false.into()), + }, + }) +} +/// TODO(dylan): deprecate once we get rid of json tool +#[derive(Serialize, Deserialize)] +pub(crate) struct ApplyPatchToolArgs { + pub(crate) input: String, +} + +/// Returns JSON values that are compatible with Function Calling in the +/// Responses API: +/// https://platform.openai.com/docs/guides/function-calling?api-mode=responses +pub fn create_tools_json_for_responses_api( + tools: &[ToolSpec], +) -> crate::error::Result> { + let mut tools_json = Vec::new(); + + for tool in tools { + let json = serde_json::to_value(tool)?; + tools_json.push(json); + } + + Ok(tools_json) +} +/// Returns JSON values that are compatible with Function Calling in the +/// Chat Completions API: +/// https://platform.openai.com/docs/guides/function-calling?api-mode=chat +pub(crate) fn create_tools_json_for_chat_completions_api( + tools: &[ToolSpec], +) -> crate::error::Result> { + // We start with the JSON for the Responses API and than rewrite it to match + // the chat completions tool call format. + let responses_api_tools_json = create_tools_json_for_responses_api(tools)?; + let tools_json = responses_api_tools_json + .into_iter() + .filter_map(|mut tool| { + if tool.get("type") != Some(&serde_json::Value::String("function".to_string())) { + return None; + } + + if let Some(map) = tool.as_object_mut() { + // Remove "type" field as it is not needed in chat completions. + map.remove("type"); + Some(json!({ + "type": "function", + "function": map, + })) + } else { + None + } + }) + .collect::>(); + Ok(tools_json) +} + +pub(crate) fn mcp_tool_to_openai_tool( + fully_qualified_name: String, + tool: mcp_types::Tool, +) -> Result { + let mcp_types::Tool { + description, + mut input_schema, + .. + } = tool; + + // OpenAI models mandate the "properties" field in the schema. The Agents + // SDK fixed this by inserting an empty object for "properties" if it is not + // already present https://github.com/openai/openai-agents-python/issues/449 + // so here we do the same. + if input_schema.properties.is_none() { + input_schema.properties = Some(serde_json::Value::Object(serde_json::Map::new())); + } + + // Serialize to a raw JSON value so we can sanitize schemas coming from MCP + // servers. Some servers omit the top-level or nested `type` in JSON + // Schemas (e.g. using enum/anyOf), or use unsupported variants like + // `integer`. Our internal JsonSchema is a small subset and requires + // `type`, so we coerce/sanitize here for compatibility. + let mut serialized_input_schema = serde_json::to_value(input_schema)?; + sanitize_json_schema(&mut serialized_input_schema); + let input_schema = serde_json::from_value::(serialized_input_schema)?; + + Ok(ResponsesApiTool { + name: fully_qualified_name, + description: description.unwrap_or_default(), + strict: false, + parameters: input_schema, + }) +} + +/// Sanitize a JSON Schema (as serde_json::Value) so it can fit our limited +/// JsonSchema enum. This function: +/// - Ensures every schema object has a "type". If missing, infers it from +/// common keywords (properties => object, items => array, enum/const/format => string) +/// and otherwise defaults to "string". +/// - Fills required child fields (e.g. array items, object properties) with +/// permissive defaults when absent. +fn sanitize_json_schema(value: &mut JsonValue) { + match value { + JsonValue::Bool(_) => { + // JSON Schema boolean form: true/false. Coerce to an accept-all string. + *value = json!({ "type": "string" }); + } + JsonValue::Array(arr) => { + for v in arr.iter_mut() { + sanitize_json_schema(v); + } + } + JsonValue::Object(map) => { + // First, recursively sanitize known nested schema holders + if let Some(props) = map.get_mut("properties") + && let Some(props_map) = props.as_object_mut() + { + for (_k, v) in props_map.iter_mut() { + sanitize_json_schema(v); + } + } + if let Some(items) = map.get_mut("items") { + sanitize_json_schema(items); + } + // Some schemas use oneOf/anyOf/allOf - sanitize their entries + for combiner in ["oneOf", "anyOf", "allOf", "prefixItems"] { + if let Some(v) = map.get_mut(combiner) { + sanitize_json_schema(v); + } + } + + // Normalize/ensure type + let mut ty = map.get("type").and_then(|v| v.as_str()).map(str::to_string); + + // If type is an array (union), pick first supported; else leave to inference + if ty.is_none() + && let Some(JsonValue::Array(types)) = map.get("type") + { + for t in types { + if let Some(tt) = t.as_str() + && matches!( + tt, + "object" | "array" | "string" | "number" | "integer" | "boolean" + ) + { + ty = Some(tt.to_string()); + break; + } + } + } + + // Infer type if still missing + if ty.is_none() { + if map.contains_key("properties") + || map.contains_key("required") + || map.contains_key("additionalProperties") + { + ty = Some("object".to_string()); + } else if map.contains_key("items") || map.contains_key("prefixItems") { + ty = Some("array".to_string()); + } else if map.contains_key("enum") + || map.contains_key("const") + || map.contains_key("format") + { + ty = Some("string".to_string()); + } else if map.contains_key("minimum") + || map.contains_key("maximum") + || map.contains_key("exclusiveMinimum") + || map.contains_key("exclusiveMaximum") + || map.contains_key("multipleOf") + { + ty = Some("number".to_string()); + } + } + // If we still couldn't infer, default to string + let ty = ty.unwrap_or_else(|| "string".to_string()); + map.insert("type".to_string(), JsonValue::String(ty.to_string())); + + // Ensure object schemas have properties map + if ty == "object" { + if !map.contains_key("properties") { + map.insert( + "properties".to_string(), + JsonValue::Object(serde_json::Map::new()), + ); + } + // If additionalProperties is an object schema, sanitize it too. + // Leave booleans as-is, since JSON Schema allows boolean here. + if let Some(ap) = map.get_mut("additionalProperties") { + let is_bool = matches!(ap, JsonValue::Bool(_)); + if !is_bool { + sanitize_json_schema(ap); + } + } + } + + // Ensure array schemas have items + if ty == "array" && !map.contains_key("items") { + map.insert("items".to_string(), json!({ "type": "string" })); + } + } + _ => {} + } +} + +/// Builds the tool registry builder while collecting tool specs for later serialization. +pub(crate) fn build_specs( + config: &ToolsConfig, + mcp_tools: Option>, +) -> ToolRegistryBuilder { + use crate::tools::handlers::AgentHandler; + use crate::tools::handlers::ApplyPatchHandler; + use crate::tools::handlers::GrepFilesHandler; + use crate::tools::handlers::ListDirHandler; + use crate::tools::handlers::McpHandler; + use crate::tools::handlers::McpResourceHandler; + use crate::tools::handlers::PlanHandler; + use crate::tools::handlers::ReadFileHandler; + use crate::tools::handlers::ShellCommandHandler; + use crate::tools::handlers::ShellHandler; + use crate::tools::handlers::TestSyncHandler; + use crate::tools::handlers::UnifiedExecHandler; + use crate::tools::handlers::ViewImageHandler; + use std::sync::Arc; + + let mut builder = ToolRegistryBuilder::new(); + + let shell_handler = Arc::new(ShellHandler); + let unified_exec_handler = Arc::new(UnifiedExecHandler); + let plan_handler = Arc::new(PlanHandler); + let apply_patch_handler = Arc::new(ApplyPatchHandler); + let agent_handler = Arc::new(AgentHandler); + let view_image_handler = Arc::new(ViewImageHandler); + let mcp_handler = Arc::new(McpHandler); + let mcp_resource_handler = Arc::new(McpResourceHandler); + let shell_command_handler = Arc::new(ShellCommandHandler); + + match &config.shell_type { + ConfigShellToolType::Default => { + builder.push_spec(create_shell_tool()); + } + ConfigShellToolType::Local => { + builder.push_spec(ToolSpec::LocalShell {}); + } + ConfigShellToolType::UnifiedExec => { + builder.push_spec(create_exec_command_tool()); + builder.push_spec(create_write_stdin_tool()); + builder.register_handler("exec_command", unified_exec_handler.clone()); + builder.register_handler("write_stdin", unified_exec_handler); + } + ConfigShellToolType::Disabled => { + // Do nothing. + } + ConfigShellToolType::ShellCommand => { + builder.push_spec(create_shell_command_tool()); + } + } + + if config.shell_type != ConfigShellToolType::Disabled { + // Always register shell aliases so older prompts remain compatible. + builder.register_handler("shell", shell_handler.clone()); + builder.register_handler("container.exec", shell_handler.clone()); + builder.register_handler("local_shell", shell_handler); + builder.register_handler("shell_command", shell_command_handler); + } + + builder.push_spec_with_parallel_support(create_list_mcp_resources_tool(), true); + builder.push_spec_with_parallel_support(create_list_mcp_resource_templates_tool(), true); + builder.push_spec_with_parallel_support(create_read_mcp_resource_tool(), true); + builder.register_handler("list_mcp_resources", mcp_resource_handler.clone()); + builder.register_handler("list_mcp_resource_templates", mcp_resource_handler.clone()); + builder.register_handler("read_mcp_resource", mcp_resource_handler); + + builder.push_spec(PLAN_TOOL.clone()); + builder.register_handler("update_plan", plan_handler); + + if let Some(apply_patch_tool_type) = &config.apply_patch_tool_type { + match apply_patch_tool_type { + ApplyPatchToolType::Freeform => { + builder.push_spec(create_apply_patch_freeform_tool()); + } + ApplyPatchToolType::Function => { + builder.push_spec(create_apply_patch_json_tool()); + } + } + builder.register_handler("apply_patch", apply_patch_handler); + } + + if config + .experimental_supported_tools + .contains(&"grep_files".to_string()) + { + let grep_files_handler = Arc::new(GrepFilesHandler); + builder.push_spec_with_parallel_support(create_grep_files_tool(), true); + builder.register_handler("grep_files", grep_files_handler); + } + + if config + .experimental_supported_tools + .contains(&"read_file".to_string()) + { + let read_file_handler = Arc::new(ReadFileHandler); + builder.push_spec_with_parallel_support(create_read_file_tool(), true); + builder.register_handler("read_file", read_file_handler); + } + + if config + .experimental_supported_tools + .iter() + .any(|tool| tool == "list_dir") + { + let list_dir_handler = Arc::new(ListDirHandler); + builder.push_spec_with_parallel_support(create_list_dir_tool(), true); + builder.register_handler("list_dir", list_dir_handler); + } + + if config + .experimental_supported_tools + .contains(&"test_sync_tool".to_string()) + { + let test_sync_handler = Arc::new(TestSyncHandler); + builder.push_spec_with_parallel_support(create_test_sync_tool(), true); + builder.register_handler("test_sync_tool", test_sync_handler); + } + + if config.web_search_request { + builder.push_spec(ToolSpec::WebSearch {}); + } + + if config.include_view_image_tool { + builder.push_spec(create_view_image_tool()); + builder.register_handler("view_image", view_image_handler); + } + + if config.include_agent_tool { + builder.push_spec_with_parallel_support(create_agent_tool(), true); + builder.register_handler("agent", agent_handler); + } + + if let Some(mcp_tools) = mcp_tools { + let mut entries: Vec<(String, mcp_types::Tool)> = mcp_tools.into_iter().collect(); + entries.sort_by(|a, b| a.0.cmp(&b.0)); + + for (name, tool) in entries.into_iter() { + match mcp_tool_to_openai_tool(name.clone(), tool.clone()) { + Ok(converted_tool) => { + builder.push_spec(ToolSpec::Function(converted_tool)); + builder.register_handler(name, mcp_handler.clone()); + } + Err(e) => { + tracing::error!("Failed to convert {name:?} MCP tool to OpenAI tool: {e:?}"); + } + } + } + } + + builder +} + +#[cfg(test)] +mod tests { + use crate::client_common::tools::FreeformTool; + use crate::model_family::find_family_for_model; + use crate::tools::registry::ConfiguredToolSpec; + use mcp_types::ToolInputSchema; + use pretty_assertions::assert_eq; + + use super::*; + + fn tool_name(tool: &ToolSpec) -> &str { + match tool { + ToolSpec::Function(ResponsesApiTool { name, .. }) => name, + ToolSpec::LocalShell {} => "local_shell", + ToolSpec::WebSearch {} => "web_search", + ToolSpec::Freeform(FreeformTool { name, .. }) => name, + } + } + + // Avoid order-based assertions; compare via set containment instead. + fn assert_contains_tool_names(tools: &[ConfiguredToolSpec], expected_subset: &[&str]) { + use std::collections::HashSet; + let mut names = HashSet::new(); + let mut duplicates = Vec::new(); + for name in tools.iter().map(|t| tool_name(&t.spec)) { + if !names.insert(name) { + duplicates.push(name); + } + } + assert!( + duplicates.is_empty(), + "duplicate tool entries detected: {duplicates:?}" + ); + for expected in expected_subset { + assert!( + names.contains(expected), + "expected tool {expected} to be present; had: {names:?}" + ); + } + } + + fn shell_tool_name(config: &ToolsConfig) -> Option<&'static str> { + match config.shell_type { + ConfigShellToolType::Default => Some("shell"), + ConfigShellToolType::Local => Some("local_shell"), + ConfigShellToolType::UnifiedExec => None, + ConfigShellToolType::Disabled => None, + ConfigShellToolType::ShellCommand => Some("shell_command"), + } + } + + fn find_tool<'a>( + tools: &'a [ConfiguredToolSpec], + expected_name: &str, + ) -> &'a ConfiguredToolSpec { + tools + .iter() + .find(|tool| tool_name(&tool.spec) == expected_name) + .unwrap_or_else(|| panic!("expected tool {expected_name}")) + } + + fn strip_descriptions_schema(schema: &mut JsonSchema) { + match schema { + JsonSchema::Boolean { description } + | JsonSchema::String { description } + | JsonSchema::Number { description } => { + *description = None; + } + JsonSchema::Array { items, description } => { + strip_descriptions_schema(items); + *description = None; + } + JsonSchema::Object { + properties, + required: _, + additional_properties, + } => { + for v in properties.values_mut() { + strip_descriptions_schema(v); + } + if let Some(AdditionalProperties::Schema(s)) = additional_properties { + strip_descriptions_schema(s); + } + } + } + } + + fn strip_descriptions_tool(spec: &mut ToolSpec) { + match spec { + ToolSpec::Function(ResponsesApiTool { parameters, .. }) => { + strip_descriptions_schema(parameters); + } + ToolSpec::Freeform(_) | ToolSpec::LocalShell {} | ToolSpec::WebSearch {} => {} + } + } + + #[test] + fn test_full_toolset_specs_for_gpt5_codex_unified_exec_web_search() { + let model_family = find_family_for_model("gpt-5-codex") + .expect("gpt-5-codex should be a valid model family"); + let mut features = Features::with_defaults(); + features.enable(Feature::UnifiedExec); + features.enable(Feature::WebSearchRequest); + features.enable(Feature::ViewImageTool); + let config = ToolsConfig::new(&ToolsConfigParams { + model_family: &model_family, + features: &features, + }); + let (tools, _) = build_specs(&config, None).build(); + + // Build actual map name -> spec + use std::collections::BTreeMap; + use std::collections::HashSet; + let mut actual: BTreeMap = BTreeMap::new(); + let mut duplicate_names = Vec::new(); + for t in &tools { + let name = tool_name(&t.spec).to_string(); + if actual.insert(name.clone(), t.spec.clone()).is_some() { + duplicate_names.push(name); + } + } + assert!( + duplicate_names.is_empty(), + "duplicate tool entries detected: {duplicate_names:?}" + ); + + // Build expected from the same helpers used by the builder. + let mut expected: BTreeMap = BTreeMap::new(); + for spec in [ + create_exec_command_tool(), + create_write_stdin_tool(), + create_list_mcp_resources_tool(), + create_list_mcp_resource_templates_tool(), + create_read_mcp_resource_tool(), + PLAN_TOOL.clone(), + create_apply_patch_freeform_tool(), + ToolSpec::WebSearch {}, + create_view_image_tool(), + ] { + expected.insert(tool_name(&spec).to_string(), spec); + } + + // Exact name set match — this is the only test allowed to fail when tools change. + let actual_names: HashSet<_> = actual.keys().cloned().collect(); + let expected_names: HashSet<_> = expected.keys().cloned().collect(); + assert_eq!(actual_names, expected_names, "tool name set mismatch"); + + // Compare specs ignoring human-readable descriptions. + for name in expected.keys() { + let mut a = actual.get(name).expect("present").clone(); + let mut e = expected.get(name).expect("present").clone(); + strip_descriptions_tool(&mut a); + strip_descriptions_tool(&mut e); + assert_eq!(a, e, "spec mismatch for {name}"); + } + } + + fn assert_model_tools(model_family: &str, features: &Features, expected_tools: &[&str]) { + let model_family = find_family_for_model(model_family) + .unwrap_or_else(|| panic!("{model_family} should be a valid model family")); + let config = ToolsConfig::new(&ToolsConfigParams { + model_family: &model_family, + features, + }); + let (tools, _) = build_specs(&config, Some(HashMap::new())).build(); + let tool_names = tools.iter().map(|t| t.spec.name()).collect::>(); + assert_eq!(&tool_names, &expected_tools,); + } + + #[test] + fn test_build_specs_gpt5_codex_default() { + assert_model_tools( + "gpt-5-codex", + &Features::with_defaults(), + &[ + "shell_command", + "list_mcp_resources", + "list_mcp_resource_templates", + "read_mcp_resource", + "update_plan", + "apply_patch", + "view_image", + ], + ); + } + + #[test] + fn test_build_specs_gpt51_codex_default() { + assert_model_tools( + "gpt-5.1-codex", + &Features::with_defaults(), + &[ + "shell_command", + "list_mcp_resources", + "list_mcp_resource_templates", + "read_mcp_resource", + "update_plan", + "apply_patch", + "view_image", + ], + ); + } + + #[test] + fn test_build_specs_gpt5_codex_unified_exec_web_search() { + assert_model_tools( + "gpt-5-codex", + Features::with_defaults() + .enable(Feature::UnifiedExec) + .enable(Feature::WebSearchRequest), + &[ + "exec_command", + "write_stdin", + "list_mcp_resources", + "list_mcp_resource_templates", + "read_mcp_resource", + "update_plan", + "apply_patch", + "web_search", + "view_image", + ], + ); + } + + #[test] + fn test_build_specs_gpt51_codex_unified_exec_web_search() { + assert_model_tools( + "gpt-5.1-codex", + Features::with_defaults() + .enable(Feature::UnifiedExec) + .enable(Feature::WebSearchRequest), + &[ + "exec_command", + "write_stdin", + "list_mcp_resources", + "list_mcp_resource_templates", + "read_mcp_resource", + "update_plan", + "apply_patch", + "web_search", + "view_image", + ], + ); + } + + #[test] + fn test_codex_mini_defaults() { + assert_model_tools( + "codex-mini-latest", + &Features::with_defaults(), + &[ + "local_shell", + "list_mcp_resources", + "list_mcp_resource_templates", + "read_mcp_resource", + "update_plan", + "view_image", + ], + ); + } + + #[test] + fn test_codex_5_1_mini_defaults() { + assert_model_tools( + "gpt-5.1-codex-mini", + &Features::with_defaults(), + &[ + "shell_command", + "list_mcp_resources", + "list_mcp_resource_templates", + "read_mcp_resource", + "update_plan", + "apply_patch", + "view_image", + ], + ); + } + + #[test] + fn test_gpt_5_defaults() { + assert_model_tools( + "gpt-5", + &Features::with_defaults(), + &[ + "shell", + "list_mcp_resources", + "list_mcp_resource_templates", + "read_mcp_resource", + "update_plan", + "view_image", + ], + ); + } + + #[test] + fn test_gpt_5_1_defaults() { + assert_model_tools( + "gpt-5.1", + &Features::with_defaults(), + &[ + "shell_command", + "list_mcp_resources", + "list_mcp_resource_templates", + "read_mcp_resource", + "update_plan", + "apply_patch", + "view_image", + ], + ); + } + + #[test] + fn test_exp_5_1_defaults() { + assert_model_tools( + "exp-5.1", + &Features::with_defaults(), + &[ + "exec_command", + "write_stdin", + "list_mcp_resources", + "list_mcp_resource_templates", + "read_mcp_resource", + "update_plan", + "apply_patch", + "view_image", + ], + ); + } + + #[test] + fn test_codex_mini_unified_exec_web_search() { + assert_model_tools( + "codex-mini-latest", + Features::with_defaults() + .enable(Feature::UnifiedExec) + .enable(Feature::WebSearchRequest), + &[ + "exec_command", + "write_stdin", + "list_mcp_resources", + "list_mcp_resource_templates", + "read_mcp_resource", + "update_plan", + "web_search", + "view_image", + ], + ); + } + + #[test] + fn test_build_specs_default_shell_present() { + let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); + let mut features = Features::with_defaults(); + features.enable(Feature::WebSearchRequest); + features.enable(Feature::UnifiedExec); + let config = ToolsConfig::new(&ToolsConfigParams { + model_family: &model_family, + features: &features, + }); + let (tools, _) = build_specs(&config, Some(HashMap::new())).build(); + + // Only check the shell variant and a couple of core tools. + let mut subset = vec!["exec_command", "write_stdin", "update_plan"]; + if let Some(shell_tool) = shell_tool_name(&config) { + subset.push(shell_tool); + } + assert_contains_tool_names(&tools, &subset); + } + + #[test] + #[ignore] + fn test_parallel_support_flags() { + let model_family = find_family_for_model("gpt-5-codex") + .expect("codex-mini-latest should be a valid model family"); + let mut features = Features::with_defaults(); + features.disable(Feature::ViewImageTool); + features.enable(Feature::UnifiedExec); + let config = ToolsConfig::new(&ToolsConfigParams { + model_family: &model_family, + features: &features, + }); + let (tools, _) = build_specs(&config, None).build(); + + assert!(!find_tool(&tools, "exec_command").supports_parallel_tool_calls); + assert!(!find_tool(&tools, "write_stdin").supports_parallel_tool_calls); + assert!(find_tool(&tools, "grep_files").supports_parallel_tool_calls); + assert!(find_tool(&tools, "list_dir").supports_parallel_tool_calls); + assert!(find_tool(&tools, "read_file").supports_parallel_tool_calls); + } + + #[test] + fn test_test_model_family_includes_sync_tool() { + let model_family = find_family_for_model("test-gpt-5-codex") + .expect("test-gpt-5-codex should be a valid model family"); + let mut features = Features::with_defaults(); + features.disable(Feature::ViewImageTool); + let config = ToolsConfig::new(&ToolsConfigParams { + model_family: &model_family, + features: &features, + }); + let (tools, _) = build_specs(&config, None).build(); + + assert!( + tools + .iter() + .any(|tool| tool_name(&tool.spec) == "test_sync_tool") + ); + assert!( + tools + .iter() + .any(|tool| tool_name(&tool.spec) == "read_file") + ); + assert!( + tools + .iter() + .any(|tool| tool_name(&tool.spec) == "grep_files") + ); + assert!(tools.iter().any(|tool| tool_name(&tool.spec) == "list_dir")); + } + + #[test] + fn test_build_specs_mcp_tools_converted() { + let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); + let mut features = Features::with_defaults(); + features.enable(Feature::UnifiedExec); + features.enable(Feature::WebSearchRequest); + let config = ToolsConfig::new(&ToolsConfigParams { + model_family: &model_family, + features: &features, + }); + let (tools, _) = build_specs( + &config, + Some(HashMap::from([( + "test_server/do_something_cool".to_string(), + mcp_types::Tool { + name: "do_something_cool".to_string(), + input_schema: ToolInputSchema { + properties: Some(serde_json::json!({ + "string_argument": { + "type": "string", + }, + "number_argument": { + "type": "number", + }, + "object_argument": { + "type": "object", + "properties": { + "string_property": { "type": "string" }, + "number_property": { "type": "number" }, + }, + "required": [ + "string_property", + "number_property", + ], + "additionalProperties": Some(false), + }, + })), + required: None, + r#type: "object".to_string(), + }, + output_schema: None, + title: None, + annotations: None, + description: Some("Do something cool".to_string()), + }, + )])), + ) + .build(); + + let tool = find_tool(&tools, "test_server/do_something_cool"); + assert_eq!( + &tool.spec, + &ToolSpec::Function(ResponsesApiTool { + name: "test_server/do_something_cool".to_string(), + parameters: JsonSchema::Object { + properties: BTreeMap::from([ + ( + "string_argument".to_string(), + JsonSchema::String { description: None } + ), + ( + "number_argument".to_string(), + JsonSchema::Number { description: None } + ), + ( + "object_argument".to_string(), + JsonSchema::Object { + properties: BTreeMap::from([ + ( + "string_property".to_string(), + JsonSchema::String { description: None } + ), + ( + "number_property".to_string(), + JsonSchema::Number { description: None } + ), + ]), + required: Some(vec![ + "string_property".to_string(), + "number_property".to_string(), + ]), + additional_properties: Some(false.into()), + }, + ), + ]), + required: None, + additional_properties: None, + }, + description: "Do something cool".to_string(), + strict: false, + }) + ); + } + + #[test] + fn test_build_specs_mcp_tools_sorted_by_name() { + let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); + let mut features = Features::with_defaults(); + features.enable(Feature::UnifiedExec); + let config = ToolsConfig::new(&ToolsConfigParams { + model_family: &model_family, + features: &features, + }); + + // Intentionally construct a map with keys that would sort alphabetically. + let tools_map: HashMap = HashMap::from([ + ( + "test_server/do".to_string(), + mcp_types::Tool { + name: "a".to_string(), + input_schema: ToolInputSchema { + properties: Some(serde_json::json!({})), + required: None, + r#type: "object".to_string(), + }, + output_schema: None, + title: None, + annotations: None, + description: Some("a".to_string()), + }, + ), + ( + "test_server/something".to_string(), + mcp_types::Tool { + name: "b".to_string(), + input_schema: ToolInputSchema { + properties: Some(serde_json::json!({})), + required: None, + r#type: "object".to_string(), + }, + output_schema: None, + title: None, + annotations: None, + description: Some("b".to_string()), + }, + ), + ( + "test_server/cool".to_string(), + mcp_types::Tool { + name: "c".to_string(), + input_schema: ToolInputSchema { + properties: Some(serde_json::json!({})), + required: None, + r#type: "object".to_string(), + }, + output_schema: None, + title: None, + annotations: None, + description: Some("c".to_string()), + }, + ), + ]); + + let (tools, _) = build_specs(&config, Some(tools_map)).build(); + + // Only assert that the MCP tools themselves are sorted by fully-qualified name. + let mcp_names: Vec<_> = tools + .iter() + .map(|t| tool_name(&t.spec).to_string()) + .filter(|n| n.starts_with("test_server/")) + .collect(); + let expected = vec![ + "test_server/cool".to_string(), + "test_server/do".to_string(), + "test_server/something".to_string(), + ]; + assert_eq!(mcp_names, expected); + } + + #[test] + fn test_mcp_tool_property_missing_type_defaults_to_string() { + let model_family = find_family_for_model("gpt-5-codex") + .expect("gpt-5-codex should be a valid model family"); + let mut features = Features::with_defaults(); + features.enable(Feature::UnifiedExec); + features.enable(Feature::WebSearchRequest); + let config = ToolsConfig::new(&ToolsConfigParams { + model_family: &model_family, + features: &features, + }); + + let (tools, _) = build_specs( + &config, + Some(HashMap::from([( + "dash/search".to_string(), + mcp_types::Tool { + name: "search".to_string(), + input_schema: ToolInputSchema { + properties: Some(serde_json::json!({ + "query": { + "description": "search query" + } + })), + required: None, + r#type: "object".to_string(), + }, + output_schema: None, + title: None, + annotations: None, + description: Some("Search docs".to_string()), + }, + )])), + ) + .build(); + + let tool = find_tool(&tools, "dash/search"); + assert_eq!( + tool.spec, + ToolSpec::Function(ResponsesApiTool { + name: "dash/search".to_string(), + parameters: JsonSchema::Object { + properties: BTreeMap::from([( + "query".to_string(), + JsonSchema::String { + description: Some("search query".to_string()) + } + )]), + required: None, + additional_properties: None, + }, + description: "Search docs".to_string(), + strict: false, + }) + ); + } + + #[test] + fn test_mcp_tool_integer_normalized_to_number() { + let model_family = find_family_for_model("gpt-5-codex") + .expect("gpt-5-codex should be a valid model family"); + let mut features = Features::with_defaults(); + features.enable(Feature::UnifiedExec); + features.enable(Feature::WebSearchRequest); + let config = ToolsConfig::new(&ToolsConfigParams { + model_family: &model_family, + features: &features, + }); + + let (tools, _) = build_specs( + &config, + Some(HashMap::from([( + "dash/paginate".to_string(), + mcp_types::Tool { + name: "paginate".to_string(), + input_schema: ToolInputSchema { + properties: Some(serde_json::json!({ + "page": { "type": "integer" } + })), + required: None, + r#type: "object".to_string(), + }, + output_schema: None, + title: None, + annotations: None, + description: Some("Pagination".to_string()), + }, + )])), + ) + .build(); + + let tool = find_tool(&tools, "dash/paginate"); + assert_eq!( + tool.spec, + ToolSpec::Function(ResponsesApiTool { + name: "dash/paginate".to_string(), + parameters: JsonSchema::Object { + properties: BTreeMap::from([( + "page".to_string(), + JsonSchema::Number { description: None } + )]), + required: None, + additional_properties: None, + }, + description: "Pagination".to_string(), + strict: false, + }) + ); + } + + #[test] + fn test_mcp_tool_array_without_items_gets_default_string_items() { + let model_family = find_family_for_model("gpt-5-codex") + .expect("gpt-5-codex should be a valid model family"); + let mut features = Features::with_defaults(); + features.enable(Feature::UnifiedExec); + features.enable(Feature::WebSearchRequest); + features.enable(Feature::ApplyPatchFreeform); + let config = ToolsConfig::new(&ToolsConfigParams { + model_family: &model_family, + features: &features, + }); + + let (tools, _) = build_specs( + &config, + Some(HashMap::from([( + "dash/tags".to_string(), + mcp_types::Tool { + name: "tags".to_string(), + input_schema: ToolInputSchema { + properties: Some(serde_json::json!({ + "tags": { "type": "array" } + })), + required: None, + r#type: "object".to_string(), + }, + output_schema: None, + title: None, + annotations: None, + description: Some("Tags".to_string()), + }, + )])), + ) + .build(); + + let tool = find_tool(&tools, "dash/tags"); + assert_eq!( + tool.spec, + ToolSpec::Function(ResponsesApiTool { + name: "dash/tags".to_string(), + parameters: JsonSchema::Object { + properties: BTreeMap::from([( + "tags".to_string(), + JsonSchema::Array { + items: Box::new(JsonSchema::String { description: None }), + description: None + } + )]), + required: None, + additional_properties: None, + }, + description: "Tags".to_string(), + strict: false, + }) + ); + } + + #[test] + fn test_mcp_tool_anyof_defaults_to_string() { + let model_family = find_family_for_model("gpt-5-codex") + .expect("gpt-5-codex should be a valid model family"); + let mut features = Features::with_defaults(); + features.enable(Feature::UnifiedExec); + features.enable(Feature::WebSearchRequest); + let config = ToolsConfig::new(&ToolsConfigParams { + model_family: &model_family, + features: &features, + }); + + let (tools, _) = build_specs( + &config, + Some(HashMap::from([( + "dash/value".to_string(), + mcp_types::Tool { + name: "value".to_string(), + input_schema: ToolInputSchema { + properties: Some(serde_json::json!({ + "value": { "anyOf": [ { "type": "string" }, { "type": "number" } ] } + })), + required: None, + r#type: "object".to_string(), + }, + output_schema: None, + title: None, + annotations: None, + description: Some("AnyOf Value".to_string()), + }, + )])), + ) + .build(); + + let tool = find_tool(&tools, "dash/value"); + assert_eq!( + tool.spec, + ToolSpec::Function(ResponsesApiTool { + name: "dash/value".to_string(), + parameters: JsonSchema::Object { + properties: BTreeMap::from([( + "value".to_string(), + JsonSchema::String { description: None } + )]), + required: None, + additional_properties: None, + }, + description: "AnyOf Value".to_string(), + strict: false, + }) + ); + } + + #[test] + fn test_shell_tool() { + let tool = super::create_shell_tool(); + let ToolSpec::Function(ResponsesApiTool { + description, name, .. + }) = &tool + else { + panic!("expected function tool"); + }; + assert_eq!(name, "shell"); + + let expected = if cfg!(windows) { + r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"]. + +Examples of valid command strings: + +- ls -a (show hidden): ["powershell.exe", "-Command", "Get-ChildItem -Force"] +- recursive find by name: ["powershell.exe", "-Command", "Get-ChildItem -Recurse -Filter *.py"] +- recursive grep: ["powershell.exe", "-Command", "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"] +- ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"] +- setting an env var: ["powershell.exe", "-Command", "$env:FOO='bar'; echo $env:FOO"] +- running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"# + } else { + r#"Runs a shell command and returns its output. +- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"]. +- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary."# + }.to_string(); + assert_eq!(description, &expected); + } + + #[test] + fn test_shell_command_tool() { + let tool = super::create_shell_command_tool(); + let ToolSpec::Function(ResponsesApiTool { + description, name, .. + }) = &tool + else { + panic!("expected function tool"); + }; + assert_eq!(name, "shell_command"); + + let expected = if cfg!(windows) { + r#"Runs a Powershell command (Windows) and returns its output. + +Examples of valid command strings: + +- ls -a (show hidden): "Get-ChildItem -Force" +- recursive find by name: "Get-ChildItem -Recurse -Filter *.py" +- recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive" +- ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }" +- setting an env var: "$env:FOO='bar'; echo $env:FOO" +- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"#.to_string() + } else { + r#"Runs a shell command and returns its output. +- Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary."#.to_string() + }; + assert_eq!(description, &expected); + } + + #[test] + fn test_get_openai_tools_mcp_tools_with_additional_properties_schema() { + let model_family = find_family_for_model("gpt-5-codex") + .expect("gpt-5-codex should be a valid model family"); + let mut features = Features::with_defaults(); + features.enable(Feature::UnifiedExec); + features.enable(Feature::WebSearchRequest); + let config = ToolsConfig::new(&ToolsConfigParams { + model_family: &model_family, + features: &features, + }); + let (tools, _) = build_specs( + &config, + Some(HashMap::from([( + "test_server/do_something_cool".to_string(), + mcp_types::Tool { + name: "do_something_cool".to_string(), + input_schema: ToolInputSchema { + properties: Some(serde_json::json!({ + "string_argument": { + "type": "string", + }, + "number_argument": { + "type": "number", + }, + "object_argument": { + "type": "object", + "properties": { + "string_property": { "type": "string" }, + "number_property": { "type": "number" }, + }, + "required": [ + "string_property", + "number_property", + ], + "additionalProperties": { + "type": "object", + "properties": { + "addtl_prop": { "type": "string" }, + }, + "required": [ + "addtl_prop", + ], + "additionalProperties": false, + }, + }, + })), + required: None, + r#type: "object".to_string(), + }, + output_schema: None, + title: None, + annotations: None, + description: Some("Do something cool".to_string()), + }, + )])), + ) + .build(); + + let tool = find_tool(&tools, "test_server/do_something_cool"); + assert_eq!( + tool.spec, + ToolSpec::Function(ResponsesApiTool { + name: "test_server/do_something_cool".to_string(), + parameters: JsonSchema::Object { + properties: BTreeMap::from([ + ( + "string_argument".to_string(), + JsonSchema::String { description: None } + ), + ( + "number_argument".to_string(), + JsonSchema::Number { description: None } + ), + ( + "object_argument".to_string(), + JsonSchema::Object { + properties: BTreeMap::from([ + ( + "string_property".to_string(), + JsonSchema::String { description: None } + ), + ( + "number_property".to_string(), + JsonSchema::Number { description: None } + ), + ]), + required: Some(vec![ + "string_property".to_string(), + "number_property".to_string(), + ]), + additional_properties: Some( + JsonSchema::Object { + properties: BTreeMap::from([( + "addtl_prop".to_string(), + JsonSchema::String { description: None } + ),]), + required: Some(vec!["addtl_prop".to_string(),]), + additional_properties: Some(false.into()), + } + .into() + ), + }, + ), + ]), + required: None, + additional_properties: None, + }, + description: "Do something cool".to_string(), + strict: false, + }) + ); + } +} diff --git a/codex-rs/core/tests/suite/agent_security.rs b/codex-rs/core/tests/suite/agent_security.rs new file mode 100644 index 000000000000..7d1cb71eab8f --- /dev/null +++ b/codex-rs/core/tests/suite/agent_security.rs @@ -0,0 +1,77 @@ +use codex_core::agent::AgentRegistry; +use std::fs; +use std::os::unix::fs::symlink; +use tempfile::TempDir; + +#[test] +fn test_symlinked_config_file_rejected() { + // 1. Create a temp directory for our fake home/.codex + let temp_dir = TempDir::new().unwrap(); + let codex_dir = temp_dir.path().join(".codex"); + fs::create_dir(&codex_dir).unwrap(); + + // 2. Create a real agents.toml OUTSIDE the allowed directory (simulating /etc/passwd or similar) + // In a real attack, this would be a sensitive file. Here we just check if it gets loaded. + let secret_dir = TempDir::new().unwrap(); + let secret_file = secret_dir.path().join("secret_agents.toml"); + fs::write( + &secret_file, + r#" + [hacker_agent] + prompt = "You are a hacker." + "#, + ) + .unwrap(); + + // 3. Create a symlink from .codex/agents.toml -> secret_agents.toml + let symlink_path = codex_dir.join("agents.toml"); + symlink(&secret_file, &symlink_path).expect("Failed to create symlink"); + + // 4. Initialize the registry using the custom path + // This uses the new `from_paths` API we added for testing. + let registry = AgentRegistry::from_paths(Some(codex_dir)).unwrap(); + + // 5. Assert that the "hacker_agent" was NOT loaded. + // If the symlink check works, it should log an error and return a registry with only the default agent. + assert!( + registry.get_agent("hacker_agent").is_none(), + "Security bypass: Symlinked agents.toml was loaded!" + ); + + // 6. Verify default agent still exists + assert!( + registry.get_agent("general").is_some(), + "Registry should still contain default agents" + ); +} + +#[test] +fn test_symlinked_directory_rejected() { + // 1. Create a directory that will be the target + let real_dir = TempDir::new().unwrap(); + let real_codex = real_dir.path().join(".codex"); + fs::create_dir(&real_codex).unwrap(); + + // Add a valid agent there + fs::write( + real_codex.join("agents.toml"), + r#" + [valid_agent] + prompt = "Valid." + "#, + ).unwrap(); + + // 2. Create a symlink TO that directory + let link_dir = TempDir::new().unwrap(); + let symlinked_codex = link_dir.path().join("symlink_codex"); + symlink(&real_codex, &symlinked_codex).expect("Failed to create dir symlink"); + + // 3. Try to load from the symlinked directory path + let registry = AgentRegistry::from_paths(Some(symlinked_codex)).unwrap(); + + // 4. Assert rejection + assert!( + registry.get_agent("valid_agent").is_none(), + "Security bypass: Symlinked directory was followed!" + ); +} diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index 47da123d3514..fcfca26d3e9a 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -18,6 +18,8 @@ mod abort_tasks; #[cfg(not(target_os = "windows"))] mod agent_tool; #[cfg(not(target_os = "windows"))] +mod agent_security; +#[cfg(not(target_os = "windows"))] mod apply_patch_cli; #[cfg(not(target_os = "windows"))] mod approvals; From 2d7fff80dec412a8644e13b3f1da09beef2a71df Mon Sep 17 00:00:00 2001 From: Jose <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 15:29:15 +0100 Subject: [PATCH 19/22] fix: force Chat API for agents to avoid instructions error --- codex-rs/core/src/tools/handlers/agent.rs | 25 ++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/codex-rs/core/src/tools/handlers/agent.rs b/codex-rs/core/src/tools/handlers/agent.rs index d34f89118122..0dcc4ab129ab 100644 --- a/codex-rs/core/src/tools/handlers/agent.rs +++ b/codex-rs/core/src/tools/handlers/agent.rs @@ -139,6 +139,11 @@ impl ToolHandler for AgentHandler { .map(|(name, tool)| (name, tool.tool)) .collect(); +use crate::client::ModelClient; +use crate::model_provider_info::WireApi; + +// ... imports ... + let mut sub_tools_config = turn.tools_config.clone(); sub_tools_config.include_agent_tool = false; @@ -158,9 +163,23 @@ impl ToolHandler for AgentHandler { output_schema: None, }; - let mut stream = turn - .client - .clone() + // Clone the provider and force WireApi::Chat to ensure compatibility with + // endpoints that reject the 'instructions' field in Responses API (e.g. GitHub Models). + let mut provider = turn.client.get_provider(); + provider.wire_api = WireApi::Chat; + + let sub_client = ModelClient::new( + turn.client.config(), + turn.client.get_auth_manager(), + turn.client.get_otel_event_manager(), + provider, + turn.client.get_reasoning_effort(), + turn.client.get_reasoning_summary(), + session.conversation_id().clone(), + turn.client.get_session_source(), + ); + + let mut stream = sub_client .stream(&prompt) .await .map_err(|e| FunctionCallError::Fatal(e.to_string()))?; From db8174f3b3be396f051c144befe51ecc011f526c Mon Sep 17 00:00:00 2001 From: Jose <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 17:02:43 +0100 Subject: [PATCH 20/22] fix: use system message workaround for agent instructions --- codex-rs/core/src/tools/handlers/agent.rs | 46 +++++++++-------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/codex-rs/core/src/tools/handlers/agent.rs b/codex-rs/core/src/tools/handlers/agent.rs index 0dcc4ab129ab..975b445d858c 100644 --- a/codex-rs/core/src/tools/handlers/agent.rs +++ b/codex-rs/core/src/tools/handlers/agent.rs @@ -139,11 +139,6 @@ impl ToolHandler for AgentHandler { .map(|(name, tool)| (name, tool.tool)) .collect(); -use crate::client::ModelClient; -use crate::model_provider_info::WireApi; - -// ... imports ... - let mut sub_tools_config = turn.tools_config.clone(); sub_tools_config.include_agent_tool = false; @@ -151,35 +146,30 @@ use crate::model_provider_info::WireApi; crate::tools::spec::build_specs(&sub_tools_config, Some(mcp_tools)).build(); let agent_tools = agent_tools.iter().map(|t| t.spec.clone()).collect(); + let mut prompt_input = vec![ResponseItem::Message { + id: None, + role: "system".to_string(), + content: vec![ContentItem::InputText { text: instructions }], + }]; + prompt_input.push(ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputText { text: task_text }], + }); + let prompt = Prompt { - input: vec![ResponseItem::Message { - id: None, - role: "user".to_string(), - content: vec![ContentItem::InputText { text: task_text }], - }], + input: prompt_input, tools: agent_tools, parallel_tool_calls: false, - base_instructions_override: Some(instructions), + // Set empty instructions to avoid "Instructions are not valid" error on strict endpoints + // while still satisfying the Responses API requirement for the field to exist. + base_instructions_override: Some(String::new()), output_schema: None, }; - // Clone the provider and force WireApi::Chat to ensure compatibility with - // endpoints that reject the 'instructions' field in Responses API (e.g. GitHub Models). - let mut provider = turn.client.get_provider(); - provider.wire_api = WireApi::Chat; - - let sub_client = ModelClient::new( - turn.client.config(), - turn.client.get_auth_manager(), - turn.client.get_otel_event_manager(), - provider, - turn.client.get_reasoning_effort(), - turn.client.get_reasoning_summary(), - session.conversation_id().clone(), - turn.client.get_session_source(), - ); - - let mut stream = sub_client + let mut stream = turn + .client + .clone() .stream(&prompt) .await .map_err(|e| FunctionCallError::Fatal(e.to_string()))?; From 5a6974146cda99ad13384057ae65020c933c4106 Mon Sep 17 00:00:00 2001 From: Jose <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 17:26:05 +0100 Subject: [PATCH 21/22] fix: inject agent prompt as user message to pass validation --- codex-rs/core/src/tools/handlers/agent.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/codex-rs/core/src/tools/handlers/agent.rs b/codex-rs/core/src/tools/handlers/agent.rs index 975b445d858c..ca116ebec341 100644 --- a/codex-rs/core/src/tools/handlers/agent.rs +++ b/codex-rs/core/src/tools/handlers/agent.rs @@ -148,7 +148,7 @@ impl ToolHandler for AgentHandler { let mut prompt_input = vec![ResponseItem::Message { id: None, - role: "system".to_string(), + role: "user".to_string(), content: vec![ContentItem::InputText { text: instructions }], }]; prompt_input.push(ResponseItem::Message { @@ -161,9 +161,10 @@ impl ToolHandler for AgentHandler { input: prompt_input, tools: agent_tools, parallel_tool_calls: false, - // Set empty instructions to avoid "Instructions are not valid" error on strict endpoints - // while still satisfying the Responses API requirement for the field to exist. - base_instructions_override: Some(String::new()), + // Do not override base instructions. Some endpoints (e.g. GitHub Models / ChatGPT) + // validate the 'instructions' field strictly and reject custom system prompts. + // We inject the agent persona as a user message instead. + base_instructions_override: None, output_schema: None, }; From 13dc95e5a95a93b451eaedb92731760972400cba Mon Sep 17 00:00:00 2001 From: Jose <112948755+Grinsven@users.noreply.github.com> Date: Mon, 24 Nov 2025 17:50:52 +0100 Subject: [PATCH 22/22] fix: optimize agent prompt injection (remove redundant base instructions) --- codex-rs/core/src/tools/handlers/agent.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/codex-rs/core/src/tools/handlers/agent.rs b/codex-rs/core/src/tools/handlers/agent.rs index ca116ebec341..3e0ca1ad1b9a 100644 --- a/codex-rs/core/src/tools/handlers/agent.rs +++ b/codex-rs/core/src/tools/handlers/agent.rs @@ -115,12 +115,8 @@ impl ToolHandler for AgentHandler { .send_event(turn.as_ref(), EventMsg::AgentBegin(begin_event)) .await; - let base_instructions = turn - .base_instructions - .clone() - .unwrap_or_else(|| turn.client.get_model_family().base_instructions); let instructions = format!( - "{base_instructions}\n\nYou are the \"{name}\" agent.\n{agent_prompt}\n\nDo not delegate to other agents. Provide the result directly.", + "You are the \"{name}\" agent.\n{agent_prompt}\n\nDo not delegate to other agents. Provide the result directly.", ); let mut task_text = task.clone();