diff --git a/README.md b/README.md index ab93ecad22e4..e828360dc7d4 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,6 @@ You can also use Codex with an API key, but this requires [additional setup](./d Codex CLI supports [MCP servers](./docs/advanced.md#model-context-protocol-mcp). Enable by adding an `mcp_servers` section to your `~/.codex/config.toml`. - ### Configuration Codex CLI supports a rich set of configuration options, with preferences stored in `~/.codex/config.toml`. For full configuration options, see [Configuration](./docs/config.md). @@ -88,6 +87,10 @@ Codex CLI supports a rich set of configuration options, with preferences stored - [Non-interactive / CI mode](./docs/advanced.md#non-interactive--ci-mode) - [Tracing / verbose logging](./docs/advanced.md#tracing--verbose-logging) - [Model Context Protocol (MCP)](./docs/advanced.md#model-context-protocol-mcp) +- [**Multi-Agent System**](./docs/subagents.md) + - [Custom agent configuration](./docs/subagents.md#custom-agent-configuration) + - [Agent behavior](./docs/subagents.md#agent-behavior) + - [Best practices](./docs/subagents.md#best-practices) - [**Zero data retention (ZDR)**](./docs/zdr.md) - [**Contributing**](./docs/contributing.md) - [**Install & build**](./docs/install.md) @@ -102,4 +105,3 @@ Codex CLI supports a rich set of configuration options, with preferences stored ## License This repository is licensed under the [Apache-2.0 License](LICENSE). - diff --git a/codex-rs/core/src/agent.rs b/codex-rs/core/src/agent.rs new file mode 100644 index 000000000000..cab414cbfa40 --- /dev/null +++ b/codex-rs/core/src/agent.rs @@ -0,0 +1,428 @@ +//! Multi-agent orchestration system with customizable system prompts +//! +//! This module provides a lightweight agent system where agents are primarily +//! specialized through custom system prompts while inheriting tools and permissions +//! from the current workspace context. 
+ +use crate::error::Result; +use serde::Deserialize; +use serde::Serialize; +use std::collections::HashMap; +use std::path::Path; +use std::path::PathBuf; + +/// Configuration for a single agent +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AgentConfig { + /// The system prompt that defines the agent's behavior + /// Required if prompt_file is not provided + #[serde(default, skip_serializing_if = "Option::is_none")] + pub prompt: Option, + + /// Optional: Load prompt from file instead of inline + /// Required if prompt is not provided + #[serde(skip_serializing_if = "Option::is_none")] + pub prompt_file: Option, + + /// Optional: Override tools (usually inherits from context) + #[serde(skip_serializing_if = "Option::is_none")] + pub tools: Option>, + + /// Optional: Override permissions (usually inherits from context) + #[serde(skip_serializing_if = "Option::is_none")] + pub permissions: Option, +} + +impl AgentConfig { + /// Validate that the config has either prompt or prompt_file + pub fn validate(&self) -> anyhow::Result<()> { + if self.prompt.is_none() && self.prompt_file.is_none() { + return Err(anyhow::anyhow!( + "Agent configuration must have either 'prompt' or 'prompt_file'" + )); + } + if self.prompt.is_some() && self.prompt_file.is_some() { + return Err(anyhow::anyhow!( + "Agent configuration should have either 'prompt' or 'prompt_file', not both" + )); + } + Ok(()) + } + + /// Get the effective prompt, loading from file if necessary + pub fn get_prompt(&mut self, agents_dir: Option<&Path>) -> anyhow::Result { + if let Some(prompt) = &self.prompt { + return Ok(prompt.clone()); + } + + if let Some(prompt_file) = &self.prompt_file { + let full_path = if let Some(dir) = agents_dir { + dir.join(prompt_file) + } else { + PathBuf::from(prompt_file) + }; + + let prompt_content = std::fs::read_to_string(&full_path).map_err(|e| { + anyhow::anyhow!("Cannot read prompt file '{}': {}", full_path.display(), e) + })?; + + // Cache the loaded prompt + 
self.prompt = Some(prompt_content.clone()); + Ok(prompt_content) + } else { + Err(anyhow::anyhow!("No prompt or prompt_file specified")) + } + } +} + +/// Registry of available agents and their configurations +pub struct AgentRegistry { + agents: HashMap, + #[allow(dead_code)] + agents_dir: Option, +} + +impl AgentRegistry { + /// Validate that a prompt file path doesn't escape allowed directories + fn validate_prompt_path(base_dir: &Path, prompt_file: &str) -> anyhow::Result { + let path = if prompt_file.starts_with('/') { + PathBuf::from(prompt_file) + } else { + base_dir.join(prompt_file) + }; + + // Canonicalize to resolve ../ and symlinks + let canonical = path + .canonicalize() + .map_err(|e| anyhow::anyhow!("Cannot access prompt file: {}", e))?; + + // Get the home/.codex directory + let home_codex = dirs::home_dir() + .ok_or_else(|| anyhow::anyhow!("Cannot determine home directory"))? + .join(".codex"); + + // Security check: path must be within ~/.codex or the base directory + if !canonical.starts_with(&home_codex) && !canonical.starts_with(base_dir) { + return Err(anyhow::anyhow!( + "Security error: Prompt file must be within ~/.codex directory" + )); + } + + Ok(canonical) + } + + /// Create a new agent registry, loading user configurations if available + pub fn new() -> Result { + let mut agents = HashMap::new(); + + // Add the single default "general" agent + agents.insert( + "general".to_string(), + AgentConfig { + prompt: Some("You are a helpful AI assistant. 
Complete the given task efficiently and accurately.".to_string()), + prompt_file: None, + tools: None, + permissions: None, + } + ); + + // Try to load user agents from ~/.codex/agents.toml + let agents_dir = Self::get_agents_directory(); + if let Some(ref dir) = agents_dir { + let config_path = dir.join("agents.toml"); + if config_path.exists() { + match std::fs::read_to_string(&config_path) { + Ok(content) => { + match toml::from_str::>(&content) { + Ok(user_agents) => { + // Process each agent config + for (name, mut config) in user_agents { + // Validate the configuration + if let Err(e) = config.validate() { + tracing::error!( + "Agent '{}' configuration invalid: {}", + name, + e + ); + continue; + } + + // If prompt_file is specified, load the prompt from file + if let Some(ref prompt_file) = config.prompt_file { + // Validate the path to prevent traversal attacks + match Self::validate_prompt_path(dir, prompt_file) { + Ok(safe_path) => { + match std::fs::read_to_string(&safe_path) { + Ok(prompt_content) => { + config.prompt = Some(prompt_content); + tracing::debug!( + "Loaded prompt file for agent '{}'", + name + ); + } + Err(e) => { + tracing::error!( + "Cannot read prompt file '{}' for agent '{}': {}", + prompt_file, + name, + e + ); + // Skip this agent but continue loading others + continue; + } + } + } + Err(e) => { + tracing::error!( + "Agent '{}' configuration error: {}", + name, + e + ); + // Skip this agent but continue loading others + continue; + } + } + } + + agents.insert(name, config); + } + tracing::info!("Loaded {} user-defined agents", agents.len() - 1); + } + Err(e) => { + tracing::warn!("Failed to parse agents.toml: {}", e); + } + } + } + Err(e) => { + tracing::debug!("Could not read agents.toml: {}", e); + } + } + } + } + + Ok(Self { agents, agents_dir }) + } + + /// Get the agents directory path (~/.codex/agents) + fn get_agents_directory() -> Option { + std::env::var("HOME") + .or_else(|_| std::env::var("USERPROFILE")) + .ok() + 
.map(|home| PathBuf::from(home).join(".codex")) + } + + /// Get an agent configuration by name + #[allow(dead_code)] + pub fn get_agent(&self, name: &str) -> Option<&AgentConfig> { + self.agents.get(name) + } + + /// Get the system prompt for an agent (falls back to "general" if not found) + pub fn get_system_prompt(&self, agent_name: &str) -> String { + self.agents + .get(agent_name) + .or_else(|| self.agents.get("general")) + .and_then(|config| config.prompt.clone()) + .unwrap_or_else(|| "You are a helpful AI assistant.".to_string()) + } + + /// List all available agents + #[allow(dead_code)] + pub fn list_agents(&self) -> Vec { + self.agents.keys().cloned().collect() + } + + /// Get detailed information about all agents + pub fn list_agent_details(&self) -> Vec { + let mut agents = Vec::new(); + + for (name, config) in &self.agents { + let description = if let Some(ref prompt) = config.prompt { + self.extract_description(prompt) + } else { + "Agent with file-based prompt".to_string() + }; + agents.push(crate::protocol::AgentInfo { + name: name.clone(), + description, + is_builtin: name == "general", + }); + } + + agents.sort_by(|a, b| { + // Built-in agents first, then alphabetical + match (a.is_builtin, b.is_builtin) { + (true, false) => std::cmp::Ordering::Less, + (false, true) => std::cmp::Ordering::Greater, + _ => a.name.cmp(&b.name), + } + }); + + agents + } + + /// Extract brief description from prompt + fn extract_description(&self, prompt: &str) -> String { + // Take first line or first sentence as description + let first_line = prompt.lines().next().unwrap_or(""); + let desc = if let Some(pos) = first_line.find('.') { + &first_line[..=pos] + } else { + first_line + }; + + // Clean up common prefixes + desc.trim_start_matches("You are a ") + .trim_start_matches("You are an ") + .trim_start_matches("You are ") + .trim() + .to_string() + } + + /// Check if agents can spawn other agents (always false to prevent recursion) + #[allow(dead_code)] + pub fn 
can_spawn_agents(metadata: &HashMap) -> bool { + !metadata.contains_key("is_agent") + } + + /// Mark a context as being an agent context + #[allow(dead_code)] + pub fn mark_as_agent_context(metadata: &mut HashMap) { + metadata.insert("is_agent".to_string(), "true".to_string()); + } +} + +/// Execute an agent with a specific task +#[allow(dead_code)] +pub async fn execute_agent_task( + agent_name: &str, + task: String, + registry: &AgentRegistry, +) -> Result { + // Get the agent's system prompt + let system_prompt = registry.get_system_prompt(agent_name); + + // Build the specialized prompt for this agent + let full_prompt = format!("{system_prompt}\n\nTask: {task}"); + + // Note: The actual execution will be handled by the parent context + // using the existing conversation infrastructure + Ok(full_prompt) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempfile::TempDir; + + #[test] + fn test_default_agent_exists() { + let registry = AgentRegistry::new().unwrap(); + assert!(registry.get_agent("general").is_some()); + } + + #[test] + fn test_agent_recursion_prevention() { + let mut metadata = HashMap::new(); + assert!(AgentRegistry::can_spawn_agents(&metadata)); + + AgentRegistry::mark_as_agent_context(&mut metadata); + assert!(!AgentRegistry::can_spawn_agents(&metadata)); + } + + #[test] + fn test_path_traversal_prevention() { + // Create a temporary directory structure + let temp_dir = TempDir::new().unwrap(); + let base_dir = temp_dir.path(); + + // Create a safe file + let safe_dir = base_dir.join("prompts"); + fs::create_dir(&safe_dir).unwrap(); + let safe_file = safe_dir.join("test.txt"); + fs::write(&safe_file, "safe content").unwrap(); + + // Test that normal paths work + let result = AgentRegistry::validate_prompt_path(base_dir, "prompts/test.txt"); + assert!(result.is_ok()); + + // Test that path traversal is blocked + let result = AgentRegistry::validate_prompt_path(base_dir, "../../../etc/passwd"); + assert!(result.is_err()); + 
assert!(result.unwrap_err().to_string().contains("Security error")); + + // Test that absolute paths outside allowed dirs are blocked + let result = AgentRegistry::validate_prompt_path(base_dir, "/etc/passwd"); + assert!(result.is_err()); + } + + #[test] + fn test_agent_config_validation() { + // Test config with prompt is valid + let config = AgentConfig { + prompt: Some("Test prompt".to_string()), + prompt_file: None, + tools: None, + permissions: None, + }; + assert!(config.validate().is_ok()); + + // Test config with prompt_file is valid + let config = AgentConfig { + prompt: None, + prompt_file: Some("test.txt".to_string()), + tools: None, + permissions: None, + }; + assert!(config.validate().is_ok()); + + // Test config with neither prompt nor prompt_file is invalid + let config = AgentConfig { + prompt: None, + prompt_file: None, + tools: None, + permissions: None, + }; + assert!(config.validate().is_err()); + + // Test config with both prompt and prompt_file is invalid + let config = AgentConfig { + prompt: Some("Test prompt".to_string()), + prompt_file: Some("test.txt".to_string()), + tools: None, + permissions: None, + }; + assert!(config.validate().is_err()); + } + + #[test] + fn test_agent_config_get_prompt() { + // Test getting prompt from inline prompt + let mut config = AgentConfig { + prompt: Some("Inline prompt".to_string()), + prompt_file: None, + tools: None, + permissions: None, + }; + assert_eq!(config.get_prompt(None).unwrap(), "Inline prompt"); + + // Test getting prompt from file + let temp_dir = TempDir::new().unwrap(); + let prompt_file = temp_dir.path().join("test_prompt.txt"); + fs::write(&prompt_file, "File-based prompt").unwrap(); + + let mut config = AgentConfig { + prompt: None, + prompt_file: Some("test_prompt.txt".to_string()), + tools: None, + permissions: None, + }; + + let prompt = config.get_prompt(Some(temp_dir.path())).unwrap(); + assert_eq!(prompt, "File-based prompt"); + + // Check that prompt is cached + 
assert_eq!(config.prompt, Some("File-based prompt".to_string())); + } +} diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index b3b75ec76a7d..424a7d1ece00 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -73,6 +73,7 @@ use crate::mcp_tool_call::handle_mcp_tool_call; use crate::model_family::find_family_for_model; use crate::openai_model_info::get_model_info; use crate::openai_tools::ApplyPatchToolArgs; + use crate::openai_tools::ToolsConfig; use crate::openai_tools::ToolsConfigParams; use crate::openai_tools::get_openai_tools; @@ -135,21 +136,44 @@ mod compact; use self::compact::build_compacted_history; use self::compact::collect_user_messages; -// A convenience extension trait for acquiring mutex locks where poisoning is -// unrecoverable and should abort the program. This avoids scattered `.unwrap()` -// calls on `lock()` while still surfacing a clear panic message when a lock is -// poisoned. +// A convenience extension trait for acquiring mutex locks with automatic +// recovery from poison errors. This provides a cleaner API than manually +// handling poisoned locks at every call site. 
trait MutexExt { - fn lock_unchecked(&self) -> MutexGuard<'_, T>; + fn lock_or_recover(&self) -> MutexGuard<'_, T>; } impl MutexExt for Mutex { - fn lock_unchecked(&self) -> MutexGuard<'_, T> { - #[expect(clippy::expect_used)] - self.lock().expect("poisoned lock") + fn lock_or_recover(&self) -> MutexGuard<'_, T> { + match self.lock() { + Ok(guard) => guard, + Err(poisoned) => { + // Log the poisoned lock but recover and continue + // This is safe because we're taking ownership of the data + tracing::warn!("Recovering from poisoned mutex"); + poisoned.into_inner() + } + } } } +/// Structure to hold pending tool calls for parallel execution +#[derive(Clone)] +struct PendingToolCall { + item: ResponseItem, + call_id: String, + name: String, + arguments: Option, +} + +/// Arguments for the agent tool +#[derive(Debug, Serialize, Deserialize)] +struct AgentToolArgs { + task: String, + agent: Option, + context: Option, +} + /// The high-level interface to the Codex system. /// It operates as a queue pair where you send submissions and receive events. pub struct Codex { @@ -287,6 +311,9 @@ pub(crate) struct Session { session_manager: ExecSessionManager, unified_exec_manager: UnifiedExecSessionManager, + /// Agent registry for multi-agent orchestration + agent_registry: Mutex>>, + /// External notifier command (will be passed as args to exec()). When /// `None` this feature is disabled. 
notify: Option>, @@ -479,6 +506,7 @@ impl Session { use_streamable_shell_tool: config.use_experimental_streamable_shell_tool, include_view_image_tool: config.include_view_image_tool, experimental_unified_exec_tool: config.use_experimental_unified_exec_tool, + include_agent_tool: true, // Enable agent tool by default }), user_instructions, base_instructions, @@ -488,12 +516,23 @@ impl Session { cwd, is_review_mode: false, }; + + // Initialize agent registry once during session creation + let agent_registry = match crate::agent::AgentRegistry::new() { + Ok(r) => Some(Arc::new(r)), + Err(e) => { + tracing::warn!("Failed to initialize agent registry: {e}"); + None + } + }; + let sess = Arc::new(Session { conversation_id, tx_event: tx_event.clone(), mcp_connection_manager, session_manager: ExecSessionManager::default(), unified_exec_manager: UnifiedExecSessionManager::default(), + agent_registry: Mutex::new(agent_registry), notify, state: Mutex::new(state), rollout: Mutex::new(Some(rollout_recorder)), @@ -529,7 +568,7 @@ impl Session { } pub fn set_task(&self, task: AgentTask) { - let mut state = self.state.lock_unchecked(); + let mut state = self.state.lock_or_recover(); if let Some(current_task) = state.current_task.take() { current_task.abort(TurnAbortReason::Replaced); } @@ -537,7 +576,7 @@ impl Session { } pub fn remove_task(&self, sub_id: &str) { - let mut state = self.state.lock_unchecked(); + let mut state = self.state.lock_or_recover(); if let Some(task) = &state.current_task && task.sub_id == sub_id { @@ -546,7 +585,7 @@ impl Session { } fn next_internal_sub_id(&self) -> String { - let mut state = self.state.lock_unchecked(); + let mut state = self.state.lock_or_recover(); let id = state.next_internal_sub_id; state.next_internal_sub_id += 1; format!("auto-compact-{id}") @@ -604,7 +643,7 @@ impl Session { let (tx_approve, rx_approve) = oneshot::channel(); let event_id = sub_id.clone(); let prev_entry = { - let mut state = self.state.lock_unchecked(); + let mut 
state = self.state.lock_or_recover(); state.pending_approvals.insert(sub_id, tx_approve) }; if prev_entry.is_some() { @@ -636,7 +675,7 @@ impl Session { let (tx_approve, rx_approve) = oneshot::channel(); let event_id = sub_id.clone(); let prev_entry = { - let mut state = self.state.lock_unchecked(); + let mut state = self.state.lock_or_recover(); state.pending_approvals.insert(sub_id, tx_approve) }; if prev_entry.is_some() { @@ -658,7 +697,7 @@ impl Session { pub fn notify_approval(&self, sub_id: &str, decision: ReviewDecision) { let entry = { - let mut state = self.state.lock_unchecked(); + let mut state = self.state.lock_or_recover(); state.pending_approvals.remove(sub_id) }; match entry { @@ -672,7 +711,7 @@ impl Session { } pub fn add_approved_command(&self, cmd: Vec) { - let mut state = self.state.lock_unchecked(); + let mut state = self.state.lock_or_recover(); state.approved_commands.insert(cmd); } @@ -713,7 +752,7 @@ impl Session { /// Append ResponseItems to the in-memory conversation history only. fn record_into_history(&self, items: &[ResponseItem]) { self.state - .lock_unchecked() + .lock_or_recover() .history .record_items(items.iter()); } @@ -743,7 +782,7 @@ impl Session { async fn persist_rollout_items(&self, items: &[RolloutItem]) { let recorder = { - let guard = self.rollout.lock_unchecked(); + let guard = self.rollout.lock_or_recover(); guard.as_ref().cloned() }; if let Some(rec) = recorder @@ -758,7 +797,7 @@ impl Session { turn_context: &TurnContext, token_usage: &Option, ) -> Option { - let mut state = self.state.lock_unchecked(); + let mut state = self.state.lock_or_recover(); let info = TokenUsageInfo::new_or_append( &state.token_info, token_usage, @@ -974,12 +1013,12 @@ impl Session { /// Build the full turn input by concatenating the current conversation /// history with additional items for this turn. 
pub fn turn_input_with_history(&self, extra: Vec) -> Vec { - [self.state.lock_unchecked().history.contents(), extra].concat() + [self.state.lock_or_recover().history.contents(), extra].concat() } /// Returns the input if there was no task running to inject into pub fn inject_input(&self, input: Vec) -> Result<(), Vec> { - let mut state = self.state.lock_unchecked(); + let mut state = self.state.lock_or_recover(); if state.current_task.is_some() { state.pending_input.push(input.into()); Ok(()) @@ -989,7 +1028,7 @@ impl Session { } pub fn get_pending_input(&self) -> Vec { - let mut state = self.state.lock_unchecked(); + let mut state = self.state.lock_or_recover(); if state.pending_input.is_empty() { Vec::with_capacity(0) } else { @@ -1013,7 +1052,7 @@ impl Session { fn interrupt_task(&self) { info!("interrupt received: abort current task, if any"); - let mut state = self.state.lock_unchecked(); + let mut state = self.state.lock_or_recover(); state.pending_approvals.clear(); state.pending_input.clear(); if let Some(task) = state.current_task.take() { @@ -1246,6 +1285,7 @@ async fn submission_loop( use_streamable_shell_tool: config.use_experimental_streamable_shell_tool, include_view_image_tool: config.include_view_image_tool, experimental_unified_exec_tool: config.use_experimental_unified_exec_tool, + include_agent_tool: true, }); let new_turn_context = TurnContext { @@ -1336,6 +1376,7 @@ async fn submission_loop( include_view_image_tool: config.include_view_image_tool, experimental_unified_exec_tool: config .use_experimental_unified_exec_tool, + include_agent_tool: true, }), user_instructions: turn_context.user_instructions.clone(), base_instructions: turn_context.base_instructions.clone(), @@ -1432,6 +1473,25 @@ async fn submission_loop( }; sess.send_event(event).await; } + Op::ListAgents => { + let sub_id = sub.id.clone(); + + // Get the agent registry and list agents + let agents = { + let agent_registry_guard = sess.agent_registry.lock_or_recover(); + 
agent_registry_guard + .as_ref() + .map(|r| r.list_agent_details()) + .unwrap_or_else(Vec::new) + }; // MutexGuard is dropped here + let event = Event { + id: sub_id, + msg: EventMsg::ListAgentsResponse(crate::protocol::ListAgentsResponseEvent { + agents, + }), + }; + sess.send_event(event).await; + } Op::ListCustomPrompts => { let sub_id = sub.id.clone(); @@ -1468,7 +1528,7 @@ async fn submission_loop( // Gracefully flush and shutdown rollout recorder on session end so tests // that inspect the rollout file do not race with the background writer. - let recorder_opt = sess.rollout.lock_unchecked().take(); + let recorder_opt = sess.rollout.lock_or_recover().take(); if let Some(rec) = recorder_opt && let Err(e) = rec.shutdown().await { @@ -1493,7 +1553,7 @@ async fn submission_loop( let sub_id = sub.id.clone(); // Flush rollout writes before returning the path so readers observe a consistent file. let (path, rec_opt) = { - let guard = sess.rollout.lock_unchecked(); + let guard = sess.rollout.lock_or_recover(); match guard.as_ref() { Some(rec) => (rec.get_rollout_path(), Some(rec.clone())), None => { @@ -1555,6 +1615,7 @@ async fn spawn_review_thread( use_streamable_shell_tool: false, include_view_image_tool: false, experimental_unified_exec_tool: config.use_experimental_unified_exec_tool, + include_agent_tool: false, // Disable for review mode }); let base_instructions = REVIEW_PROMPT.to_string(); @@ -1948,9 +2009,18 @@ async fn run_turn( sub_id: String, input: Vec, ) -> CodexResult { + // Get agent list if available + let agent_infos = + if let Ok(Some(registry)) = sess.agent_registry.lock().map(|g| g.as_ref().cloned()) { + Some(registry.list_agent_details()) + } else { + None + }; + let tools = get_openai_tools( &turn_context.tools_config, Some(sess.mcp_connection_manager.list_all_tools()), + agent_infos, ); let prompt = Prompt { @@ -2092,6 +2162,11 @@ async fn try_run_turn( let mut output = Vec::new(); + // First pass: collect all items from the stream + let mut 
collected_items = Vec::new(); + #[allow(unused_assignments)] + let mut token_usage_result: Option = None; + loop { // Poll the next item from the model stream. We must inspect *both* Ok and Err // cases so that transient stream failures (e.g., dropped SSE connection before @@ -2118,15 +2193,7 @@ async fn try_run_turn( match event { ResponseEvent::Created => {} ResponseEvent::OutputItemDone(item) => { - let response = handle_response_item( - sess, - turn_context, - turn_diff_tracker, - sub_id, - item.clone(), - ) - .await?; - output.push(ProcessedResponseItem { item, response }); + collected_items.push(item); } ResponseEvent::WebSearchCallBegin { call_id } => { let _ = sess @@ -2159,12 +2226,9 @@ async fn try_run_turn( sess.send_event(event).await; } - let result = TurnRunResult { - processed_items: output, - total_token_usage: token_usage.clone(), - }; - - return Ok(result); + // Store the token usage for the result + token_usage_result = token_usage.clone(); + break; // Exit the collection loop } ResponseEvent::OutputTextDelta(delta) => { // In review child threads, suppress assistant text deltas; the @@ -2206,6 +2270,174 @@ async fn try_run_turn( } } } + + // Process collected items after the stream completes + // Process items in order while collecting agent calls for parallel execution + let mut processed_items = Vec::new(); + let mut pending_agent_calls = Vec::new(); + + for item in collected_items { + match &item { + ResponseItem::FunctionCall { + name, + call_id, + arguments, + .. + } if name == "agent" => { + // Collect agent calls for parallel execution + pending_agent_calls.push(PendingToolCall { + item: item.clone(), + call_id: call_id.clone(), + name: name.clone(), + arguments: Some(arguments.clone()), + }); + } + ResponseItem::FunctionCall { .. 
} => { + // Process non-agent function calls immediately in order + let response = handle_response_item( + sess, + turn_context, + turn_diff_tracker, + sub_id, + item.clone(), + ) + .await?; + if let Some(resp) = response.clone() { + output.push(resp); + } + processed_items.push(ProcessedResponseItem { + item: item.clone(), + response, + }); + } + ResponseItem::LocalShellCall { + call_id: Some(id), .. + } => { + // Process shell calls immediately in order + let response = handle_response_item( + sess, + turn_context, + turn_diff_tracker, + sub_id, + item.clone(), + ) + .await?; + if let Some(resp) = response.clone() { + output.push(resp); + } + processed_items.push(ProcessedResponseItem { + item: item.clone(), + response, + }); + } + ResponseItem::CustomToolCall { name, call_id, .. } if name == "agent" => { + // Collect agent calls for parallel execution + pending_agent_calls.push(PendingToolCall { + item: item.clone(), + call_id: call_id.clone(), + name: name.clone(), + arguments: None, + }); + } + ResponseItem::CustomToolCall { .. 
} => { + // Process non-agent custom tool calls immediately in order + let response = handle_response_item( + sess, + turn_context, + turn_diff_tracker, + sub_id, + item.clone(), + ) + .await?; + if let Some(resp) = response.clone() { + output.push(resp); + } + processed_items.push(ProcessedResponseItem { + item: item.clone(), + response, + }); + } + _ => { + // Process non-tool items immediately in order + let response = handle_response_item( + sess, + turn_context, + turn_diff_tracker, + sub_id, + item.clone(), + ) + .await?; + if let Some(resp) = response.clone() { + output.push(resp); + } + processed_items.push(ProcessedResponseItem { item, response }); + } + } + } + + // Process pending agent calls in parallel if any were collected + if !pending_agent_calls.is_empty() { + // Handle agent calls with TRUE PARALLEL EXECUTION + // All agents run concurrently at the same time, not sequentially + // This provides maximum performance for multi-agent orchestration + + // Notify UI about parallel agent execution starting + if pending_agent_calls.len() > 1 { + let event = Event { + id: sub_id.to_string(), + msg: EventMsg::BackgroundEvent(BackgroundEventEvent { + message: format!( + "🚀 Starting {} agents in PARALLEL...", + pending_agent_calls.len() + ), + }), + }; + sess.send_event(event).await; + } + + let agent_call_params: Vec<_> = pending_agent_calls + .iter() + .map(|call| { + ( + call.call_id.clone(), + call.arguments.clone().unwrap_or_default(), + sub_id.to_string(), + ) + }) + .collect(); + + // Execute all agents in parallel with proper concurrency control + let agent_results = { + // Create thread-safe wrappers for concurrent execution + // SAFETY: sess and turn_context are guaranteed to outlive this call + // as they're borrowed from the enclosing function scope + let sess_wrapper = Arc::new(SessionWrapper::new(sess)); + let context_wrapper = Arc::new(TurnContextWrapper::new(turn_context)); + + execute_agents_concurrent_safe( + sess_wrapper, + context_wrapper, 
+ turn_diff_tracker, + agent_call_params, + ) + .await + }; + + // Process agent results + for (i, (_call_id, result)) in agent_results.into_iter().enumerate() { + let item = pending_agent_calls[i].item.clone(); + output.push(result.clone()); + processed_items.push(ProcessedResponseItem { + item, + response: Some(result), + }); + } + } + + Ok(TurnRunResult { + processed_items, + total_token_usage: token_usage_result, + }) } async fn handle_response_item( @@ -2567,6 +2799,17 @@ async fn handle_function_call( output: function_call_output, } } + "agent" => { + // Agent calls are now handled in parallel at the turn level + // Return a placeholder response that will be replaced by parallel execution + ResponseInputItem::FunctionCallOutput { + call_id, + output: FunctionCallOutputPayload { + content: "Agent execution deferred for parallel processing".to_string(), + success: Some(false), + }, + } + } _ => { match sess.mcp_connection_manager.parse_tool_name(&name) { Some((server, tool_name)) => { @@ -2592,6 +2835,515 @@ async fn handle_function_call( } } +/// Messages sent from agent execution +#[derive(Debug, Clone)] +#[allow(dead_code)] +enum AgentMessage { + Loop(String), // Description of an execution loop/step + Change(PathBuf, FileChange), // File change made by the agent (path, change) + Output(String), // General output from the agent + Summary(String), // Summary of agent's work +} + +// ================================================================================= +// Concurrent Execution Wrappers for Thread Safety +// ================================================================================= +// These wrappers enable safe sharing of Session and TurnContext across threads +// during parallel agent execution. They use raw pointers internally but are safe +// because: +// 1. The referenced objects are guaranteed to outlive the wrappers (enforced by caller) +// 2. Session and TurnContext have internal synchronization via Mutex fields +// 3. 
The wrappers are only used within a controlled scope where lifetimes are guaranteed + +/// Thread-safe wrapper for Session to enable concurrent execution +struct SessionWrapper { + // We use a raw pointer here because: + // 1. We need to share &Session across threads but Session isn't Clone + // 2. The Session is guaranteed to outlive this wrapper (created in same scope) + // 3. Session has internal thread-safety via Mutex fields + ptr: *const Session, + _phantom: std::marker::PhantomData, +} + +// SAFETY: Session has internal synchronization via Mutex fields +// The wrapper ensures the Session outlives all uses +unsafe impl Send for SessionWrapper {} +unsafe impl Sync for SessionWrapper {} + +impl SessionWrapper { + fn new(sess: &Session) -> Self { + Self { + ptr: sess as *const Session, + _phantom: std::marker::PhantomData, + } + } + + fn get(&self) -> &Session { + // SAFETY: The caller guarantees Session outlives this wrapper + // This is enforced by the structure of execute_agents_concurrent_safe + unsafe { &*self.ptr } + } +} + +/// Thread-safe wrapper for TurnContext to enable concurrent execution +struct TurnContextWrapper { + // We use a raw pointer here for the same reasons as SessionWrapper + ptr: *const TurnContext, + _phantom: std::marker::PhantomData, +} + +// SAFETY: TurnContext contains only thread-safe types +// The wrapper ensures the TurnContext outlives all uses +unsafe impl Send for TurnContextWrapper {} +unsafe impl Sync for TurnContextWrapper {} + +impl TurnContextWrapper { + fn new(ctx: &TurnContext) -> Self { + Self { + ptr: ctx as *const TurnContext, + _phantom: std::marker::PhantomData, + } + } + + fn get(&self) -> &TurnContext { + // SAFETY: The caller guarantees TurnContext outlives this wrapper + // This is enforced by the structure of execute_agents_concurrent_safe + unsafe { &*self.ptr } + } +} + +// ================================================================================= +// Agent Execution Helper Functions +// 
/// Generate a plan ID for an agent execution.
///
/// The ID has the form `agent-{agent_name}-{prefix}`, where `prefix` is at most the first
/// 8 bytes of `call_id`. If byte 8 falls inside a multi-byte UTF-8 character, the prefix is
/// shortened to the nearest earlier character boundary, so the result is always truncated
/// and never splits a character. A `call_id` of 8 bytes or fewer is used as-is.
///
/// The prefix is short, so two calls whose IDs share their first 8 bytes produce the same
/// plan ID for the same agent; do not rely on it as a globally unique key.
fn generate_agent_plan_id(agent_name: &str, call_id: &str) -> String {
    // Back off to a character boundary; index 0 is always a boundary, so this terminates.
    let mut prefix_len = call_id.len().min(8);
    while !call_id.is_char_boundary(prefix_len) {
        prefix_len -= 1;
    }

    format!("agent-{}-{}", agent_name, &call_id[..prefix_len])
}
} => { + Some(ResponseInputItem::CustomToolCallOutput { + call_id: call_id.clone(), + output: error_msg.to_string(), + }) + } + _ => None, + } +} + +fn create_agent_error_response(call_id: String, error_msg: &str) -> (String, ResponseInputItem) { + ( + call_id.clone(), + ResponseInputItem::FunctionCallOutput { + call_id, + output: FunctionCallOutputPayload { + content: error_msg.to_string(), + success: Some(false), + }, + }, + ) +} + +/// Get the agent registry from the session +fn get_agent_registry(sess: &Session) -> Result, String> { + let agent_registry_guard = sess.agent_registry.lock_or_recover(); + match agent_registry_guard.as_ref() { + Some(r) => Ok(r.clone()), + None => Err("Agent registry not available".to_string()), + } +} +/// Execute multiple agents with true parallel execution +/// Uses safe wrappers to enable concurrent access to Session and TurnContext +/// All agents run in parallel using futures::future::join_all +async fn execute_agents_concurrent_safe( + sess_wrapper: Arc, + context_wrapper: Arc, + turn_diff_tracker: &mut TurnDiffTracker, + agent_calls: Vec<(String, String, String)>, // (call_id, arguments, sub_id) +) -> Vec<(String, ResponseInputItem)> { + use futures::future::join_all; + + let sess = sess_wrapper.get(); + + // Get the agent registry once using helper + let registry = match get_agent_registry(sess) { + Ok(r) => r, + Err(msg) => { + return agent_calls + .into_iter() + .map(|(call_id, _, _)| create_agent_error_response(call_id, &msg)) + .collect(); + } + }; + + // Set agent context flag to prevent recursion + // Create futures for TRUE PARALLEL agent execution + // Each agent runs independently and concurrently + let agent_futures: Vec<_> = agent_calls + .into_iter() + .map(|(call_id, arguments, sub_id)| { + let registry_clone = Arc::clone(®istry); + let sess_wrapper_clone = Arc::clone(&sess_wrapper); + let context_wrapper_clone = Arc::clone(&context_wrapper); + + // Each agent executes in parallel + async move { + // Parse 
agent arguments + let args = match parse_agent_args(&arguments) { + Ok(a) => a, + Err(e) => { + let (call_id, response) = create_agent_error_response( + call_id.clone(), + &format!("Failed to parse agent arguments: {e}"), + ); + return (call_id, response, TurnDiffTracker::new()); + } + }; + + let agent_name = args.agent.unwrap_or_else(|| "general".to_string()); + let agent_system_prompt = registry_clone.get_system_prompt(&agent_name); + + // Build the agent's task message (what the user is asking) + let agent_task_message = + build_agent_task_message(args.context.as_deref(), &args.task); + + let plan_item_id = generate_agent_plan_id(&agent_name, &call_id); + + // Execute the agent with concurrent support + let (result, diff_tracker) = execute_agent_isolated_concurrent( + sess_wrapper_clone, + context_wrapper_clone, + AgentExecutionParams { + sub_id: sub_id.clone(), + agent_name: agent_name.clone(), + task_message: agent_task_message, + agent_system_prompt: agent_system_prompt.clone(), + call_id: call_id.clone(), + _plan_item_id: Some(plan_item_id), + }, + ) + .await; + + // Convert result to ResponseInputItem + let response = match result { + Ok(agent_response) => ResponseInputItem::FunctionCallOutput { + call_id: call_id.clone(), + output: FunctionCallOutputPayload { + content: agent_response, + success: Some(true), + }, + }, + Err(e) => ResponseInputItem::FunctionCallOutput { + call_id: call_id.clone(), + output: FunctionCallOutputPayload { + content: format!("Agent execution failed: {e}"), + success: Some(false), + }, + }, + }; + + (call_id, response, diff_tracker) + } + }) + .collect(); + + // Execute all agents concurrently - TRUE PARALLELISM + // All agents run at the same time, not sequentially + let agent_results = join_all(agent_futures).await; + + // Merge all diff trackers from agents and return results + let mut results = Vec::new(); + for (call_id, response, agent_diff_tracker) in agent_results { + // Merge the agent's diff tracker into the main one + 
turn_diff_tracker.merge(agent_diff_tracker); + results.push((call_id, response)); + } + + results +} +struct AgentExecutionParams { + sub_id: String, + agent_name: String, + task_message: String, + agent_system_prompt: String, + call_id: String, + _plan_item_id: Option, +} + +/// Execute an agent in an isolated context with concurrent support +async fn execute_agent_isolated_concurrent( + sess_wrapper: Arc, + context_wrapper: Arc, + params: AgentExecutionParams, +) -> (Result, TurnDiffTracker) { + let sess = sess_wrapper.get(); + let parent_context = context_wrapper.get(); + use std::time::Instant; + let start_time = Instant::now(); + + info!( + "Executing agent '{}' with isolated context (parallel)", + params.agent_name + ); + // Log agent start and notify UI + info!( + "Agent '{}' starting task (call_id: {}) - PARALLEL EXECUTION", + params.agent_name, params.call_id + ); + + // Send agent start event to UI for status display + sess.send_event(Event { + id: params.sub_id.clone(), + msg: EventMsg::BackgroundEvent(BackgroundEventEvent { + message: format!( + "🤖 Agent '{}' started: {}", + params.agent_name, params.task_message + ), + }), + }) + .await; + + // Create agent messages - just the task as a user message + let agent_messages = vec![ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: params.task_message.clone(), + }], + }]; + + // Build agent's custom instructions: agent system prompt + AGENTS.md + let mut agent_custom_instructions = String::new(); + + // First append the agent's system prompt + if !params.agent_system_prompt.is_empty() { + agent_custom_instructions.push_str(¶ms.agent_system_prompt); + } + + // Then append AGENTS.md if present + if let Some(user_inst) = parent_context.user_instructions.as_deref() + && !user_inst.is_empty() + { + if !agent_custom_instructions.is_empty() { + agent_custom_instructions.push_str("\n\n"); + } + agent_custom_instructions.push_str(user_inst); + } + + // 
Create a modified turn context for the agent + // base_instructions: Keep parent's base instructions (default Codex instructions) + // user_instructions: Agent system prompt + AGENTS.md + // IMPORTANT: Disable agent tool for agents to prevent recursion + let mut agent_tools_config = parent_context.tools_config.clone(); + agent_tools_config.include_agent_tool = false; // Prevent agents from spawning other agents + + let agent_turn_context = TurnContext { + client: parent_context.client.clone(), + tools_config: agent_tools_config, + base_instructions: parent_context.base_instructions.clone(), // Keep default base instructions + user_instructions: if agent_custom_instructions.is_empty() { + None + } else { + Some(agent_custom_instructions) + }, // Agent prompt + AGENTS.md + approval_policy: parent_context.approval_policy, + sandbox_policy: parent_context.sandbox_policy.clone(), + shell_environment_policy: parent_context.shell_environment_policy.clone(), + cwd: parent_context.cwd.clone(), + is_review_mode: false, + }; + + // Execute a single turn for the agent + let mut agent_response = String::new(); + let mut turn_diff_tracker = TurnDiffTracker::new(); + + match run_turn( + sess, + &agent_turn_context, + &mut turn_diff_tracker, + params.sub_id.clone(), + agent_messages, + ) + .await + { + Ok(turn_output) => { + // Extract the assistant's response + for processed_item in turn_output.processed_items { + if let ProcessedResponseItem { + item: ResponseItem::Message { role, content, .. 
}, + response: None, + } = processed_item + && role == "assistant" + { + for content_item in content { + if let ContentItem::OutputText { text } = content_item { + agent_response.push_str(&text); + agent_response.push('\n'); + } + } + } + } + } + Err(e) => { + error!("Agent '{}' turn failed: {e:#}", params.agent_name); + agent_response = format!("Error during agent execution: {e}"); + return (Err(agent_response.clone()), turn_diff_tracker); + } + } + + let duration = start_time.elapsed(); + + // Log agent completion for PARALLEL EXECUTION + info!( + "Agent '{}' completed in {}ms (call_id: {}) - PARALLEL EXECUTION", + params.agent_name, + duration.as_millis(), + params.call_id + ); + + // Send agent completion status to UI + let status_msg = if agent_response.is_empty() { + format!( + "❌ Agent '{}' failed: No response generated", + params.agent_name + ) + } else { + let preview = if agent_response.len() > 100 { + format!("{}...", &agent_response[..100]) + } else { + agent_response.clone() + }; + format!( + "âś… Agent '{}' completed in {:.2}s: {}", + params.agent_name, + duration.as_secs_f64(), + preview.trim().replace('\n', " ") + ) + }; + + sess.send_event(Event { + id: params.sub_id.clone(), + msg: EventMsg::BackgroundEvent(BackgroundEventEvent { + message: status_msg, + }), + }) + .await; + + (Ok(agent_response), turn_diff_tracker) +} + +/// Generate a comprehensive summary of agent execution +#[allow(dead_code)] +fn generate_agent_summary( + agent_name: &str, + init_prompt: &str, + loops: &[String], + changes: &[(PathBuf, FileChange)], + outputs: &[String], +) -> String { + let mut summary = Vec::new(); + + // Header + summary.push(format!("=== Agent '{agent_name}' Execution Summary ===")); + summary.push(String::new()); + + // Task description + summary.push("**Task:**".to_string()); + let task_lines: Vec<&str> = init_prompt.lines().collect(); + if task_lines.len() <= 3 { + summary.push(init_prompt.to_string()); + } else { + // Compact long prompts + 
summary.push(format!("{}...", task_lines[..2].join("\n"))); + } + summary.push(String::new()); + + // Execution loops + if !loops.is_empty() { + summary.push("**Execution Steps:**".to_string()); + for (i, loop_desc) in loops.iter().enumerate() { + summary.push(format!(" {}. {}", i + 1, loop_desc)); + } + summary.push(String::new()); + } + + // File changes + if !changes.is_empty() { + summary.push(format!("**Changes Made ({} files):**", changes.len())); + for (path, change) in changes { + let action = match change { + FileChange::Add { .. } => "added", + FileChange::Delete { .. } => "deleted", + FileChange::Update { move_path, .. } => { + if move_path.is_some() { + "moved" + } else { + "modified" + } + } + }; + summary.push(format!(" - {} {}", action, path.display())); + } + summary.push(String::new()); + } else { + summary.push("**Changes Made:** None".to_string()); + summary.push(String::new()); + } + + // Compact output + if !outputs.is_empty() { + summary.push("**Result:**".to_string()); + let combined_output = outputs.join("\n"); + let output_lines: Vec<&str> = combined_output.lines().collect(); + + // Auto-compact long outputs + if output_lines.len() > 10 { + // Take first 5 and last 3 lines + let compacted = [ + output_lines[..5].join("\n"), + format!("... 
({} lines omitted) ...", output_lines.len() - 8), + output_lines[output_lines.len() - 3..].join("\n"), + ]; + summary.push(compacted.join("\n")); + } else { + summary.push(combined_output); + } + summary.push(String::new()); + } + + // Footer + summary.push(format!("=== Agent '{agent_name}' Complete ===")); + + summary.join("\n") +} + async fn handle_custom_tool_call( sess: &Session, turn_context: &TurnContext, @@ -2789,7 +3541,7 @@ async fn handle_container_exec_with_params( } None => { let safety = { - let state = sess.state.lock_unchecked(); + let state = sess.state.lock_or_recover(); assess_command_safety( ¶ms.command, turn_context.approval_policy, @@ -3356,7 +4108,7 @@ mod tests { }), )); - let actual = session.state.lock_unchecked().history.contents(); + let actual = session.state.lock_or_recover().history.contents(); assert_eq!(expected, actual); } @@ -3369,7 +4121,7 @@ mod tests { session.record_initial_history(&turn_context, InitialHistory::Forked(rollout_items)), ); - let actual = session.state.lock_unchecked().history.contents(); + let actual = session.state.lock_or_recover().history.contents(); assert_eq!(expected, actual); } @@ -3584,6 +4336,7 @@ mod tests { use_streamable_shell_tool: config.use_experimental_streamable_shell_tool, include_view_image_tool: config.include_view_image_tool, experimental_unified_exec_tool: config.use_experimental_unified_exec_tool, + include_agent_tool: true, }); let turn_context = TurnContext { client, @@ -3602,6 +4355,7 @@ mod tests { mcp_connection_manager: McpConnectionManager::default(), session_manager: ExecSessionManager::default(), unified_exec_manager: UnifiedExecSessionManager::default(), + agent_registry: Mutex::new(None), notify: None, rollout: Mutex::new(None), state: Mutex::new(State { diff --git a/codex-rs/core/src/codex/compact.rs b/codex-rs/core/src/codex/compact.rs index a465f937d4c7..d55c9073cd9b 100644 --- a/codex-rs/core/src/codex/compact.rs +++ b/codex-rs/core/src/codex/compact.rs @@ -171,7 +171,7 @@ 
async fn run_compact_task_inner( sess.remove_task(&sub_id); } let history_snapshot = { - let state = sess.state.lock_unchecked(); + let state = sess.state.lock_or_recover(); state.history.contents() }; let summary_text = get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default(); @@ -179,7 +179,7 @@ async fn run_compact_task_inner( let initial_context = sess.build_initial_context(turn_context.as_ref()); let new_history = build_compacted_history(initial_context, &user_messages, &summary_text); { - let mut state = sess.state.lock_unchecked(); + let mut state = sess.state.lock_or_recover(); state.history.replace(new_history); } @@ -290,7 +290,7 @@ async fn drain_to_completed( }; match event { Ok(ResponseEvent::OutputItemDone(item)) => { - let mut state = sess.state.lock_unchecked(); + let mut state = sess.state.lock_or_recover(); state.history.record_items(std::slice::from_ref(&item)); } Ok(ResponseEvent::Completed { .. }) => { diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index e024effbe214..42e12e765f32 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -5,6 +5,7 @@ // the TUI or the tracing stack). 
#![deny(clippy::print_stdout, clippy::print_stderr)] +pub mod agent; mod apply_patch; pub mod auth; pub mod bash; diff --git a/codex-rs/core/src/openai_tools.rs b/codex-rs/core/src/openai_tools.rs index f4d724815e22..c98c23a1a981 100644 --- a/codex-rs/core/src/openai_tools.rs +++ b/codex-rs/core/src/openai_tools.rs @@ -71,6 +71,7 @@ pub(crate) struct ToolsConfig { pub web_search_request: bool, pub include_view_image_tool: bool, pub experimental_unified_exec_tool: bool, + pub include_agent_tool: bool, } pub(crate) struct ToolsConfigParams<'a> { @@ -83,6 +84,7 @@ pub(crate) struct ToolsConfigParams<'a> { pub(crate) use_streamable_shell_tool: bool, pub(crate) include_view_image_tool: bool, pub(crate) experimental_unified_exec_tool: bool, + pub(crate) include_agent_tool: bool, } impl ToolsConfig { @@ -97,6 +99,7 @@ impl ToolsConfig { use_streamable_shell_tool, include_view_image_tool, experimental_unified_exec_tool, + include_agent_tool, } = params; let mut shell_type = if *use_streamable_shell_tool { ConfigShellToolType::StreamableShell @@ -130,6 +133,7 @@ impl ToolsConfig { web_search_request: *include_web_search_request, include_view_image_tool: *include_view_image_tool, experimental_unified_exec_tool: *experimental_unified_exec_tool, + include_agent_tool: *include_agent_tool, } } } @@ -323,6 +327,85 @@ fn create_view_image_tool() -> OpenAiTool { }, }) } + +fn create_agent_tool(agent_infos: Option<&[crate::protocol::AgentInfo]>) -> OpenAiTool { + let mut properties = BTreeMap::new(); + + // Build agent description with list of available agents + let agent_description = if let Some(agents) = agent_infos { + if agents.is_empty() { + "Name of the agent to use. No custom agents configured. Use 'general' for default." 
+ .to_string() + } else { + let agent_list: Vec = agents + .iter() + .map(|a| { + if a.description.is_empty() { + format!(" - {}", a.name) + } else { + format!(" - {}: {}", a.name, a.description) + } + }) + .collect(); + format!( + "Name of the agent to use. Available agents:\n{}\n - general: Default general-purpose agent", + agent_list.join("\n") + ) + } + } else { + "Name of the agent to use (e.g., 'code_reviewer', 'test_designer') or 'general' for default" + .to_string() + }; + + properties.insert( + "agent".to_string(), + JsonSchema::String { + description: Some(agent_description), + }, + ); + + properties.insert( + "task".to_string(), + JsonSchema::String { + description: Some("The task for the agent to perform autonomously. Be specific and provide clear instructions. When using multiple agent calls in parallel, each agent can work on a different task or aspect of the problem concurrently.".to_string()), + }, + ); + + properties.insert( + "context".to_string(), + JsonSchema::String { + description: Some( + "Optional additional context to provide to the agent for better task understanding" + .to_string(), + ), + }, + ); + + // Build tool description with parallel execution emphasis + let tool_description = if let Some(agents) = agent_infos { + if !agents.is_empty() { + format!( + "Run a specialized agent for delegated task execution. {} specialized agents available. Use the 'agent' parameter to select one. IMPORTANT: This tool supports TRUE PARALLEL EXECUTION - multiple agent tool calls in the same response will run concurrently for maximum performance. Ideal for dividing complex tasks among multiple specialized agents.", + agents.len() + ) + } else { + "Run a specialized agent for delegated task execution. No custom agents configured, will use general agent. 
IMPORTANT: This tool supports TRUE PARALLEL EXECUTION - multiple agent tool calls in the same response will run concurrently for maximum performance.".to_string() + } + } else { + "Run a specialized agent with custom system prompt for delegated task execution. IMPORTANT: This tool supports TRUE PARALLEL EXECUTION - multiple agent tool calls in the same response will run concurrently for maximum performance. Ideal for dividing complex tasks among multiple specialized agents.".to_string() + }; + + OpenAiTool::Function(ResponsesApiTool { + name: "agent".to_string(), + description: tool_description, + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["task".to_string()]), + additional_properties: Some(false), + }, + }) +} /// TODO(dylan): deprecate once we get rid of json tool #[derive(Serialize, Deserialize)] pub(crate) struct ApplyPatchToolArgs { @@ -530,6 +613,7 @@ fn sanitize_json_schema(value: &mut JsonValue) { pub(crate) fn get_openai_tools( config: &ToolsConfig, mcp_tools: Option>, + agent_infos: Option>, ) -> Vec { let mut tools: Vec = Vec::new(); @@ -580,6 +664,12 @@ pub(crate) fn get_openai_tools( if config.include_view_image_tool { tools.push(create_view_image_tool()); } + + // Include the agent tool for multi-agent orchestration + if config.include_agent_tool { + tools.push(create_agent_tool(agent_infos.as_deref())); + } + if let Some(mcp_tools) = mcp_tools { // Ensure deterministic ordering to maximize prompt cache hits. 
let mut entries: Vec<(String, mcp_types::Tool)> = mcp_tools.into_iter().collect(); @@ -644,8 +734,9 @@ mod tests { use_streamable_shell_tool: false, include_view_image_tool: true, experimental_unified_exec_tool: true, + include_agent_tool: false, }); - let tools = get_openai_tools(&config, Some(HashMap::new())); + let tools = get_openai_tools(&config, Some(HashMap::new()), None); assert_eq_tool_names( &tools, @@ -666,8 +757,9 @@ mod tests { use_streamable_shell_tool: false, include_view_image_tool: true, experimental_unified_exec_tool: true, + include_agent_tool: false, }); - let tools = get_openai_tools(&config, Some(HashMap::new())); + let tools = get_openai_tools(&config, Some(HashMap::new()), None); assert_eq_tool_names( &tools, @@ -688,6 +780,7 @@ mod tests { use_streamable_shell_tool: false, include_view_image_tool: true, experimental_unified_exec_tool: true, + include_agent_tool: false, }); let tools = get_openai_tools( &config, @@ -725,6 +818,7 @@ mod tests { description: Some("Do something cool".to_string()), }, )])), + None, ); assert_eq_tool_names( @@ -794,6 +888,7 @@ mod tests { use_streamable_shell_tool: false, include_view_image_tool: true, experimental_unified_exec_tool: true, + include_agent_tool: false, }); // Intentionally construct a map with keys that would sort alphabetically. @@ -845,7 +940,7 @@ mod tests { ), ]); - let tools = get_openai_tools(&config, Some(tools_map)); + let tools = get_openai_tools(&config, Some(tools_map), None); // Expect unified_exec first, followed by MCP tools sorted by fully-qualified name. 
assert_eq_tool_names( &tools, @@ -872,6 +967,7 @@ mod tests { use_streamable_shell_tool: false, include_view_image_tool: true, experimental_unified_exec_tool: true, + include_agent_tool: false, }); let tools = get_openai_tools( @@ -895,6 +991,7 @@ mod tests { description: Some("Search docs".to_string()), }, )])), + None, ); assert_eq_tool_names( @@ -935,6 +1032,7 @@ mod tests { use_streamable_shell_tool: false, include_view_image_tool: true, experimental_unified_exec_tool: true, + include_agent_tool: false, }); let tools = get_openai_tools( @@ -956,6 +1054,7 @@ mod tests { description: Some("Pagination".to_string()), }, )])), + None, ); assert_eq_tool_names( @@ -993,6 +1092,7 @@ mod tests { use_streamable_shell_tool: false, include_view_image_tool: true, experimental_unified_exec_tool: true, + include_agent_tool: false, }); let tools = get_openai_tools( @@ -1014,6 +1114,7 @@ mod tests { description: Some("Tags".to_string()), }, )])), + None, ); assert_eq_tool_names( @@ -1054,6 +1155,7 @@ mod tests { use_streamable_shell_tool: false, include_view_image_tool: true, experimental_unified_exec_tool: true, + include_agent_tool: false, }); let tools = get_openai_tools( @@ -1075,6 +1177,7 @@ mod tests { description: Some("AnyOf Value".to_string()), }, )])), + None, ); assert_eq_tool_names( diff --git a/codex-rs/core/src/rollout/policy.rs b/codex-rs/core/src/rollout/policy.rs index 2fd0efb0dc10..e39dcca72f49 100644 --- a/codex-rs/core/src/rollout/policy.rs +++ b/codex-rs/core/src/rollout/policy.rs @@ -68,6 +68,10 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool { | EventMsg::GetHistoryEntryResponse(_) | EventMsg::McpListToolsResponse(_) | EventMsg::ListCustomPromptsResponse(_) + | EventMsg::ListAgentsResponse(_) + | EventMsg::AgentBegin(_) + | EventMsg::AgentProgress(_) + | EventMsg::AgentEnd(_) | EventMsg::PlanUpdate(_) | EventMsg::ShutdownComplete | EventMsg::ConversationPath(_) => false, diff --git a/codex-rs/core/src/turn_diff_tracker.rs 
b/codex-rs/core/src/turn_diff_tracker.rs index 6c12d6cd4648..63f94571abf4 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -219,6 +219,40 @@ impl TurnDiffTracker { if s.len() == 40 { Some(s) } else { None } } + /// Merge another TurnDiffTracker into this one. + /// This is used to combine changes from parallel tool executions. + pub fn merge(&mut self, other: TurnDiffTracker) { + // Merge external_to_temp_name mappings + for (path, temp_name) in other.external_to_temp_name { + // If we already have this path, keep our mapping (first one wins) + // Otherwise, add the new mapping + self.external_to_temp_name.entry(path).or_insert(temp_name); + } + + // Merge baseline_file_info + for (temp_name, file_info) in other.baseline_file_info { + // If we already have this baseline, keep ours (first baseline wins) + // Otherwise, add the new baseline + self.baseline_file_info + .entry(temp_name) + .or_insert(file_info); + } + + // Merge temp_name_to_current_path (this tracks renames) + for (temp_name, current_path) in other.temp_name_to_current_path { + // For renames, the latest mapping should win (last rename wins) + self.temp_name_to_current_path + .insert(temp_name, current_path); + } + + // Merge git_root_cache + for root in other.git_root_cache { + if !self.git_root_cache.contains(&root) { + self.git_root_cache.push(root); + } + } + } + /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were /// collected before the first time they were touched by apply_patch during this turn with /// the current repo state. 
diff --git a/codex-rs/core/tests/suite/prompt_caching.rs b/codex-rs/core/tests/suite/prompt_caching.rs index a69f57a2014f..08d8efd44d4e 100644 --- a/codex-rs/core/tests/suite/prompt_caching.rs +++ b/codex-rs/core/tests/suite/prompt_caching.rs @@ -216,7 +216,8 @@ async fn prompt_tools_are_consistent_across_requests() { let expected_instructions: &str = include_str!("../../prompt.md"); // our internal implementation is responsible for keeping tools in sync // with the OpenAI schema, so we just verify the tool presence here - let expected_tools_names: &[&str] = &["shell", "update_plan", "apply_patch", "view_image"]; + let expected_tools_names: &[&str] = + &["shell", "update_plan", "apply_patch", "view_image", "agent"]; let body0 = requests[0].body_json::().unwrap(); assert_eq!( body0["instructions"], diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs index c126208fadf1..7393936b61fd 100644 --- a/codex-rs/exec/src/event_processor_with_human_output.rs +++ b/codex-rs/exec/src/event_processor_with_human_output.rs @@ -564,6 +564,10 @@ impl EventProcessor for EventProcessorWithHumanOutput { EventMsg::UserMessage(_) => {} EventMsg::EnteredReviewMode(_) => {} EventMsg::ExitedReviewMode(_) => {} + EventMsg::ListAgentsResponse(_) => {} + EventMsg::AgentBegin(_) => {} + EventMsg::AgentProgress(_) => {} + EventMsg::AgentEnd(_) => {} } CodexStatus::Running } diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs index db48da28e272..3fd4f852c90d 100644 --- a/codex-rs/mcp-server/src/codex_tool_runner.rs +++ b/codex-rs/mcp-server/src/codex_tool_runner.rs @@ -281,7 +281,11 @@ async fn run_codex_tool_session_inner( | EventMsg::UserMessage(_) | EventMsg::ShutdownComplete | EventMsg::EnteredReviewMode(_) - | EventMsg::ExitedReviewMode(_) => { + | EventMsg::ExitedReviewMode(_) + | EventMsg::ListAgentsResponse(_) + | EventMsg::AgentBegin(_) + | 
EventMsg::AgentProgress(_) + | EventMsg::AgentEnd(_) => { // For now, we do not do anything extra for these // events. Note that // send(codex_event_to_notification(&event)) above has diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index c3aebcdd42e0..b1dece82a24a 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -160,6 +160,10 @@ pub enum Op { /// Reply is delivered via `EventMsg::McpListToolsResponse`. ListMcpTools, + /// Request the list of available AI agents. + /// Reply is delivered via `EventMsg::ListAgentsResponse`. + ListAgents, + /// Request the list of available custom prompts. ListCustomPrompts, @@ -502,6 +506,18 @@ pub enum EventMsg { /// List of custom prompts available to the agent. ListCustomPromptsResponse(ListCustomPromptsResponseEvent), + /// List of available AI agents. + ListAgentsResponse(ListAgentsResponseEvent), + + /// Agent invocation started. + AgentBegin(AgentBeginEvent), + + /// Agent execution progress. + AgentProgress(AgentProgressEvent), + + /// Agent completed. 
/// Description of a single agent that can be selected for delegated work.
///
/// `create_agent_tool` in `codex-rs/core/src/openai_tools.rs` receives a slice of these and
/// renders one `name` / `description` line per agent into the `agent` tool schema.
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
pub struct AgentInfo {
    /// Agent name, shown in the list of available agents of the `agent` tool.
    pub name: String,
    /// Human-readable description. May be empty, in which case the tool schema lists the
    /// name only.
    pub description: String,
    // NOTE(review): presumably distinguishes agents shipped with Codex from user-configured
    // ones - confirm against the code that fills this struct.
    pub is_builtin: bool,
}
diff --git a/codex-rs/tui/src/agent_mention.rs b/codex-rs/tui/src/agent_mention.rs new file mode 100644 index 000000000000..5af1f0d5ae1b --- /dev/null +++ b/codex-rs/tui/src/agent_mention.rs @@ -0,0 +1,108 @@ +use once_cell::sync::Lazy; +use regex_lite::Regex; + +// Compile regex once at startup +#[allow(clippy::expect_used)] +static AGENT_MENTION_RE: Lazy = Lazy::new(|| { + Regex::new(r"@(\w+):?\s+([^\n@]+)") + .expect("Failed to compile agent mention regex - this is a bug") +}); + +#[derive(Debug, Clone)] +pub struct AgentMention { + pub agent_name: String, + pub task: String, + #[allow(dead_code)] + pub raw_text: String, + pub start_pos: usize, + pub end_pos: usize, +} + +/// Parse @agent mentions in text +/// Formats supported: +/// - @agent_name: task description +/// - @agent_name task description +pub fn parse_agent_mentions(text: &str) -> Vec { + let mut mentions = Vec::new(); + + for cap in AGENT_MENTION_RE.captures_iter(text) { + if let (Some(name), Some(task), Some(full_match)) = (cap.get(1), cap.get(2), cap.get(0)) { + mentions.push(AgentMention { + agent_name: name.as_str().to_string(), + task: task.as_str().trim().to_string(), + raw_text: full_match.as_str().to_string(), + start_pos: full_match.start(), + end_pos: full_match.end(), + }); + } + } + mentions +} + +/// Convert agent mention to tool call format +pub fn convert_to_agent_call(mention: &AgentMention) -> String { + format!("Use the {} agent to {}", mention.agent_name, mention.task) +} + +/// Replace mentions in text with converted calls +pub fn replace_mentions_with_calls(text: &str) -> String { + let mentions = parse_agent_mentions(text); + if mentions.is_empty() { + return text.to_string(); + } + + let mut result = text.to_string(); + + // Replace from end to start to preserve positions + for mention in mentions.iter().rev() { + let call = convert_to_agent_call(mention); + result.replace_range(mention.start_pos..mention.end_pos, &call); + } + result +} + +#[cfg(test)] +mod tests { + use 
super::*; + + #[test] + fn test_parse_single_mention() { + let text = "@researcher: find information about Rust"; + let mentions = parse_agent_mentions(text); + assert_eq!(mentions.len(), 1); + assert_eq!(mentions[0].agent_name, "researcher"); + assert_eq!(mentions[0].task, "find information about Rust"); + } + + #[test] + fn test_parse_multiple_mentions() { + let text = "@researcher: find docs @reviewer: check the code"; + let mentions = parse_agent_mentions(text); + assert_eq!(mentions.len(), 2); + assert_eq!(mentions[0].agent_name, "researcher"); + assert_eq!(mentions[1].agent_name, "reviewer"); + } + + #[test] + fn test_convert_to_agent_call() { + let mention = AgentMention { + agent_name: "researcher".to_string(), + task: "find information".to_string(), + raw_text: "@researcher: find information".to_string(), + start_pos: 0, + end_pos: 29, + }; + let call = convert_to_agent_call(&mention); + assert_eq!(call, "Use the researcher agent to find information"); + } + + #[test] + fn test_replace_mentions() { + let text = "@researcher: find docs about async"; + let replaced = replace_mentions_with_calls(text); + assert_eq!( + replaced, + "Use the researcher agent to find docs about async" + ); + } +} diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 7ab954f263a9..469da9633e06 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -913,6 +913,9 @@ impl ChatWidget { SlashCommand::Mcp => { self.add_mcp_output(); } + SlashCommand::Agents => { + self.add_agents_output(); + } #[cfg(debug_assertions)] SlashCommand::TestApproval => { use codex_core::protocol::EventMsg; @@ -1117,6 +1120,18 @@ impl ChatWidget { EventMsg::GetHistoryEntryResponse(ev) => self.on_get_history_entry_response(ev), EventMsg::McpListToolsResponse(ev) => self.on_list_mcp_tools(ev), EventMsg::ListCustomPromptsResponse(ev) => self.on_list_custom_prompts(ev), + EventMsg::ListAgentsResponse(ev) => { + 
self.add_to_history(history_cell::new_agents_list(ev.agents)); + } + EventMsg::AgentBegin(ev) => { + self.add_to_history(history_cell::new_agent_begin(&ev)); + } + EventMsg::AgentProgress(ev) => { + self.add_to_history(history_cell::new_agent_progress(&ev)); + } + EventMsg::AgentEnd(ev) => { + self.add_to_history(history_cell::new_agent_end(&ev)); + } EventMsg::ShutdownComplete => self.on_shutdown_complete(), EventMsg::TurnDiff(TurnDiffEvent { unified_diff }) => self.on_turn_diff(unified_diff), EventMsg::BackgroundEvent(BackgroundEventEvent { message }) => { @@ -1362,6 +1377,10 @@ impl ChatWidget { } } + pub(crate) fn add_agents_output(&mut self) { + self.submit_op(Op::ListAgents); + } + /// Forward file-search results to the bottom pane. pub(crate) fn apply_file_search_result(&mut self, query: String, matches: Vec) { self.bottom_pane.on_file_search_result(query, matches); @@ -1432,10 +1451,29 @@ impl ChatWidget { /// Programmatically submit a user text message as if typed in the /// composer. The text will be added to conversation history and sent to /// the agent. 
- pub(crate) fn submit_text_message(&mut self, text: String) { + pub(crate) fn submit_text_message(&mut self, mut text: String) { if text.is_empty() { return; } + + // Parse and convert @agent mentions + if text.contains('@') { + use crate::agent_mention::parse_agent_mentions; + use crate::agent_mention::replace_mentions_with_calls; + let mentions = parse_agent_mentions(&text); + if !mentions.is_empty() { + // Show visual indicator of agent invocation + for mention in &mentions { + self.add_to_history(history_cell::new_agent_invocation( + &mention.agent_name, + &mention.task, + )); + } + // Convert mentions to proper agent tool calls + text = replace_mentions_with_calls(&text); + } + } + self.submit_user_message(text.into()); } diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs index 9cdd29dd0273..74dedf93e398 100644 --- a/codex-rs/tui/src/history_cell.rs +++ b/codex-rs/tui/src/history_cell.rs @@ -1317,6 +1317,143 @@ pub(crate) fn new_stream_error_event(message: String) -> PlainHistoryCell { } /// Render a user‑friendly plan update styled like a checkbox todo list. 
+/// Visual cell for agent invocation +pub(crate) fn new_agent_invocation(agent_name: &str, task: &str) -> PlainHistoryCell { + let lines = vec![ + Line::from(vec![ + "🤖 ".into(), + "Agent: ".dim(), + Span::styled( + agent_name.to_string(), + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ), + ]), + Line::from(vec![" Task: ".dim(), task.to_string().into()]), + ]; + PlainHistoryCell { lines } +} + +/// Visual cell for agents list +pub(crate) fn new_agents_list(agents: Vec) -> PlainHistoryCell { + let mut lines = vec![ + Line::from(vec![Span::styled( + "Available Agents", + Style::default().add_modifier(Modifier::BOLD | Modifier::UNDERLINED), + )]), + Line::default(), // Empty line + ]; + + if agents.is_empty() { + lines.push(Line::from("No agents configured".dim())); + } else { + for agent in agents { + let bullet = if agent.is_builtin { "•" } else { "â—¦" }; + lines.push(Line::from(vec![ + format!(" {bullet} ").into(), + Span::styled( + agent.name.clone(), + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ), + " - ".dim(), + agent.description.clone().into(), + ])); + } + } + + lines.push(Line::default()); // Empty line + lines.push(Line::from(vec![ + "Usage: ".dim(), + Span::styled( + "@agent_name: task description", + Style::default().add_modifier(Modifier::ITALIC), + ), + ])); + + PlainHistoryCell { lines } +} + +/// Visual cell for agent execution begin +pub(crate) fn new_agent_begin(event: &codex_core::protocol::AgentBeginEvent) -> PlainHistoryCell { + let lines = vec![ + Line::from(vec![ + Span::styled("⚡ ", Style::default().fg(Color::Cyan)), + Span::styled("Running ", Style::default().fg(Color::Cyan)), + Span::styled( + event.agent_name.clone(), + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ), + ]), + Line::from(vec![" ".into(), format!("Task: {}", event.task).dim()]), + ]; + PlainHistoryCell { lines } +} + +/// Visual cell for agent progress +pub(crate) fn new_agent_progress( + event: 
&codex_core::protocol::AgentProgressEvent, +) -> PlainHistoryCell { + use codex_core::protocol::AgentProgressType; + + let prefix = match &event.progress_type { + AgentProgressType::Loop(_) => "âźł ", + AgentProgressType::FileChange(_, _) => "📝 ", + AgentProgressType::Output(_) => "đź’¬ ", + AgentProgressType::ToolCall(_) => "đź”§ ", + }; + + let content = match &event.progress_type { + AgentProgressType::Loop(step) => step.clone(), + AgentProgressType::FileChange(path, _) => format!("Modified: {}", path.display()), + AgentProgressType::Output(text) => text.clone(), + AgentProgressType::ToolCall(tool) => format!("Using tool: {tool}"), + }; + + let lines = vec![Line::from(vec![ + Span::styled(format!(" {prefix} "), Style::default().fg(Color::Yellow)), + Span::styled(format!("[{}] ", event.agent_name), Style::default().dim()), + Span::styled(content, Style::default().fg(Color::White)), + ])]; + PlainHistoryCell { lines } +} + +/// Visual cell for agent completion +pub(crate) fn new_agent_end(event: &codex_core::protocol::AgentEndEvent) -> PlainHistoryCell { + use codex_core::protocol::AgentStatus; + + let duration = format_duration(Duration::from_millis(event.duration_ms)); + + let (status_icon, status_text) = match event.status { + AgentStatus::Done => ("âś“".green(), " Done ".green()), + AgentStatus::Failed => ("âś—".red(), " Failed ".red()), + AgentStatus::Running => ("⟲".cyan(), " Running ".cyan()), // Shouldn't happen here but handle it + }; + + let mut lines = vec![Line::from(vec![ + status_icon, + status_text, + Span::styled( + event.agent_name.clone(), + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ), + format!(" ({duration})").dim(), + ])]; + + // Add summary with proper indentation + for line in event.summary.lines() { + lines.push(Line::from(vec![" ".into(), line.to_string().into()])); + } + + PlainHistoryCell { lines } +} + pub(crate) fn new_plan_update(update: UpdatePlanArgs) -> PlanUpdateCell { let UpdatePlanArgs { explanation, 
plan } = update; PlanUpdateCell { explanation, plan } diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index 995ca17a6b3f..c43095c17b1b 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -28,6 +28,7 @@ use tracing_appender::non_blocking; use tracing_subscriber::EnvFilter; use tracing_subscriber::prelude::*; +mod agent_mention; mod app; mod app_backtrack; mod app_event; diff --git a/codex-rs/tui/src/slash_command.rs b/codex-rs/tui/src/slash_command.rs index 3268a92a2db2..64d7068da5d3 100644 --- a/codex-rs/tui/src/slash_command.rs +++ b/codex-rs/tui/src/slash_command.rs @@ -21,6 +21,7 @@ pub enum SlashCommand { Mention, Status, Mcp, + Agents, Logout, Quit, #[cfg(debug_assertions)] @@ -41,6 +42,7 @@ impl SlashCommand { SlashCommand::Model => "choose what model and reasoning effort to use", SlashCommand::Approvals => "choose what Codex can do without approval", SlashCommand::Mcp => "list configured MCP tools", + SlashCommand::Agents => "list available AI agents and their descriptions", SlashCommand::Logout => "log out of Codex", #[cfg(debug_assertions)] SlashCommand::TestApproval => "test approval request", @@ -66,6 +68,7 @@ impl SlashCommand { | SlashCommand::Mention | SlashCommand::Status | SlashCommand::Mcp + | SlashCommand::Agents | SlashCommand::Quit => true, #[cfg(debug_assertions)] diff --git a/docs/config.md b/docs/config.md index 4f287b824e96..68ac34d7913d 100644 --- a/docs/config.md +++ b/docs/config.md @@ -1,6 +1,5 @@ # Config - Codex supports several mechanisms for setting config values: - Config-specific command-line flags, such as `--model o3` (highest precedence). 
@@ -408,10 +407,10 @@ set = { CI = "1" } include_only = ["PATH", "HOME"] ``` -| Field | Type | Default | Description | -| ------------------------- | -------------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | -| `inherit` | string | `all` | Starting template for the environment:
`all` (clone full parent env), `core` (`HOME`, `PATH`, `USER`, …), or `none` (start empty). | -| `ignore_default_excludes` | boolean | `false` | When `false`, Codex removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN` (case-insensitive) before other rules run. | +| Field | Type | Default | Description | +| ------------------------- | -------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | +| `inherit` | string | `all` | Starting template for the environment:
`all` (clone full parent env), `core` (`HOME`, `PATH`, `USER`, …), or `none` (start empty). | +| `ignore_default_excludes` | boolean | `false` | When `false`, Codex removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN` (case-insensitive) before other rules run. | | `exclude` | array | `[]` | Case-insensitive glob patterns to drop after the default filter.
Examples: `"AWS_*"`, `"AZURE_*"`. | | `set` | table | `{}` | Explicit key/value overrides or additions – always win over inherited values. | | `include_only` | array | `[]` | If non-empty, a whitelist of patterns; only variables that match _one_ pattern survive the final step. (Generally used with `inherit = "all"`.) | @@ -539,6 +538,39 @@ Note this is **not** a general editor setting (like `$EDITOR`), as it only accep Currently, `"vscode"` is the default, though Codex does not verify VS Code is installed. As such, `file_opener` may default to `"none"` or something else in the future. +## Agent Configuration + +### Custom Agents File + +Create custom specialized agents by adding a configuration file at `~/.codex/agents.toml`: + +```toml +# ~/.codex/agents.toml + +[researcher] +prompt = """ +You are a research specialist. Focus on gathering comprehensive information, +verifying facts, and providing detailed citations. +""" + +[code-reviewer] +prompt = """ +You are an expert code reviewer. Identify bugs, suggest improvements, +and ensure best practices are followed. +""" + +[test-writer] +prompt_file = "prompts/test-writer.md" # Load from external file +``` + +Each agent inherits tools and permissions from the parent context. Agents can be invoked using: + +- `@agent_name: task description` - Natural mention syntax +- `/agents` command - View all available agents +- Agent tool - Programmatic invocation + +For detailed agent configuration options, see [Multi-Agent System](./subagents.md). + ## hide_agent_reasoning Codex intermittently emits "reasoning" events that show the model's internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output. @@ -601,54 +633,54 @@ notifications = [ "agent-turn-complete", "approval-requested" ] ## Config reference -| Key | Type / Values | Notes | -| --- | --- | --- | -| `model` | string | Model to use (e.g., `gpt-5`). 
| -| `model_provider` | string | Provider id from `model_providers` (default: `openai`). | -| `model_context_window` | number | Context window tokens. | -| `model_max_output_tokens` | number | Max output tokens. | -| `approval_policy` | `untrusted` \| `on-failure` \| `on-request` \| `never` | When to prompt for approval. | -| `sandbox_mode` | `read-only` \| `workspace-write` \| `danger-full-access` | OS sandbox policy. | -| `sandbox_workspace_write.writable_roots` | array | Extra writable roots in workspace‑write. | -| `sandbox_workspace_write.network_access` | boolean | Allow network in workspace‑write (default: false). | -| `sandbox_workspace_write.exclude_tmpdir_env_var` | boolean | Exclude `$TMPDIR` from writable roots (default: false). | -| `sandbox_workspace_write.exclude_slash_tmp` | boolean | Exclude `/tmp` from writable roots (default: false). | -| `disable_response_storage` | boolean | Required for ZDR orgs. | -| `notify` | array | External program for notifications. | -| `instructions` | string | Currently ignored; use `experimental_instructions_file` or `AGENTS.md`. | -| `mcp_servers..command` | string | MCP server launcher command. | -| `mcp_servers..args` | array | MCP server args. | -| `mcp_servers..env` | map | MCP server env vars. | -| `mcp_servers..startup_timeout_ms` | number | Startup timeout in milliseconds (default: 10_000). Timeout is applied both for initializing MCP server and initially listing tools. | -| `model_providers..name` | string | Display name. | -| `model_providers..base_url` | string | API base URL. | -| `model_providers..env_key` | string | Env var for API key. | -| `model_providers..wire_api` | `chat` \| `responses` | Protocol used (default: `chat`). | -| `model_providers..query_params` | map | Extra query params (e.g., Azure `api-version`). | -| `model_providers..http_headers` | map | Additional static headers. | -| `model_providers..env_http_headers` | map | Headers sourced from env vars. 
| -| `model_providers..request_max_retries` | number | Per‑provider HTTP retry count (default: 4). | -| `model_providers..stream_max_retries` | number | SSE stream retry count (default: 5). | -| `model_providers..stream_idle_timeout_ms` | number | SSE idle timeout (ms) (default: 300000). | -| `project_doc_max_bytes` | number | Max bytes to read from `AGENTS.md`. | -| `profile` | string | Active profile name. | -| `profiles..*` | various | Profile‑scoped overrides of the same keys. | -| `history.persistence` | `save-all` \| `none` | History file persistence (default: `save-all`). | -| `history.max_bytes` | number | Currently ignored (not enforced). | -| `file_opener` | `vscode` \| `vscode-insiders` \| `windsurf` \| `cursor` \| `none` | URI scheme for clickable citations (default: `vscode`). | -| `tui` | table | TUI‑specific options. | -| `tui.notifications` | boolean \| array | Enable desktop notifications in the tui (default: false). | -| `hide_agent_reasoning` | boolean | Hide model reasoning events. | -| `show_raw_agent_reasoning` | boolean | Show raw reasoning (when available). | -| `model_reasoning_effort` | `minimal` \| `low` \| `medium` \| `high` | Responses API reasoning effort. | -| `model_reasoning_summary` | `auto` \| `concise` \| `detailed` \| `none` | Reasoning summaries. | -| `model_verbosity` | `low` \| `medium` \| `high` | GPT‑5 text verbosity (Responses API). | -| `model_supports_reasoning_summaries` | boolean | Force‑enable reasoning summaries. | -| `model_reasoning_summary_format` | `none` \| `experimental` | Force reasoning summary format. | -| `chatgpt_base_url` | string | Base URL for ChatGPT auth flow. | -| `experimental_resume` | string (path) | Resume JSONL path (internal/experimental). | -| `experimental_instructions_file` | string (path) | Replace built‑in instructions (experimental). | -| `experimental_use_exec_command_tool` | boolean | Use experimental exec command tool. 
| -| `responses_originator_header_internal_override` | string | Override `originator` header value. | -| `projects..trust_level` | string | Mark project/worktree as trusted (only `"trusted"` is recognized). | -| `tools.web_search` | boolean | Enable web search tool (alias: `web_search_request`) (default: false). | +| Key | Type / Values | Notes | +| ------------------------------------------------ | ----------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- | +| `model` | string | Model to use (e.g., `gpt-5`). | +| `model_provider` | string | Provider id from `model_providers` (default: `openai`). | +| `model_context_window` | number | Context window tokens. | +| `model_max_output_tokens` | number | Max output tokens. | +| `approval_policy` | `untrusted` \| `on-failure` \| `on-request` \| `never` | When to prompt for approval. | +| `sandbox_mode` | `read-only` \| `workspace-write` \| `danger-full-access` | OS sandbox policy. | +| `sandbox_workspace_write.writable_roots` | array | Extra writable roots in workspace‑write. | +| `sandbox_workspace_write.network_access` | boolean | Allow network in workspace‑write (default: false). | +| `sandbox_workspace_write.exclude_tmpdir_env_var` | boolean | Exclude `$TMPDIR` from writable roots (default: false). | +| `sandbox_workspace_write.exclude_slash_tmp` | boolean | Exclude `/tmp` from writable roots (default: false). | +| `disable_response_storage` | boolean | Required for ZDR orgs. | +| `notify` | array | External program for notifications. | +| `instructions` | string | Currently ignored; use `experimental_instructions_file` or `AGENTS.md`. | +| `mcp_servers..command` | string | MCP server launcher command. | +| `mcp_servers..args` | array | MCP server args. | +| `mcp_servers..env` | map | MCP server env vars. 
| +| `mcp_servers..startup_timeout_ms` | number | Startup timeout in milliseconds (default: 10_000). Timeout is applied both for initializing MCP server and initially listing tools. | +| `model_providers..name` | string | Display name. | +| `model_providers..base_url` | string | API base URL. | +| `model_providers..env_key` | string | Env var for API key. | +| `model_providers..wire_api` | `chat` \| `responses` | Protocol used (default: `chat`). | +| `model_providers..query_params` | map | Extra query params (e.g., Azure `api-version`). | +| `model_providers..http_headers` | map | Additional static headers. | +| `model_providers..env_http_headers` | map | Headers sourced from env vars. | +| `model_providers..request_max_retries` | number | Per‑provider HTTP retry count (default: 4). | +| `model_providers..stream_max_retries` | number | SSE stream retry count (default: 5). | +| `model_providers..stream_idle_timeout_ms` | number | SSE idle timeout (ms) (default: 300000). | +| `project_doc_max_bytes` | number | Max bytes to read from `AGENTS.md`. | +| `profile` | string | Active profile name. | +| `profiles..*` | various | Profile‑scoped overrides of the same keys. | +| `history.persistence` | `save-all` \| `none` | History file persistence (default: `save-all`). | +| `history.max_bytes` | number | Currently ignored (not enforced). | +| `file_opener` | `vscode` \| `vscode-insiders` \| `windsurf` \| `cursor` \| `none` | URI scheme for clickable citations (default: `vscode`). | +| `tui` | table | TUI‑specific options. | +| `tui.notifications` | boolean \| array | Enable desktop notifications in the tui (default: false). | +| `hide_agent_reasoning` | boolean | Hide model reasoning events. | +| `show_raw_agent_reasoning` | boolean | Show raw reasoning (when available). | +| `model_reasoning_effort` | `minimal` \| `low` \| `medium` \| `high` | Responses API reasoning effort. | +| `model_reasoning_summary` | `auto` \| `concise` \| `detailed` \| `none` | Reasoning summaries. 
| +| `model_verbosity` | `low` \| `medium` \| `high` | GPT‑5 text verbosity (Responses API). | +| `model_supports_reasoning_summaries` | boolean | Force‑enable reasoning summaries. | +| `model_reasoning_summary_format` | `none` \| `experimental` | Force reasoning summary format. | +| `chatgpt_base_url` | string | Base URL for ChatGPT auth flow. | +| `experimental_resume` | string (path) | Resume JSONL path (internal/experimental). | +| `experimental_instructions_file` | string (path) | Replace built‑in instructions (experimental). | +| `experimental_use_exec_command_tool` | boolean | Use experimental exec command tool. | +| `responses_originator_header_internal_override` | string | Override `originator` header value. | +| `projects..trust_level` | string | Mark project/worktree as trusted (only `"trusted"` is recognized). | +| `tools.web_search` | boolean | Enable web search tool (alias: `web_search_request`) (default: false). | diff --git a/docs/getting-started.md b/docs/getting-started.md index e97de6a048ce..64d4fc419796 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -71,9 +71,18 @@ For more information on how to use AGENTS.md, see the [official AGENTS.md docume ### Tips & shortcuts -#### Use `@` for file search +#### Use `@` for file search and agent invocation -Typing `@` triggers a fuzzy-filename search over the workspace root. Use up/down to select among the results and Tab or Enter to replace the `@` with the selected path. You can use Esc to cancel the search. +Typing `@` has two powerful uses: + +1. **File search**: When followed by a path-like pattern, it triggers a fuzzy-filename search over the workspace root. Use up/down to select among the results and Tab or Enter to replace the `@` with the selected path. You can use Esc to cancel the search. + +2. **Agent invocation**: When followed by an agent name and colon, it invokes a specialized AI agent. 
For example: + - `@researcher: find documentation about async Rust` + - `@code-reviewer: review the changes in src/` + - `@test-writer: create unit tests for utils.js` + + Use `/agents` to see all available agents. Agent tasks are automatically tracked in the plan system. #### Image input diff --git a/docs/subagents.md b/docs/subagents.md new file mode 100644 index 000000000000..0632d0a0016e --- /dev/null +++ b/docs/subagents.md @@ -0,0 +1,428 @@ +# Multi-Agent Orchestration System + +Codex CLI includes a powerful multi-agent orchestration system that allows you to invoke specialized AI agents for complex tasks. Each agent can be customized with specific system prompts while inheriting tools and permissions from the parent context. + +## Overview + +The agent system enables you to: + +- Define specialized agents with custom system prompts +- Invoke agents using natural `@agent` mention syntax +- Automatically track agent tasks in the plan system +- View real-time agent execution progress +- Maintain context isolation between agent executions +- Prevent recursive agent spawning for safety +- Get comprehensive summaries of agent work + +## Quick Start + +### Using @Agent Mentions (Recommended) + +The most natural way to invoke agents is using the `@agent` mention syntax: + +``` +@researcher: find information about React hooks + +@code-reviewer: review the changes in src/ + +@test-writer: create unit tests for the new functions +``` + +When you use `@agent` mentions: + +- The agent task is automatically added to the plan with "in_progress" status +- You see real-time progress updates during execution +- The plan updates to "completed" when the agent finishes + +### Using the Agent Tool + +You can also invoke agents programmatically with the `agent` tool: + +``` +Please use the researcher agent to find information about React hooks + +Use the code-reviewer agent to review the changes in src/ + +Have the test-writer agent create unit tests for the new functions +``` + 
+### Viewing Available Agents + +Use the `/agents` command to see all available agents: + +``` +/agents +``` + +This displays: + +- Built-in agents (marked with •) +- Custom agents from your configuration (marked with ◦) +- Brief descriptions of each agent's purpose + +## Built-in Agents + +Codex comes with one built-in agent: + +- **`general`** - A general-purpose AI assistant for completing tasks efficiently and accurately + +## Custom Agent Configuration + +Create custom agents by adding a configuration file at `~/.codex/agents.toml`: + +```toml +# ~/.codex/agents.toml + +[researcher] +prompt = """ +You are a research specialist. Your role is to: +- Gather comprehensive information from multiple sources +- Verify facts and cross-reference findings +- Provide detailed citations and sources +- Summarize findings in a structured format +Focus on accuracy and thoroughness over speed. +""" + +[code-reviewer] +prompt = """ +You are an expert code reviewer. Your responsibilities: +- Identify potential bugs and security issues +- Suggest performance improvements +- Ensure code follows best practices +- Check for proper error handling +- Verify test coverage +Provide constructive feedback with specific examples. +""" + +[test-writer] +prompt_file = "prompts/test-writer.md" # Load from external file + +[refactorer] +prompt = """ +You are a refactoring specialist. Focus on: +- Improving code readability and maintainability +- Reducing complexity and duplication +- Applying design patterns appropriately +- Ensuring backward compatibility +Always explain the reasoning behind refactoring decisions. +""" +tools = ["read", "write", "grep"] # Optional: override available tools + +[documenter] +prompt = """ +You are a documentation expert. 
Your tasks: +- Write clear, comprehensive documentation +- Create useful code examples +- Maintain consistent formatting +- Include API references +- Add helpful diagrams when appropriate +""" +permissions = "readonly" # Optional: override permissions +``` + +## Configuration Options + +Each agent supports the following configuration options: + +| Field | Type | Description | +| ------------- | ------ | --------------------------------------------------------------------- | +| `prompt` | String | The system prompt that defines the agent's behavior | +| `prompt_file` | String | Path to a file containing the prompt (alternative to inline `prompt`) | +| `tools` | Array | Optional: Override the available tools for this agent | +| `permissions` | String | Optional: Override the permission level for this agent | + +### Prompt Files + +For longer prompts, you can store them in separate files: + +```toml +[complex-agent] +prompt_file = "prompts/complex-agent.md" # Relative to ~/.codex/ +``` + +Or use absolute paths: + +```toml +[complex-agent] +prompt_file = "/home/user/my-prompts/complex-agent.md" +``` + +## Visual Feedback and Plan Integration + +### Real-Time Status Indicators + +When agents execute, you'll see visual feedback: + +- **⚡ Running** (cyan) - Agent is currently executing +- **⟳** Progress loops - Iterative steps the agent is performing +- **📝** File changes - Files being modified +- **💬** Outputs - Agent responses and analysis +- **🔧** Tool usage - Tools being invoked +- **✓ Done** (green) - Agent completed successfully + +### Automatic Plan Tracking + +Every agent invocation automatically creates a plan item: + +1. **Plan Creation**: When you use `@agent: task`, a plan item is created +2. **Status Tracking**: Plan shows "in_progress" during execution +3. **Completion Update**: Plan updates to "completed" when done +4.
**Linked Execution**: Plan items are linked to agent events via `plan_item_id` + +Example workflow: + +``` +User: @researcher: find async Rust patterns +System: [Creates plan item: "@researcher: find async Rust patterns" - in_progress] +System: ⚡ Running researcher +System: ⟳ [researcher] Analyzing task requirements +System: ⟳ [researcher] Searching for documentation +System: ✓ Done researcher (3.2s) +System: [Updates plan item to completed] +``` + +## Agent Behavior + +### Context Inheritance + +By default, agents inherit: + +- All available tools from the parent context +- Permission levels from the parent context +- Working directory and environment variables + +This ensures agents have the same capabilities as the main conversation while maintaining isolation. + +### Recursion Prevention + +To prevent infinite loops and resource exhaustion, agents **cannot spawn other agents**. If an agent attempts to use the `agent` tool, it will receive an error message. + +### Execution Isolation + +Each agent execution: + +- Runs in an isolated conversation context +- Cannot access the parent conversation history +- Returns a comprehensive summary to the parent +- Tracks all file changes and execution loops + +## Agent Summaries + +When an agent completes its task, it provides a structured summary including: + +1. **Execution Loops** - Key steps and iterations performed +2. **File Changes** - All files created, modified, or deleted +3. **Key Outputs** - Important results (auto-compacted for long outputs) +4. **Final Summary** - Overall accomplishment and any recommendations + +Long outputs are automatically compacted to show the first 5 and last 3 lines with a truncation indicator. + +## Best Practices + +### 1. Specialized Prompts + +Create focused agents with clear responsibilities: + +```toml +[security-auditor] +prompt = """ +You are a security specialist focused exclusively on: +- Identifying vulnerabilities (SQL injection, XSS, etc.) 
+- Checking authentication and authorization +- Reviewing encryption and data protection +- Analyzing dependencies for known CVEs +Do not fix issues, only identify and report them. +""" +``` + +### 2. Tool Restrictions + +Limit tools for safety when appropriate: + +```toml +[analyzer] +tools = ["read", "grep", "glob"] # Read-only analysis +prompt = "You are a code analyzer. Examine code without making changes..." +``` + +### 3. Composable Agents + +Design agents that work well together: + +```toml +[planner] +prompt = "Create detailed implementation plans with clear steps..." + +[implementer] +prompt = "Execute implementation plans step by step..." + +[validator] +prompt = "Verify implementations meet requirements..." +``` + +### 4. Prompt Engineering + +Structure prompts for clarity: + +```toml +[api-designer] +prompt = """ +Role: API Design Specialist + +Responsibilities: +- Design RESTful APIs following OpenAPI specification +- Ensure consistent naming conventions +- Include proper error responses +- Document all endpoints thoroughly + +Constraints: +- Follow REST best practices +- Use semantic versioning +- Include rate limiting considerations + +Output Format: +- OpenAPI 3.0 specification +- Implementation examples +- Testing strategies +""" +``` + +## Examples + +### Research Agent + +```toml +[researcher] +prompt = """ +You are a meticulous researcher. For any topic: +1. Start with a broad overview +2. Identify authoritative sources +3. Deep dive into specific aspects +4. Cross-reference claims +5. Summarize with citations +Always distinguish between facts and opinions. +""" +``` + +Usage: "Use the researcher agent to investigate WebAssembly performance characteristics" + +### Migration Agent + +```toml +[migrator] +prompt = """ +You are a migration specialist. When migrating code: +1. Analyze the current implementation +2. Identify breaking changes +3. Create a migration plan +4. Implement incrementally +5. Verify backward compatibility +6. 
Update documentation +Prioritize safety and reversibility. +""" +``` + +Usage: "Have the migrator agent help upgrade our React 17 code to React 18" + +### Performance Agent + +```toml +[performance-optimizer] +prompt = """ +You are a performance optimization expert: +- Profile code to identify bottlenecks +- Suggest algorithmic improvements +- Optimize resource usage +- Reduce unnecessary computations +- Implement caching strategies +Always measure before and after changes. +""" +tools = ["read", "write", "exec"] +``` + +Usage: "Use the performance-optimizer agent to improve the data processing pipeline" + +## Troubleshooting + +### Agent Not Found + +If an agent isn't recognized: + +1. Check that `~/.codex/agents.toml` exists +2. Verify the agent name matches exactly (case-sensitive) +3. Ensure the TOML syntax is valid +4. Check file permissions + +### Prompt File Not Loading + +If using `prompt_file`: + +1. Verify the file path is correct +2. Check file permissions +3. Use absolute paths if relative paths aren't working +4. 
Ensure the file contains valid UTF-8 text + +### Agent Recursion Error + +If you see "Agents cannot spawn other agents": + +- This is by design to prevent infinite loops +- Restructure your task to avoid nested agent calls +- Use a single agent for the entire task + +## Advanced Configuration + +### Environment-Specific Agents + +Create different agent sets for different environments: + +```bash +# Development agents +cp ~/.codex/agents.toml ~/.codex/agents.dev.toml + +# Production agents +cp ~/.codex/agents.toml ~/.codex/agents.prod.toml + +# Symlink based on environment +ln -sf ~/.codex/agents.dev.toml ~/.codex/agents.toml +``` + +### Team Sharing + +Share agent configurations with your team: + +```bash +# Add to version control +git add .codex/agents.toml +git commit -m "Add team agent configurations" + +# Team members can then copy it into place (replace `project` with the path to their checkout): +cp project/.codex/agents.toml ~/.codex/agents.toml +``` + +### Dynamic Loading + +Agents are loaded at runtime, so you can modify `~/.codex/agents.toml` without restarting Codex. Changes take effect on the next agent invocation.
+ +## Limitations + +- Agents cannot spawn other agents (recursion prevention) +- Agent context is isolated from parent conversation +- Maximum execution time follows parent timeout settings +- Tool availability depends on parent configuration + +## Future Enhancements + +Planned improvements for the agent system: + +- Agent templates and inheritance +- Conditional agent selection based on task analysis +- Agent performance metrics and analytics +- Collaborative multi-agent workflows +- Agent versioning and rollback capabilities + +## See Also + +- [Configuration Guide](./config.md) - General Codex configuration +- [Model Context Protocol](./advanced.md#model-context-protocol-mcp) - MCP server integration +- [Custom Prompts](./prompts.md) - System prompt customization diff --git a/example-agents.toml b/example-agents.toml new file mode 100644 index 000000000000..737c002a2ebf --- /dev/null +++ b/example-agents.toml @@ -0,0 +1,202 @@ +# Example Agent Configuration for Codex Multi-Agent System +# Place this file at ~/.codex/agents.toml to enable custom agents +# +# Usage: +# - @agent_name: task description (e.g., @code_reviewer: review main.rs) +# - /agents (lists all available agents) +# - Agent tasks are automatically tracked in the plan system +# +# Visual indicators during execution: +# - ⚡ Running (yellow) - agent is executing +# - ✓ Done (green) - agent completed successfully + +[code_reviewer] +prompt = """ +You are an expert code reviewer with deep knowledge of software engineering best practices. + +When reviewing code, focus on: +1. Security vulnerabilities (SQL injection, XSS, authentication flaws, race conditions) +2. Performance bottlenecks and algorithmic complexity +3. Code maintainability and readability +4. Design patterns and architectural decisions +5. Error handling and edge cases +6. Memory safety and resource management + +Provide actionable feedback with specific code examples when suggesting improvements. 
+Be constructive but thorough - don't overlook issues to be polite. +Point out both problems and good practices you observe. +""" + +[test_designer] +prompt = """ +You are a test engineering specialist who designs comprehensive test suites. + +Your approach: +- Write tests that cover happy paths, edge cases, and error conditions +- Use appropriate testing patterns (unit, integration, e2e) for the context +- Ensure tests are maintainable and well-documented +- Follow the testing conventions already established in the codebase +- Aim for high coverage but prioritize critical paths +- Consider property-based testing where appropriate + +Always check existing test patterns in the codebase first and follow them. +Generate test cases that are both thorough and practical. +""" + +[refactorer] +prompt = """ +You are a refactoring expert focused on improving code quality without changing behavior. + +Priorities: +1. Reduce complexity and improve readability +2. Extract reusable components and eliminate duplication +3. Apply appropriate design patterns +4. Improve naming and code organization +5. Optimize performance where possible +6. Ensure backward compatibility + +Always ensure the refactored code passes existing tests. +Explain the reasoning behind each refactoring decision. +Consider the impact on the broader codebase. +""" + +[performance_analyst] +prompt = """ +You are a performance optimization specialist. + +Analyze code for: +- Time complexity and algorithmic efficiency +- Memory usage and allocation patterns +- I/O bottlenecks and network latency +- Caching opportunities +- Parallelization and concurrency potential +- Database query optimization + +Provide specific metrics and benchmarks where possible. +Suggest optimizations with expected performance gains. +Consider trade-offs between performance and maintainability. +Use profiling data to guide recommendations when available. 
+""" + +[security_auditor] +prompt = """ +You are a security expert specializing in application security. + +Focus areas: +- Authentication and authorization vulnerabilities +- Input validation and sanitization +- Cryptographic implementations +- OWASP Top 10 vulnerabilities +- Supply chain security +- Secrets management +- Network security configurations + +Provide severity ratings for identified issues. +Suggest specific remediation steps with code examples. +Reference relevant security standards and best practices. +Consider both technical and business impact of vulnerabilities. +""" + +[documenter] +prompt = """ +You are an expert technical writer who creates clear, comprehensive documentation. + +Your approach: +1. Understand the audience and write appropriately +2. Structure documentation logically with clear sections +3. Provide practical, runnable examples +4. Cover all important aspects without being verbose +5. Ensure documentation stays synchronized with code +6. Include diagrams and visual aids where helpful + +Documentation types to create: +- API documentation with clear parameter descriptions +- Architecture documentation explaining design decisions +- User guides with step-by-step instructions +- Developer documentation for contributors + +Always examine existing documentation style and match it. +Use clear, concise language and active voice. +""" + +[architect] +prompt = """ +You are a software architect with expertise in system design. + +Responsibilities: +- Evaluate architectural patterns and their trade-offs +- Design scalable and maintainable systems +- Identify technical debt and propose solutions +- Plan migration strategies for legacy systems +- Ensure consistency across the codebase +- Consider non-functional requirements (performance, security, reliability) + +Provide architectural decision records (ADRs) when proposing changes. +Consider both immediate needs and long-term evolution. +Balance ideal solutions with practical constraints. 
+""" + +[debugger] +prompt = """ +You are a debugging specialist who excels at finding and fixing complex bugs. + +Your methodology: +1. Reproduce the issue consistently +2. Isolate the problem to specific components +3. Use systematic debugging techniques +4. Consider race conditions and edge cases +5. Verify fixes don't introduce new issues +6. Document the root cause and solution + +Tools and techniques: +- Add strategic logging and instrumentation +- Use debuggers and profilers effectively +- Analyze stack traces and error messages +- Consider environmental factors +- Check for common pitfalls in the technology stack + +Always provide a clear explanation of the bug and why the fix works. +""" + +[rust_specialist] +prompt = """ +You are a Rust expert with deep knowledge of the language and ecosystem. + +Expertise areas: +- Ownership, borrowing, and lifetime management +- Safe concurrency patterns and async programming +- Performance optimization and zero-cost abstractions +- FFI and unsafe code guidelines +- Error handling with Result and Option +- Trait design and generics +- Macro development + +Follow Rust idioms and best practices. +Leverage the type system for correctness. +Consider using popular crates from the ecosystem when appropriate. +Ensure code is clippy-clean and follows rustfmt standards. +""" + +[api_designer] +prompt = """ +You are an API design expert specializing in REST, GraphQL, and RPC interfaces. + +Focus on: +- RESTful principles and resource modeling +- API versioning strategies +- Authentication and authorization patterns +- Rate limiting and throttling +- Error handling and status codes +- Request/response schema design +- API documentation and OpenAPI specs + +Design APIs that are: +- Intuitive and consistent +- Backward compatible +- Well-documented +- Performant and scalable +- Secure by default + +Consider API consumers' needs and use cases. +Follow industry standards and best practices. +""" \ No newline at end of file