From 78853ae24ce243a455ad91a4e633d6d7cd188e2f Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Tue, 14 Oct 2025 16:13:35 +0000 Subject: [PATCH 01/17] initial proto commit --- ai-temp/AGENTS.md | 64 +++ ai-temp/CHANGELOG.md | 10 + ai-temp/config-design.md | 66 +++ ai-temp/error-handling.md | 31 ++ ai-temp/example-codex-home/AGENTS.md | 6 + ai-temp/example-codex-home/README.md | 28 ++ .../agents/rust_test_writer/AGENTS.md | 5 + .../agents/rust_test_writer/config.toml | 5 + .../agents/test_driver/AGENTS.md | 5 + .../agents/test_driver/config.toml | 5 + ai-temp/example-codex-home/config.toml | 8 + ai-temp/initial_prompt.md | 41 ++ ai-temp/instruction-design.md | 26 ++ ai-temp/persistence-design.md | 39 ++ codex-rs/Cargo.lock | 15 + codex-rs/Cargo.toml | 3 +- codex-rs/cli/src/main.rs | 3 + codex-rs/multi-agent/Cargo.toml | 19 + codex-rs/multi-agent/src/lib.rs | 436 ++++++++++++++++++ codex-rs/tui/Cargo.toml | 1 + codex-rs/tui/src/cli.rs | 4 + codex-rs/tui/src/lib.rs | 55 +-- 22 files changed, 832 insertions(+), 43 deletions(-) create mode 100644 ai-temp/AGENTS.md create mode 100644 ai-temp/CHANGELOG.md create mode 100644 ai-temp/config-design.md create mode 100644 ai-temp/error-handling.md create mode 100644 ai-temp/example-codex-home/AGENTS.md create mode 100644 ai-temp/example-codex-home/README.md create mode 100644 ai-temp/example-codex-home/agents/rust_test_writer/AGENTS.md create mode 100644 ai-temp/example-codex-home/agents/rust_test_writer/config.toml create mode 100644 ai-temp/example-codex-home/agents/test_driver/AGENTS.md create mode 100644 ai-temp/example-codex-home/agents/test_driver/config.toml create mode 100644 ai-temp/example-codex-home/config.toml create mode 100644 ai-temp/initial_prompt.md create mode 100644 ai-temp/instruction-design.md create mode 100644 ai-temp/persistence-design.md create mode 100644 codex-rs/multi-agent/Cargo.toml create mode 100644 codex-rs/multi-agent/src/lib.rs diff --git a/ai-temp/AGENTS.md b/ai-temp/AGENTS.md new file 
mode 100644 index 000000000000..e5601049de2e --- /dev/null +++ b/ai-temp/AGENTS.md @@ -0,0 +1,64 @@ +# Multi-Agent Orchestrator Notes + +## Feature Goal +- Allow the primary Codex CLI agent to delegate work to named sub-agents that live inside `~/.codex/agents/<agent_id>/`. Here `<agent_id>` is a human-friendly slug like `rust_test_writer` that doubles as the directory name. +- Each sub-agent should behave like an isolated Codex installation with its own `AGENTS.md`, `config.toml`, `log/`, `sessions/`, and related state directories. +- The orchestrator must load, run, and switch between agents without duplicating the existing configuration, logging, history, and persistence logic. + +## Existing Implementation Survey + +### Config and `CODEX_HOME` +- `codex-rs/core/src/config.rs` owns the `Config` struct, the `find_codex_home` helper, and `Config::load_from_base_config_with_overrides`, which lets us inject a custom `codex_home` path when constructing a configuration. `Config::log_dir` and related helpers derive paths by appending to `codex_home`, so moving to a per-agent directory is automatically supported. +- `codex-rs/core/src/config_loader/mod.rs` implements layered config loading (`config.toml`, managed overrides, CLI overrides). It already accepts an arbitrary base directory, so we can reuse it for sub-agent trees by pointing it at `~/.codex/agents/<agent_id>`. +- `codex-rs/common/src/config_override.rs` parses `-c key=value` overrides. Those overrides can continue to target agent-specific settings as long as we resolve them against the sub-agent config before the run starts. + +### Project instructions (`AGENTS.md`) +- `Config::load_instructions` in `codex-rs/core/src/config.rs` reads `AGENTS.md` at the root of `codex_home`. That gives us a place to put per-agent doctrine without touching repo-level instructions. +- Repository and cwd instructions are merged by `codex-rs/core/src/project_doc.rs`, which walks the filesystem to collect `AGENTS.md` files. 
This logic happens after `Config` is loaded, so sub-agent instructions will cascade naturally once the agent-specific `Config` sets its own cwd and codex_home. +- The TUI status widget (`codex-rs/tui/src/status/helpers.rs`) already summarises discovered instructions. It will display sub-agent docs correctly as long as the orchestrator updates the `Config` before rendering. +### Session persistence and logging +- Each agent writes rollouts, streaming history, and logs under its own `codex_home`. See `ai-temp/persistence-design.md` for the isolation rules and orchestrator responsibilities. + +### Auth and CLI entry points +- Authentication helpers in `codex-rs/core/src/auth.rs` read and write `auth.json` beneath `codex_home`. For the first iteration, all agents share the primary `~/.codex/auth.json`; isolation hooks can be added later if needed. +- CLI bootstrapping happens in `codex-rs/cli/src/main.rs`, which constructs `Config` via the shared loader and then launches the TUI or other subcommands. The orchestrator will need to hook here (or inside the TUI) to select an agent before the config load so that downstream crates operate against the correct directory tree. +- Documentation for the current configuration surface is in `docs/config.md`, ensuring any new flags or environment variables we introduce are documented alongside existing options. + +## Design Principles +- Treat each sub-agent as an isolated `Config` + state bundle so existing code paths stay unchanged. +- Keep the orchestration layer thin: it should select the right `codex_home`, prepare overrides, and then call into unmodified core/TUI code wherever possible. +- Prefer additive interfaces (e.g., `AgentRegistry::resolve_path(id) -> PathBuf`) over invasive changes to core modules, respecting the repository's instruction to avoid Java-level over-abstraction. +- Make it easy to fall back to single-agent behaviour by defaulting to the legacy `~/.codex` layout when no sub-agent is selected. 
+ +## Proposed Architecture +- Directory layout: + - `~/.codex/agents/<agent_id>/AGENTS.md` – sub-agent guidance consumed by `Config::load_instructions`. `<agent_id>` should be a meaningful, filesystem-safe identifier (e.g., `rust_test_writer`). + - `~/.codex/agents/<agent_id>/config.toml` – optional overrides layered on top of the global config loader. + - `~/.codex/agents/<agent_id>/log/` and `~/.codex/agents/<agent_id>/sessions/` – reused by the TUI and rollout recorder with no code changes. + - Optional extras such as `history.jsonl`, `auth.json`, or MCP metadata can mirror the top-level structure when isolation is desired. +- Orchestration flow: + - Extend the CLI (likely in `codex-rs/cli/src/main.rs`) to accept an `--agent <agent_id>` flag or read the selection from a config profile. The orchestrator resolves `~/.codex/agents/<agent_id>` (creating it if missing) before loading `Config`. + - Introduce a lightweight helper (e.g., `codex-rs/core/src/agent_registry.rs`) that maps agent identifiers to directories, validates presence of `AGENTS.md`/`config.toml`, and exposes the resolved `codex_home`. + - When the main agent needs to talk to a sub-agent, construct a new `Config` by calling `Config::load_from_base_config_with_overrides` with the agent's path. All downstream components (sessions, logs, instructions) receive the correct context automatically. +- Maintain a controller component in the CLI or core layer that mediates conversations: the primary agent keeps the user-facing session, delegates tasks via API calls to sub-agent Codex instances, and reconciles their responses. +- Decoupling strategy: + - Keep orchestrator logic in a new module/crate rather than embedding it directly into `codex-rs/core/src/codex.rs`, so only the orchestration entry points depend on it. + - Use trait-based boundaries sparingly: a simple `AgentContext` struct carrying the agent id, codex_home, and resolved `Config` may be enough, keeping future changes localised. 
+ +## Agent Invocation UX +- Default behaviour: the main agent chooses when to invoke sub-agents, treating them like native tools (similar to the plan tool or apply-patch flow). +- Explicit requests: users can opt to summon particular agents by tagging them in prompts, e.g. `#rust_test_writer`. +- Multiple tags (`#agent_one #agent_two`) allow coordinated runs when orchestration logic supports it. + +## Error Handling +- Validation, logging, and recovery patterns are documented in `ai-temp/error-handling.md`. Highlights: + - Agent discovery failures turn into actionable CLI/TUI errors via `anyhow::bail!`. + - Detailed context is emitted through `tracing` logs while the main history records only summary entries. + - Persistence and working-directory issues follow the same `std::io::Result` semantics used by rollout recording. + +## Roadmap +1. Implement an `AgentRegistry` that enumerates `~/.codex/agents`, validates directory shape, and resolves paths. +2. Add CLI plumbing to choose an agent (flag, config entry, or interactive prompt) before constructing `Config`. +3. Ensure core services (auth, logging, history, rollout) honour the selected agent by threading the alternate `codex_home`. +4. Prototype orchestration logic that spins up a secondary Codex instance using the sub-agent context and mediates message flow. +5. Expand tests and documentation to cover multi-agent behaviour, including snapshots for the new directory layout and user guidance in `docs/`. diff --git a/ai-temp/CHANGELOG.md b/ai-temp/CHANGELOG.md new file mode 100644 index 000000000000..829c90bb7305 --- /dev/null +++ b/ai-temp/CHANGELOG.md @@ -0,0 +1,10 @@ +# Multi-Agent Orchestrator Change Log + +> Keep this file current; it documents the evolution of the multi-agent design work. An outdated changelog breaks the orchestrator timeline. 
+ +## 2025-10-14 +- Captured the baseline design artifacts (`AGENTS.md`, `config-design.md`, `instruction-design.md`, `persistence-design.md`, `error-handling.md`) compiled during the planning phase. +- Reiterated the requirement that this changelog must stay up to date as the multi-agent feature evolves. +- Scaffolded the `codex-multi-agent` crate with `AgentId`, `AgentRegistry`, and async config loading that merges global/agent/CLI overrides into an `AgentContext`. +- Wired the TUI bootstrapper to the new loader, introducing a `--agent` flag that scopes interactive runs to `~/.codex/agents/<agent_id>/`. +- Added `ai-temp/example-codex-home/` with ready-to-run config, instructions, and multiple agent directories for hands-on testing via `CODEX_HOME=...` and `--agent`. diff --git a/ai-temp/config-design.md b/ai-temp/config-design.md new file mode 100644 index 000000000000..c8ec42d6184b --- /dev/null +++ b/ai-temp/config-design.md @@ -0,0 +1,66 @@ +# Agent Configuration Strategy + +## Objectives +- Allow each sub-agent (e.g. `rust_test_writer`) to inherit the user’s normal Codex configuration while selectively overriding settings inside `~/.codex/agents/<agent_id>/config.toml`. +- Reuse the existing configuration pipeline in `codex-rs/core` so we do not fork logic for managed layers, CLI overrides, or path derivations. +- Keep integration points narrowly scoped by introducing a dedicated crate that exposes a small API for the orchestrator and CLI entry points. + +## Existing Building Blocks +- `codex-rs/core/src/config.rs:965` (`Config::load_from_base_config_with_overrides`) already accepts a `ConfigToml`, `ConfigOverrides`, and an explicit `codex_home`. Passing a sub-agent directory here causes every downstream helper (`log_dir`, history, rollout recorder, etc.) to follow that directory automatically. +- `codex-rs/core/src/config_loader/mod.rs:63` layers `config.toml`, managed overrides, and managed preferences for whichever directory we point it at. 
It also exposes `load_config_as_toml` for reading a `ConfigToml` directly. +- `codex-rs/common/src/config_override.rs:19` parses `-c key=value` flags into a list of overrides. These are applied after all disk-based layers, so they naturally become the last stage in the merge order. + +## Proposed Loading Order +1. Resolve the base Codex home (`~/.codex`) via `codex-rs/core/src/config.rs:1290` (`find_codex_home`). +2. Load the user’s global `ConfigToml` (including managed layers) from that directory. +3. If an `agent_id` is provided, resolve `~/.codex/agents/<agent_id>` and load its `config.toml`. Merge this table on top of the global config. +4. Apply CLI overrides (`CliConfigOverrides::parse_overrides`) so one-off adjustments still work per session. +5. Instantiate the final `Config` via `Config::load_from_base_config_with_overrides`, passing the resolved agent `codex_home` when present; otherwise fall back to the global Codex home. + +This yields inherited behaviour by default while letting each agent override keys explicitly. + +## New Crate: `codex-multi-agent` +To keep the core codebase loosely coupled, introduce a new crate under `codex-rs/multi-agent` with the following responsibilities: + +- `AgentRegistry` + - Enumerates `~/.codex/agents`, validates names, and exposes metadata for each sub-agent directory. + - Ensures required files (currently `AGENTS.md` and optional `config.toml`) exist. + +- `AgentConfigLoader` + - Public API: `load(agent_id: Option<&str>, cli_overrides: &CliConfigOverrides) -> std::io::Result<AgentContext>`. + - Internally performs the loading order above: + - Calls into `codex_core::config::load_config_as_toml` for the global layer. + - Loads `~/.codex/agents/<agent_id>/config.toml` (if present) using the same helper. + - Merges TOML tables using `codex_core::config::merge_toml_values`. + - Applies CLI overrides by reusing `CliConfigOverrides::apply_on_value`. + - Constructs `Config` with the correct `codex_home`. 
+ +- `AgentContext` + - Struct holding `agent_id`, `codex_home`, the resolved `Config`, and helper methods (e.g., path accessors) so downstream orchestration code doesn’t manipulate raw paths. + +By isolating the orchestration-specific logic in this crate, other crates only need to depend on a stable interface instead of re-implementing directory handling. + +## Integration Points +- **CLI (`codex-rs/cli/src/main.rs:36`)** + Replace the direct call to `Config::load_with_cli_overrides` with the new loader. The CLI will pass the parsed `CliConfigOverrides` and any requested agent id (via a new `--agent` flag or profile). The returned `AgentContext` supplies the `Config` used to boot the TUI or subcommands. + +- **Primary Orchestrator** + When the main agent delegates to a sub-agent, it asks `AgentConfigLoader` for that agent’s context. Because the returned `Config` already points at `~/.codex/agents/<agent_id>`, all existing services (rollouts, logs, history) operate in the agent’s sandbox without additional wiring. + +- **Future Interfaces** + Other modules (e.g., a session picker or app server bridge) interact with sub-agents only through the `AgentContext` API, keeping implementation details sealed inside the new crate. + +## Authentication Defaults +- Initial version: all agents share the primary `auth.json` located in `~/.codex`. +- The loader always points authentication helpers (`codex_core::auth`) at the main Codex home, regardless of the agent’s data directory. +- Future extension: agent configs may opt into isolated credentials (API keys, ChatGPT logins, provider-specific secrets). For now we defer that work until a concrete use case emerges. + +## Rationale +- Leveraging `Config::load_from_base_config_with_overrides` means we honour every existing feature (profiles, managed preferences, CLI overrides) without re-creating the merge logic. +- Passing a custom `codex_home` is the safest way to ensure all path-based helpers stay in sync. 
It avoids ad-hoc path munging and keeps the change set small. +- A dedicated crate provides a single place to evolve agent-related behaviour (validation, migrations, metadata) without scattering knowledge of `~/.codex/agents` across the repo. + +## Open Points to Finalise +- Exact CLI UX for selecting an agent (flag vs. config profile vs. interactive picker). +- Whether agent directories can fall back to the global `auth.json` or require their own credentials. +- Error reporting strategy when an agent directory exists but is misconfigured. diff --git a/ai-temp/error-handling.md b/ai-temp/error-handling.md new file mode 100644 index 000000000000..11c4a1ba354d --- /dev/null +++ b/ai-temp/error-handling.md @@ -0,0 +1,31 @@ +# Error Handling Plan + +## Existing Patterns To Reuse +- **`anyhow::Result` for CLI flows** – `codex-rs/cli/src/main.rs:242` returns `anyhow::Result<()>`, bubbling rich context to the top level. +- **Explicit `bail!` for user mistakes** – `codex-rs/cli/src/mcp_cmd.rs:234` uses `anyhow::bail!` when arguments are invalid; we can mirror that when an agent directory is missing required files. +- **Structured core errors** – `codex-rs/core/src/error.rs` defines the `CodexErr` enum backed by `thiserror`. New orchestration-specific failures should either map onto existing variants (e.g., `UnsupportedOperation`) or wrap into `CodexErr::Fatal`. +- **I/O fallbacks** – modules like `codex-rs/core/src/rollout/recorder.rs:106` return `std::io::Result`, letting callers decide whether to disable persistence. Follow the same convention for filesystem interactions inside `AgentRegistry`. +- **Runtime logging** – the TUI logs with `tracing::error!` (`codex-rs/tui/src/lib.rs:294`). Whenever we catch and suppress an error, emit a tracing event so users can inspect `codex-tui.log`. + +## Planned Error Classes +1. **Agent discovery errors**: missing directory, unreadable contents, invalid slug. 
These trigger `anyhow::bail!` during registry enumeration so the CLI shows an actionable message. +2. **Instruction violations**: absent or empty `AGENTS.md` when required. Registry will surface a validation error; orchestrator records a warning in the main history (without launching the agent). +3. **Config parsing errors**: malformed `config.toml` under the agent. Propagate the `toml::de::Error` via `anyhow` while annotating with the agent id/path. +4. **Working directory issues**: nonexistent or non-writable target paths. Return `std::io::Error` with `ErrorKind::NotFound`/`PermissionDenied` so callers can decide whether to fall back or abort. +5. **Session/log persistence errors**: mirror rollout recorder behaviour—log a warning and continue without persistence when write failures occur. + +## Surfacing Strategy +- CLI/TUI: display concise, user-facing messages for validation failures (e.g., “`#rust_test_writer` is missing AGENTS.md – fix ~/.codex/agents/rust_test_writer/AGENTS.md`”). +- Logs: use `tracing::error!` and `warn!` to include full context (agent id, path, underlying error). +- Main history: when a sub-agent fails to launch, append a summary item noting the failure class. + +## Recovery Paths +- If agent setup fails, the orchestrator: + - Logs the error. + - Writes a single history entry describing the failure. + - Returns control to the primary agent without launching the sub-agent. +- Follow-up invocations will re-run validation so fixes take effect immediately. + +## TODO +- Implement shared helpers inside the new multi-agent crate to construct consistent error messages. +- Add tests covering invalid agent directories and ensure the CLI/TUI renders the expected guidance. 
diff --git a/ai-temp/example-codex-home/AGENTS.md b/ai-temp/example-codex-home/AGENTS.md new file mode 100644 index 000000000000..948e9d8e1344 --- /dev/null +++ b/ai-temp/example-codex-home/AGENTS.md @@ -0,0 +1,6 @@ +# Sample Primary Agent Instructions + +Welcome to the example Codex home directory. This `AGENTS.md` mirrors what you might keep under `~/.codex/AGENTS.md`. + +- Default behaviour: focus on quick experiments in the current repository. +- Keep answers concise; link back to multi-agent documentation when relevant. diff --git a/ai-temp/example-codex-home/README.md b/ai-temp/example-codex-home/README.md new file mode 100644 index 000000000000..a0ebeb57c75b --- /dev/null +++ b/ai-temp/example-codex-home/README.md @@ -0,0 +1,28 @@ +# Sample Codex Home Setup + +This directory mirrors a minimal `~/.codex` layout so you can try the new multi-agent loader without touching your real config. + +## Directory Structure + +- `config.toml`: baseline settings used when no sub-agent is selected. +- `AGENTS.md`: default instruction set for the primary agent. +- `agents/rust_test_writer`: sub-agent focused on Rust testing. +- `agents/test_driver`: sub-agent that keeps sandboxing strict while running checks. +- `log/`, `sessions/`: empty placeholders so Codex can write logs and rollouts. + +## Quick Start + +```bash +export CODEX_HOME="$(pwd)/ai-temp/example-codex-home" + +# Primary agent (uses AGENTS.md + config.toml in this directory) +codex --help + +# Rust-focused sub-agent +codex --agent rust_test_writer + +# Test driver sub-agent with read-only sandbox +codex --agent test_driver +``` + +Unset `CODEX_HOME` (or point it back to your real path) once you're done experimenting. 
diff --git a/ai-temp/example-codex-home/agents/rust_test_writer/AGENTS.md b/ai-temp/example-codex-home/agents/rust_test_writer/AGENTS.md new file mode 100644 index 000000000000..080550f745a7 --- /dev/null +++ b/ai-temp/example-codex-home/agents/rust_test_writer/AGENTS.md @@ -0,0 +1,5 @@ +# Rust Test Writer Agent + +- Own the Rust test suite; prioritize `cargo test -p <crate>` workflows. +- Suggest additional assertions and snapshot updates when behaviour changes. +- Assume the working directory is the repository root unless overridden. diff --git a/ai-temp/example-codex-home/agents/rust_test_writer/config.toml b/ai-temp/example-codex-home/agents/rust_test_writer/config.toml new file mode 100644 index 000000000000..74b1fce60b99 --- /dev/null +++ b/ai-temp/example-codex-home/agents/rust_test_writer/config.toml @@ -0,0 +1,5 @@ +model = "o1-mini" +sandbox_mode = "workspace-write" + +[history] +persistence = "save-all" diff --git a/ai-temp/example-codex-home/agents/test_driver/AGENTS.md b/ai-temp/example-codex-home/agents/test_driver/AGENTS.md new file mode 100644 index 000000000000..36bcfbc87d63 --- /dev/null +++ b/ai-temp/example-codex-home/agents/test_driver/AGENTS.md @@ -0,0 +1,5 @@ +# Test Driver Agent + +- Act as a build-and-test orchestrator. +- Run only idempotent commands (`cargo check`, `cargo test`, `npm test`). +- Summarize failures succinctly with probable fixes. 
diff --git a/ai-temp/example-codex-home/agents/test_driver/config.toml b/ai-temp/example-codex-home/agents/test_driver/config.toml new file mode 100644 index 000000000000..936a98410e4c --- /dev/null +++ b/ai-temp/example-codex-home/agents/test_driver/config.toml @@ -0,0 +1,5 @@ +model = "gpt-4o-mini" +sandbox_mode = "read-only" + +[history] +persistence = "none" diff --git a/ai-temp/example-codex-home/config.toml b/ai-temp/example-codex-home/config.toml new file mode 100644 index 000000000000..3093f3656bde --- /dev/null +++ b/ai-temp/example-codex-home/config.toml @@ -0,0 +1,8 @@ +model = "gpt-4o-mini" +sandbox_mode = "workspace-write" + +[history] +persistence = "save-all" + +[tui] +show_task_summary = true diff --git a/ai-temp/initial_prompt.md b/ai-temp/initial_prompt.md new file mode 100644 index 000000000000..fb210e6556db --- /dev/null +++ b/ai-temp/initial_prompt.md @@ -0,0 +1,41 @@ +› here is what i will be working on + it will be multi agent suport for codex cli + so we have main agent that user can interact with via cli + but this agent will be able to talk to subagent that will have its own AGENTS.md + and config.toml + as well as + /007  log/ + /008  sessions/ + + so it will function exacly the same as ~/.codex dir in base codex + but in ~/.codex it will be possible to create dir + agents/ + that will store dirs of agents name of dir will be id of agent + there there will be at least AGENTS.md and config.toml crutialy + to set it up + + first we are in design phase + in root of repo make dir + ai-temp/ + + there place AGENTS.md where we keep info about this feture specific + to keep ai context while we are working on it + + + first of all investigete how related moving parts are implemented in existing codex + codebase and what we can reuse and how are we going to hook that in + that will be first step + + than + make AGENTS.md with your findign related to my feture idea than overall roadmap + basic design principals archetecute so i can have a think 
about it + referece relative paths of files that you reference so i can find it and have a look. + + some more points + i want it to be decoupled form main codebase where possible so its easy to maintain and + interfaces shuld allow for change in other parts of codebase without a lot of changes in my + part + so we use some lvl of abstraction but not Java enterprise lvl that would be to much lol + just a lil decopeling + + go for it create design documentations so we can analise this idea together diff --git a/ai-temp/instruction-design.md b/ai-temp/instruction-design.md new file mode 100644 index 000000000000..cd1a04ebf217 --- /dev/null +++ b/ai-temp/instruction-design.md @@ -0,0 +1,26 @@ +# Instruction Handling Strategy + +## Goals +- Let each sub-agent define its own `AGENTS.md` guidance under `~/.codex/agents/<agent_id>/AGENTS.md`. +- Provide flexible working-directory scopes so instructions can apply to isolated sandboxes, shared repos, or arbitrary service folders. +- Control whether sub-agent instructions replace or extend repo-level documents. + +## Working Directory Modes +- The orchestrator assigns a working directory per agent by setting `ConfigOverrides::cwd` before constructing `Config`. +- Supported modes: + 1. **Isolated sandbox** – run the agent in a dedicated staging directory (e.g., `/tmp/...`) for experimentation without touching the main workspace. + 2. **Shared workspace** – reuse the primary agent’s current working directory so collaborators operate on the same files. + 3. **Custom path** – point at a specific project directory (frontend/backend split, microservice repos, etc.). +- `AgentContext` records the chosen path so downstream code (project-doc discovery, logging) operates with consistent scope. + +## Instruction Inheritance +- Default behaviour: the agent’s `AGENTS.md` replaces inherited docs for a clean slate. 
+- Optional override: agent `config.toml` may set `inherit_repo_instructions = true` (name TBD) to append repository-level documents after the agent-specific instructions. +- Implementation outline: + - During load, the config layer reads the inheritance flag. + - If disabled, set `Config::base_instructions` to the agent file and skip repo traversal. + - If enabled, rely on `codex_core::project_doc::read_project_docs` so instructions are merged root-to-leaf with the agent doc prepended. + +## UI Exposure +- The TUI status helpers (`codex-rs/tui/src/status/helpers.rs`) will detect the updated `Config` and display whichever instruction set is active. +- Future enhancements may include surfacing the current inheritance mode or working directory in status overlays so users know the context the sub-agent is operating within. diff --git a/ai-temp/persistence-design.md b/ai-temp/persistence-design.md new file mode 100644 index 000000000000..57a187729fbb --- /dev/null +++ b/ai-temp/persistence-design.md @@ -0,0 +1,39 @@ +# Session & Logging Strategy + +## Guiding Principles +- Every sub-agent stores its session artifacts exclusively under `~/.codex/agents/<agent_id>/`. +- The main agent’s history remains uncluttered; it records only that a sub-agent was invoked, not the sub-agent’s internal transcript. +- Session and log data must never leak from one agent’s directory into another’s unless the user explicitly exports it. + +## Rollouts (`sessions/`) +- `codex_core::rollout::recorder` writes JSONL rollouts beneath `config.codex_home.join("sessions")`. +- When we construct a `Config` for a sub-agent, we set `codex_home = ~/.codex/agents/<agent_id>`, so rollouts automatically land in `~/.codex/agents/<agent_id>/sessions/...`. +- Orchestrator responsibility: + - Optionally create a lightweight stub entry in the main agent’s `sessions/` noting the sub-agent invocation (timestamp, agent id, summary). + - Leave the full rollout content in the sub-agent’s directory. 
+ +## Streaming History (`history.jsonl`) +- `codex_core::message_history` appends to `config.codex_home/history.jsonl`. +- Sub-agents therefore maintain separate history files without additional work. +- The orchestrator may log a high-level event in the primary history file containing: + - Agent id + - Input prompt summary + - Final output summary + - Optional metadata (duration, status) +- No token-level or intermediate output from the sub-agent should be written to the main history file. + +## Logging (`log/`) +- The TUI initialises its sink via `codex_core::config::log_dir(&config)`. +- With the sub-agent `Config`, logs land in `~/.codex/agents/<agent_id>/log/`. +- The orchestrator can maintain a central audit log (e.g., `~/.codex/log/multi-agent.log`) capturing cross-agent coordination events without duplicating the sub-agent logs. + +## Temporary State +- Any ephemeral files (scratch buffers, intermediate diffs) created by sub-agents should live inside their chosen working directory or under their agent directory. +- The orchestrator provides helpers to allocate temp directories scoped to the agent id so cleanup routines are straightforward. + +## Main-Agent Visibility +- Primary session/history entries contain only: + - Which agent was invoked. + - When the invocation started/finished. + - Success/failure status plus short text summaries. +- Detailed traces require inspecting the per-agent directories, ensuring isolation by default while still enabling audits when needed. 
diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 0c12880c940a..2578e0c1ff08 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1254,6 +1254,20 @@ dependencies = [ "wiremock", ] +[[package]] +name = "codex-multi-agent" +version = "0.0.0" +dependencies = [ + "anyhow", + "codex-common", + "codex-core", + "serde", + "serde_json", + "tempfile", + "tokio", + "toml", +] + [[package]] name = "codex-ollama" version = "0.0.0" @@ -1391,6 +1405,7 @@ dependencies = [ "codex-file-search", "codex-git-tooling", "codex-login", + "codex-multi-agent", "codex-ollama", "codex-protocol", "color-eyre", diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml index ca40b1a536e4..5fb88014e0b2 100644 --- a/codex-rs/Cargo.toml +++ b/codex-rs/Cargo.toml @@ -32,7 +32,7 @@ members = [ "git-apply", "utils/json-to-toml", "utils/readiness", - "utils/string", + "utils/string", "multi-agent", ] resolver = "2" @@ -70,6 +70,7 @@ codex-protocol-ts = { path = "protocol-ts" } codex-responses-api-proxy = { path = "responses-api-proxy" } codex-rmcp-client = { path = "rmcp-client" } codex-tui = { path = "tui" } +codex-multi-agent = { path = "multi-agent" } codex-utils-json-to-toml = { path = "utils/json-to-toml" } codex-utils-readiness = { path = "utils/readiness" } codex-utils-string = { path = "utils/string" } diff --git a/codex-rs/cli/src/main.rs b/codex-rs/cli/src/main.rs index cd43041ab0e7..dbf851fcba19 100644 --- a/codex-rs/cli/src/main.rs +++ b/codex-rs/cli/src/main.rs @@ -434,6 +434,9 @@ fn merge_resume_cli_flags(interactive: &mut TuiCli, resume_cli: TuiCli) { if let Some(profile) = resume_cli.config_profile { interactive.config_profile = Some(profile); } + if let Some(agent) = resume_cli.agent { + interactive.agent = Some(agent); + } if let Some(sandbox) = resume_cli.sandbox_mode { interactive.sandbox_mode = Some(sandbox); } diff --git a/codex-rs/multi-agent/Cargo.toml b/codex-rs/multi-agent/Cargo.toml new file mode 100644 index 000000000000..93514af898ea --- /dev/null +++ 
b/codex-rs/multi-agent/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "codex-multi-agent" +version.workspace = true +edition.workspace = true + +[dependencies] +anyhow.workspace = true +codex-common = { workspace = true, features = ["cli"] } +codex-core.workspace = true +serde.workspace = true +toml.workspace = true + +[dev-dependencies] +serde_json.workspace = true +tempfile.workspace = true +tokio = { workspace = true, features = ["macros", "rt"] } + +[lints] +workspace = true diff --git a/codex-rs/multi-agent/src/lib.rs b/codex-rs/multi-agent/src/lib.rs new file mode 100644 index 000000000000..89443fdc6e95 --- /dev/null +++ b/codex-rs/multi-agent/src/lib.rs @@ -0,0 +1,436 @@ +//! Multi-agent configuration loader and registry utilities. +//! +//! This crate keeps the multi-agent orchestration logic decoupled from the +//! rest of the codebase. It exposes a focused API around three main concepts: +//! an [`AgentId`] slug, an [`AgentRegistry`] that maps ids to directories under +//! `~/.codex/agents/`, and an [`AgentContext`] that bundles the effective +//! configuration for a selected agent. + +use std::fmt; +use std::fs; +use std::path::Path; +use std::path::PathBuf; + +use anyhow::Context; +use anyhow::Result; +use anyhow::bail; +use codex_common::CliConfigOverrides; +use codex_core::config::Config; +use codex_core::config::ConfigOverrides; +use codex_core::config::ConfigToml; +use codex_core::config::find_codex_home; +use codex_core::config_loader; +use serde::Deserialize; +use serde::Serialize; +use toml::Value as TomlValue; + +/// Identifier for a sub-agent directory under `~/.codex/agents`. +/// +/// The slug must be lowercase ASCII and may contain letters, numbers, +/// underscores, and hyphens. This keeps directory names portable while staying +/// human-friendly (e.g., `rust_test_writer`). 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[serde(transparent)]
+pub struct AgentId(String);
+
+impl AgentId {
+    /// Parse `raw` into an [`AgentId`] while enforcing slug constraints.
+    pub fn parse(raw: &str) -> Result<Self> {
+        if raw.is_empty() {
+            bail!("Agent id cannot be empty");
+        }
+
+        if !raw
+            .bytes()
+            .all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_'))
+        {
+            bail!("Invalid agent id `{raw}`; use lowercase letters, numbers, `-`, or `_`");
+        }
+
+        Ok(Self(raw.to_string()))
+    }
+
+    /// Access the slug as a `&str`.
+    pub fn as_str(&self) -> &str {
+        &self.0
+    }
+}
+
+impl fmt::Display for AgentId {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+/// Root-level registry responsible for resolving agent directories.
+#[derive(Debug, Clone)]
+pub struct AgentRegistry {
+    global_codex_home: PathBuf,
+    agents_root: PathBuf,
+}
+
+impl AgentRegistry {
+    /// Construct a registry for a given global Codex home directory.
+    pub fn new(global_codex_home: PathBuf) -> Self {
+        let agents_root = global_codex_home.join("agents");
+        Self {
+            global_codex_home,
+            agents_root,
+        }
+    }
+
+    /// Resolve and create (if needed) the directory for `agent_id`.
+    pub fn ensure_agent_dir(&self, agent_id: &AgentId) -> Result<PathBuf> {
+        let dir = self.agents_root.join(agent_id.as_str());
+        fs::create_dir_all(&dir).with_context(|| {
+            format!(
+                "Failed to create agent directory at {}",
+                dir.to_string_lossy()
+            )
+        })?;
+        Ok(dir)
+    }
+
+    /// Enumerate all agent ids by inspecting the filesystem.
+    pub fn list_agent_ids(&self) -> Result<Vec<AgentId>> {
+        let iter = match fs::read_dir(&self.agents_root) {
+            Ok(entries) => entries,
+            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
+                return Ok(Vec::new());
+            }
+            Err(err) => {
+                return Err(err).with_context(|| {
+                    format!(
+                        "Failed to list agent directory {}",
+                        self.agents_root.to_string_lossy()
+                    )
+                });
+            }
+        };
+
+        let mut ids = Vec::new();
+        for entry in iter {
+            let entry = entry?;
+            if entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
+                && let Some(name) = entry.file_name().to_str()
+                && let Ok(id) = AgentId::parse(name)
+            {
+                ids.push(id);
+            }
+        }
+
+        ids.sort_by(|a, b| a.as_str().cmp(b.as_str()));
+        Ok(ids)
+    }
+
+    /// Access the canonical Codex home all agents inherit from.
+    pub fn global_codex_home(&self) -> &Path {
+        &self.global_codex_home
+    }
+
+    /// Access the root directory that holds agent subdirectories.
+    pub fn agents_root(&self) -> &Path {
+        &self.agents_root
+    }
+}
+
+/// Aggregated context for an agent (or the primary agent when `agent_id` is
+/// `None`).
+#[derive(Debug, Clone)]
+pub struct AgentContext {
+    agent_id: Option<AgentId>,
+    codex_home: PathBuf,
+    global_codex_home: PathBuf,
+    config_toml: ConfigToml,
+    config: Config,
+}
+
+impl AgentContext {
+    fn new(
+        agent_id: Option<AgentId>,
+        codex_home: PathBuf,
+        global_codex_home: PathBuf,
+        config_toml: ConfigToml,
+        config: Config,
+    ) -> Self {
+        Self {
+            agent_id,
+            codex_home,
+            global_codex_home,
+            config_toml,
+            config,
+        }
+    }
+
+    /// Returns the resolved agent id (if any).
+    pub fn agent_id(&self) -> Option<&AgentId> {
+        self.agent_id.as_ref()
+    }
+
+    /// Returns the effective Codex home used for configuration, logs, and sessions.
+    pub fn codex_home(&self) -> &Path {
+        &self.codex_home
+    }
+
+    /// Returns the shared global Codex home (`~/.codex`).
+    pub fn global_codex_home(&self) -> &Path {
+        &self.global_codex_home
+    }
+
+    /// Returns the merged `ConfigToml` that produced this context.
+    pub fn config_toml(&self) -> &ConfigToml {
+        &self.config_toml
+    }
+
+    /// Provides the resolved [`Config`] for this context.
+    pub fn config(&self) -> &Config {
+        &self.config
+    }
+
+    /// Consume the context and return the inner [`Config`].
+    pub fn into_config(self) -> Config {
+        self.config
+    }
+}
+
+/// Loader responsible for merging global, agent, and CLI overrides into a
+/// single [`Config`] instance.
+#[derive(Debug, Clone)]
+pub struct AgentConfigLoader {
+    registry: AgentRegistry,
+}
+
+impl AgentConfigLoader {
+    /// Construct a loader rooted at the provided `global_codex_home`.
+    pub fn new(global_codex_home: PathBuf) -> Self {
+        Self {
+            registry: AgentRegistry::new(global_codex_home),
+        }
+    }
+
+    /// Construct a loader by discovering the global Codex home from the environment.
+    pub fn from_env() -> Result<Self> {
+        let global_codex_home = find_codex_home()
+            .context("Failed to resolve Codex home while constructing AgentConfigLoader")?;
+        Ok(Self::new(global_codex_home))
+    }
+
+    /// Access the underlying registry.
+    pub fn registry(&self) -> &AgentRegistry {
+        &self.registry
+    }
+
+    /// Load configuration for the provided `agent_slug`. When `agent_slug` is
+    /// `None`, the primary (legacy) Codex context is returned.
+    pub async fn load_by_slug(
+        &self,
+        agent_slug: Option<&str>,
+        cli_overrides: &CliConfigOverrides,
+        config_overrides: ConfigOverrides,
+    ) -> Result<AgentContext> {
+        let agent_id = match agent_slug {
+            Some(slug) => Some(AgentId::parse(slug)?),
+            None => None,
+        };
+        self.load(agent_id.as_ref(), cli_overrides, config_overrides)
+            .await
+    }
+
+    /// Load configuration for `agent_id`, returning an [`AgentContext`].
+    pub async fn load(
+        &self,
+        agent_id: Option<&AgentId>,
+        cli_overrides: &CliConfigOverrides,
+        config_overrides: ConfigOverrides,
+    ) -> Result<AgentContext> {
+        let mut merged_value =
+            config_loader::load_config_as_toml(self.registry.global_codex_home())
+                .await
+                .with_context(|| {
+                    format!(
+                        "Failed to load global config from {}",
+                        self.registry.global_codex_home().to_string_lossy()
+                    )
+                })?;
+
+        let (agent_id_owned, agent_codex_home) = match agent_id {
+            Some(id) => {
+                let agent_dir = self.registry.ensure_agent_dir(id)?;
+                let agent_value = config_loader::load_config_as_toml(agent_dir.as_path())
+                    .await
+                    .with_context(|| {
+                        format!(
+                            "Failed to load agent config for `{}` at {}",
+                            id,
+                            agent_dir.to_string_lossy()
+                        )
+                    })?;
+                merge_toml_values(&mut merged_value, &agent_value);
+                (Some(id.clone()), agent_dir)
+            }
+            None => (None, self.registry.global_codex_home().to_path_buf()),
+        };
+
+        cli_overrides
+            .apply_on_value(&mut merged_value)
+            .map_err(|err: String| anyhow::anyhow!(err))
+            .context("Failed to apply CLI config overrides")?;
+
+        let config_toml: ConfigToml = merged_value.clone().try_into().map_err(|err| {
+            anyhow::anyhow!(err).context("Failed to deserialize merged config into ConfigToml")
+        })?;
+
+        let config = Config::load_from_base_config_with_overrides(
+            config_toml.clone(),
+            config_overrides,
+            agent_codex_home.clone(),
+        )
+        .with_context(|| {
+            format!(
+                "Failed to build Config for agent `{}`",
+                agent_id.map(AgentId::as_str).unwrap_or("primary")
+            )
+        })?;
+
+        Ok(AgentContext::new(
+            agent_id_owned,
+            agent_codex_home,
+            self.registry.global_codex_home().to_path_buf(),
+            config_toml,
+            config,
+        ))
+    }
+}
+
+/// Convenience helper that loads an [`AgentContext`] using environment-derived
+/// Codex paths.
+pub async fn load_agent_context(
+    agent_slug: Option<&str>,
+    cli_overrides: &CliConfigOverrides,
+    config_overrides: ConfigOverrides,
+) -> Result<AgentContext> {
+    AgentConfigLoader::from_env()?
+        .load_by_slug(agent_slug, cli_overrides, config_overrides)
+        .await
+}
+
+fn merge_toml_values(base: &mut TomlValue, overlay: &TomlValue) {
+    if let TomlValue::Table(overlay_table) = overlay
+        && let TomlValue::Table(base_table) = base
+    {
+        for (key, value) in overlay_table {
+            if let Some(existing) = base_table.get_mut(key) {
+                merge_toml_values(existing, value);
+            } else {
+                base_table.insert(key.clone(), value.clone());
+            }
+        }
+    } else {
+        *base = overlay.clone();
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use codex_core::config::ConfigOverrides;
+    use codex_core::protocol::SandboxPolicy;
+    use tempfile::tempdir;
+
+    #[tokio::test]
+    async fn parses_agent_id_and_loads_config() {
+        let temp_home = tempdir().expect("tempdir");
+        let global = temp_home.path().join("global");
+        let agents_root = global.join("agents");
+        fs::create_dir_all(&agents_root).expect("agents dir");
+
+        // Seed global config.
+        fs::create_dir_all(global.join("log")).expect("log dir");
+        fs::create_dir_all(global.join("sessions")).expect("sessions dir");
+        fs::create_dir_all(global.join("history")).expect("history dir");
+        fs::create_dir_all(global.join("mcp")).expect("mcp dir");
+        fs::create_dir_all(&agents_root).expect("agents dir");
+        fs::create_dir_all(global.join("tmp")).expect("tmp dir");
+        fs::write(global.join("config.toml"), "model = \"o1\"").expect("write global config");
+
+        let loader = AgentConfigLoader::new(global.clone());
+        let cli_overrides = CliConfigOverrides {
+            raw_overrides: vec!["model=\"o2\"".to_string()],
+        };
+
+        let context = loader
+            .load_by_slug(None, &cli_overrides, ConfigOverrides::default())
+            .await
+            .expect("load context");
+
+        assert!(context.agent_id().is_none());
+        assert_eq!(context.codex_home(), global.as_path());
+        assert_eq!(context.config().model, "o2", "CLI override should win");
+
+        let agent_id = AgentId::parse("rust_test_writer").expect("parse");
+        let agent_dir = loader
+            .registry
+            .ensure_agent_dir(&agent_id)
+            .expect("agent dir");
+        fs::write(
+            agent_dir.join("config.toml"),
+            "sandbox_mode = \"danger-full-access\"",
+        )
+        .expect("write agent config");
+
+        let context = loader
+            .load(
+                Some(&agent_id),
+                &CliConfigOverrides::default(),
+                ConfigOverrides::default(),
+            )
+            .await
+            .expect("load agent context");
+
+        assert_eq!(context.agent_id().unwrap().as_str(), "rust_test_writer");
+        assert_eq!(context.codex_home(), agent_dir.as_path());
+        assert_eq!(
+            context.config().sandbox_policy,
+            SandboxPolicy::DangerFullAccess
+        );
+    }
+
+    #[test]
+    fn agent_id_rejects_invalid_characters() {
+        assert!(AgentId::parse("Ok").is_err());
+        assert!(AgentId::parse("with space").is_err());
+        assert!(AgentId::parse("rust#1").is_err());
+        assert!(AgentId::parse("").is_err());
+        assert!(AgentId::parse("rust_test_writer").is_ok());
+    }
+
+    #[test]
+    fn merge_toml_recursively_merges_tables() {
+        use toml::value::Table;
+
+        let mut base_table = Table::new();
+        base_table.insert("model".into(), TomlValue::String("o1".into()));
+        let mut base_nested = Table::new();
+        base_nested.insert("value".into(), TomlValue::Integer(1));
+        base_table.insert("nested".into(), TomlValue::Table(base_nested));
+        let mut base = TomlValue::Table(base_table);
+
+        let mut overlay_table = Table::new();
+        let mut overlay_nested = Table::new();
+        overlay_nested.insert("value".into(), TomlValue::Integer(2));
+        overlay_nested.insert("extra".into(), TomlValue::Boolean(true));
+        overlay_table.insert("nested".into(), TomlValue::Table(overlay_nested));
+        overlay_table.insert("new".into(), TomlValue::String("field".into()));
+        let overlay = TomlValue::Table(overlay_table);
+
+        merge_toml_values(&mut base, &overlay);
+        let nested = base
+            .get("nested")
+            .unwrap()
+            .as_table()
+            .expect("nested table");
+        assert_eq!(nested.get("value").unwrap().as_integer(), Some(2));
+        assert_eq!(nested.get("extra").unwrap().as_bool(), Some(true));
+        assert_eq!(base.get("new").unwrap().as_str(), Some("field"));
+    }
+}
diff --git
a/codex-rs/tui/Cargo.toml b/codex-rs/tui/Cargo.toml index 2c6f32a20aae..e9f5a48cbd8b 100644 --- a/codex-rs/tui/Cargo.toml +++ b/codex-rs/tui/Cargo.toml @@ -38,6 +38,7 @@ codex-file-search = { workspace = true } codex-git-tooling = { workspace = true } codex-login = { workspace = true } codex-ollama = { workspace = true } +codex-multi-agent = { workspace = true } codex-protocol = { workspace = true } codex-app-server-protocol = { workspace = true } color-eyre = { workspace = true } diff --git a/codex-rs/tui/src/cli.rs b/codex-rs/tui/src/cli.rs index ee2a0355da0c..d1196d8c4a75 100644 --- a/codex-rs/tui/src/cli.rs +++ b/codex-rs/tui/src/cli.rs @@ -41,6 +41,10 @@ pub struct Cli { #[arg(long = "profile", short = 'p')] pub config_profile: Option, + /// Run the CLI using the configuration scoped to `~/.codex/agents//`. + #[arg(long = "agent", value_name = "AGENT_ID")] + pub agent: Option, + /// Select the sandbox policy to use when executing model-generated shell /// commands. #[arg(long = "sandbox", short = 's')] diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index 5d7188c03611..6ebbfba1e4ff 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -14,8 +14,6 @@ use codex_core::RolloutRecorder; use codex_core::config::Config; use codex_core::config::ConfigOverrides; use codex_core::config::ConfigToml; -use codex_core::config::find_codex_home; -use codex_core::config::load_config_as_toml_with_cli_overrides; use codex_core::find_conversation_path_by_id_str; use codex_core::protocol::AskForApproval; use codex_core::protocol::SandboxPolicy; @@ -149,49 +147,22 @@ pub async fn run_main( show_raw_agent_reasoning: cli.oss.then_some(true), tools_web_search_request: cli.web_search.then_some(true), }; - let raw_overrides = cli.config_overrides.raw_overrides.clone(); - let overrides_cli = codex_common::CliConfigOverrides { raw_overrides }; - let cli_kv_overrides = match overrides_cli.parse_overrides() { - Ok(v) => v, - #[allow(clippy::print_stderr)] - 
Err(e) => { - eprintln!("Error parsing -c overrides: {e}"); - std::process::exit(1); - } - }; - - let mut config = { - // Load configuration and support CLI overrides. - - #[allow(clippy::print_stderr)] - match Config::load_with_cli_overrides(cli_kv_overrides.clone(), overrides).await { - Ok(config) => config, - Err(err) => { - eprintln!("Error loading configuration: {err}"); - std::process::exit(1); - } - } - }; - - // we load config.toml here to determine project state. #[allow(clippy::print_stderr)] - let config_toml = { - let codex_home = match find_codex_home() { - Ok(codex_home) => codex_home, - Err(err) => { - eprintln!("Error finding codex home: {err}"); - std::process::exit(1); - } - }; - - match load_config_as_toml_with_cli_overrides(&codex_home, cli_kv_overrides).await { - Ok(config_toml) => config_toml, - Err(err) => { - eprintln!("Error loading config.toml: {err}"); - std::process::exit(1); - } + let agent_context = match codex_multi_agent::load_agent_context( + cli.agent.as_deref(), + &cli.config_overrides, + overrides, + ) + .await + { + Ok(ctx) => ctx, + Err(err) => { + eprintln!("Error loading configuration: {err}"); + std::process::exit(1); } }; + let config_toml = agent_context.config_toml().clone(); + let mut config = agent_context.into_config(); let cli_profile_override = cli.config_profile.clone(); let active_profile = cli_profile_override From 725f60f397b223a838b14d6c42e761fd5d521e84 Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Tue, 14 Oct 2025 23:18:26 +0000 Subject: [PATCH 02/17] explicit agent call implementation --- ai-temp/CHANGELOG.md | 8 + ai-temp/example-codex-home/.gitignore | 6 + ai-temp/example-codex-home/AGENTS.md | 14 +- ai-temp/example-codex-home/README.md | 25 +- .../agents/critic/AGENTS.md | 5 + .../{test_driver => critic}/config.toml | 2 +- .../agents/ideas_provider/AGENTS.md | 5 + .../config.toml | 2 +- .../agents/rust_test_writer/AGENTS.md | 5 - .../agents/test_driver/AGENTS.md | 5 - 
ai-temp/example-codex-home/config.toml | 2 +- ai-temp/orchestration-integration.md | 179 ++++++++++ ai-temp/ui-ux-delegation.md | 29 ++ codex-rs/Cargo.lock | 2 + codex-rs/multi-agent/Cargo.toml | 3 + codex-rs/multi-agent/src/lib.rs | 8 + codex-rs/multi-agent/src/orchestrator.rs | 335 ++++++++++++++++++ codex-rs/tui/src/app.rs | 108 ++++++ codex-rs/tui/src/app_event.rs | 25 ++ codex-rs/tui/src/chatwidget.rs | 144 ++++++++ codex-rs/tui/src/chatwidget/tests.rs | 94 +++++ codex-rs/tui/src/lib.rs | 32 +- 22 files changed, 1008 insertions(+), 30 deletions(-) create mode 100644 ai-temp/example-codex-home/.gitignore create mode 100644 ai-temp/example-codex-home/agents/critic/AGENTS.md rename ai-temp/example-codex-home/agents/{test_driver => critic}/config.toml (72%) create mode 100644 ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md rename ai-temp/example-codex-home/agents/{rust_test_writer => ideas_provider}/config.toml (79%) delete mode 100644 ai-temp/example-codex-home/agents/rust_test_writer/AGENTS.md delete mode 100644 ai-temp/example-codex-home/agents/test_driver/AGENTS.md create mode 100644 ai-temp/orchestration-integration.md create mode 100644 ai-temp/ui-ux-delegation.md create mode 100644 codex-rs/multi-agent/src/orchestrator.rs diff --git a/ai-temp/CHANGELOG.md b/ai-temp/CHANGELOG.md index 829c90bb7305..91aee050b6e2 100644 --- a/ai-temp/CHANGELOG.md +++ b/ai-temp/CHANGELOG.md @@ -8,3 +8,11 @@ - Scaffolded the `codex-multi-agent` crate with `AgentId`, `AgentRegistry`, and async config loading that merges global/agent/CLI overrides into an `AgentContext`. - Wired the TUI bootstrapper to the new loader, introducing a `--agent` flag that scopes interactive runs to `~/.codex/agents/<agent_id>/`. - Added `ai-temp/example-codex-home/` with ready-to-run config, instructions, and multiple agent directories for hands-on testing via `CODEX_HOME=...` and `--agent`. 
+- Authored `ai-temp/orchestration-integration.md`, outlining logic, UI/UX, and minimal-coupling hooks to let the primary agent delegate work to sub-agents in the existing codebase. +- Captured delegation decisions (single-flight execution, shared auth, primary-agent-composed prompts) inside `ai-temp/orchestration-integration.md`. +- Implemented the orchestrator (`codex-rs/multi-agent/src/orchestrator.rs`) and wired the TUI to support inline delegation via `#agent_id ...` prompts, with progress and completion surfaced through the main transcript. +- Simplified the example Codex home to `ideas_provider` (gpt-5) and `critic` (gpt-5-nano) agents for easier manual testing. +- Fixed the TUI delegation hook so user-entered `#agent_id …` messages trigger the orchestrator, and added a regression test to guard the behaviour. +- Delegated runs now stream live output (`DelegateEvent::Delta`) through the TUI, and remaining UX follow-ups are tracked in `ai-temp/ui-ux-delegation.md`. +- Added a dedicated status indicator while a delegate runs, restored the idle header on completion, and regression-tested streaming to prevent animation regressions. +- Updated the sample Codex home instructions/README, ensured the critic agent uses `gpt-5-nano`, and documented the new delegation UX in `ai-temp/ui-ux-delegation.md`. diff --git a/ai-temp/example-codex-home/.gitignore b/ai-temp/example-codex-home/.gitignore new file mode 100644 index 000000000000..2a9f473f92a1 --- /dev/null +++ b/ai-temp/example-codex-home/.gitignore @@ -0,0 +1,6 @@ +# Runtime artifacts generated while testing the sample Codex home +auth.json +history.jsonl +log/ +sessions/ +agents/*/sessions/ diff --git a/ai-temp/example-codex-home/AGENTS.md b/ai-temp/example-codex-home/AGENTS.md index 948e9d8e1344..65d7644d7d2f 100644 --- a/ai-temp/example-codex-home/AGENTS.md +++ b/ai-temp/example-codex-home/AGENTS.md @@ -1,6 +1,14 @@ # Sample Primary Agent Instructions -Welcome to the example Codex home directory. 
This `AGENTS.md` mirrors what you might keep under `~/.codex/AGENTS.md`. +This directory demonstrates a multi-agent workflow. You are the coordinator: -- Default behaviour: focus on quick experiments in the current repository. -- Keep answers concise; link back to multi-agent documentation when relevant. +1. **Understand the request.** Gather context, restate the goal, and identify missing details. +2. **Choose when to delegate.** If the task needs options, architecture, or brainstorming, craft a detailed prompt for the ideas provider and hand it off immediately. The orchestrator picks up messages that start with `#ideas_provider …`, so include the full context it needs without waiting for the user to ask. +3. **Critique before action.** Once you have a leading option, summarize it for the critic via `#critic …`. Ask for risks, missing tests, and edge cases so you surface blockers early. +4. **Synthesize next steps.** Combine ideation and critique into a concise plan, call out open questions, and suggest the single most sensible next action. + +General rules: + +- Keep your own replies short unless the user explicitly wants depth; link to `ai-temp/` docs when the user needs background. +- If a request clearly doesn’t benefit from delegation, note why you’re handling it solo. +- For manual testing you can always type `#ideas_provider …` or `#critic …` yourself—the orchestrator uses the same pathway either way. diff --git a/ai-temp/example-codex-home/README.md b/ai-temp/example-codex-home/README.md index a0ebeb57c75b..26b62d775cf0 100644 --- a/ai-temp/example-codex-home/README.md +++ b/ai-temp/example-codex-home/README.md @@ -1,28 +1,31 @@ # Sample Codex Home Setup -This directory mirrors a minimal `~/.codex` layout so you can try the new multi-agent loader without touching your real config. +This directory mirrors a minimal `~/.codex` layout so you can try the multi-agent loader without touching your real config. 
## Directory Structure - `config.toml`: baseline settings used when no sub-agent is selected. -- `AGENTS.md`: default instruction set for the primary agent. -- `agents/rust_test_writer`: sub-agent focused on Rust testing. -- `agents/test_driver`: sub-agent that keeps sandboxing strict while running checks. +- `AGENTS.md`: default instruction set for the primary agent (orchestrator). +- `agents/ideas_provider/`: GPT-5 sub-agent that proposes multiple approaches. +- `agents/critic/`: GPT-5-nano sub-agent that reviews the leading option. - `log/`, `sessions/`: empty placeholders so Codex can write logs and rollouts. ## Quick Start ```bash -export CODEX_HOME="$(pwd)/ai-temp/example-codex-home" +# Build the CLI once (from /path/to/repo) +cargo build -p codex-cli -# Primary agent (uses AGENTS.md + config.toml in this directory) -codex --help +# Launch the TUI against this sample Codex home +CODEX_HOME="$(pwd)/ai-temp/example-codex-home" target/debug/codex -# Rust-focused sub-agent -codex --agent rust_test_writer +# Launch a specific sub-agent directly +CODEX_HOME="$(pwd)/ai-temp/example-codex-home" target/debug/codex --agent ideas_provider +CODEX_HOME="$(pwd)/ai-temp/example-codex-home" target/debug/codex --agent critic -# Test driver sub-agent with read-only sandbox -codex --agent test_driver +# Inside the primary session you can delegate manually: +# type: '#ideas_provider outline parser refactors' +# Watch logs in log/codex-tui.log to confirm delegation activity. ``` Unset `CODEX_HOME` (or point it back to your real path) once you're done experimenting. diff --git a/ai-temp/example-codex-home/agents/critic/AGENTS.md b/ai-temp/example-codex-home/agents/critic/AGENTS.md new file mode 100644 index 000000000000..a55ea71fe9c4 --- /dev/null +++ b/ai-temp/example-codex-home/agents/critic/AGENTS.md @@ -0,0 +1,5 @@ +# Critic Agent + +- Review proposals produced by other agents or the user and highlight risks, gaps, and missing tests. 
+- Rate each idea on a 1–5 scale for feasibility and impact. +- Recommend one actionable next step or clarifying question; avoid producing final code. diff --git a/ai-temp/example-codex-home/agents/test_driver/config.toml b/ai-temp/example-codex-home/agents/critic/config.toml similarity index 72% rename from ai-temp/example-codex-home/agents/test_driver/config.toml rename to ai-temp/example-codex-home/agents/critic/config.toml index 936a98410e4c..19392ad4e280 100644 --- a/ai-temp/example-codex-home/agents/test_driver/config.toml +++ b/ai-temp/example-codex-home/agents/critic/config.toml @@ -1,4 +1,4 @@ -model = "gpt-4o-mini" +model = "gpt-5-nano" sandbox_mode = "read-only" [history] diff --git a/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md b/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md new file mode 100644 index 000000000000..86f37e6f8456 --- /dev/null +++ b/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md @@ -0,0 +1,5 @@ +# Ideas Provider Agent + +- Generate creative implementation approaches, step-by-step outlines, and draft code snippets. +- Focus on breadth: offer at least three distinct ideas before recommending one. +- Avoid running commands; defer execution to other agents or the user. 
diff --git a/ai-temp/example-codex-home/agents/rust_test_writer/config.toml b/ai-temp/example-codex-home/agents/ideas_provider/config.toml similarity index 79% rename from ai-temp/example-codex-home/agents/rust_test_writer/config.toml rename to ai-temp/example-codex-home/agents/ideas_provider/config.toml index 74b1fce60b99..62f129767ecf 100644 --- a/ai-temp/example-codex-home/agents/rust_test_writer/config.toml +++ b/ai-temp/example-codex-home/agents/ideas_provider/config.toml @@ -1,4 +1,4 @@ -model = "o1-mini" +model = "gpt-5" sandbox_mode = "workspace-write" [history] diff --git a/ai-temp/example-codex-home/agents/rust_test_writer/AGENTS.md b/ai-temp/example-codex-home/agents/rust_test_writer/AGENTS.md deleted file mode 100644 index 080550f745a7..000000000000 --- a/ai-temp/example-codex-home/agents/rust_test_writer/AGENTS.md +++ /dev/null @@ -1,5 +0,0 @@ -# Rust Test Writer Agent - -- Own the Rust test suite; prioritize `cargo test -p ` workflows. -- Suggest additional assertions and snapshot updates when behaviour changes. -- Assume the working directory is the repository root unless overridden. diff --git a/ai-temp/example-codex-home/agents/test_driver/AGENTS.md b/ai-temp/example-codex-home/agents/test_driver/AGENTS.md deleted file mode 100644 index 36bcfbc87d63..000000000000 --- a/ai-temp/example-codex-home/agents/test_driver/AGENTS.md +++ /dev/null @@ -1,5 +0,0 @@ -# Test Driver Agent - -- Act as a build-and-test orchestrator. -- Run only idempotent commands (`cargo check`, `cargo test`, `npm test`). -- Summarize failures succinctly with probable fixes. 
diff --git a/ai-temp/example-codex-home/config.toml b/ai-temp/example-codex-home/config.toml index 3093f3656bde..3e68b7fe19f3 100644 --- a/ai-temp/example-codex-home/config.toml +++ b/ai-temp/example-codex-home/config.toml @@ -1,4 +1,4 @@ -model = "gpt-4o-mini" +model = "gpt-5" sandbox_mode = "workspace-write" [history] diff --git a/ai-temp/orchestration-integration.md b/ai-temp/orchestration-integration.md new file mode 100644 index 000000000000..be75419e6549 --- /dev/null +++ b/ai-temp/orchestration-integration.md @@ -0,0 +1,179 @@ +# Multi-Agent Orchestration Integration Plan + +This document describes how to wire true sub-agent orchestration into the Codex CLI so the primary agent can delegate work to agent profiles living under `~/.codex/agents/<agent_id>/`. It focuses on runtime control-flow, UI/UX, and minimal-coupling integration points in the existing codebase. + +--- + +## 1. Runtime Architecture + +### 1.1 Components + +- **`codex-multi-agent` crate (`codex-rs/multi-agent/src/lib.rs`)** + Already exposes `AgentId`, `AgentRegistry`, and async loaders that return `AgentContext` values (merged `ConfigToml` + `Config`). We extend this crate with an orchestration module to keep agent resolution and config cloning isolated from the rest of the app. + +- **Orchestrator core (new)** + Proposed module `codex-rs/multi-agent/src/orchestrator.rs` exporting: + - `AgentHandle`: carries `AgentContext`, active `ConversationId`, and bookkeeping (start/end timestamps, status). + - `DelegateRequest`: SPA-style struct describing who/what to run (`agent_id`, prompt payload, optional working directory override). + - `AgentOrchestrator`: stateful controller that owns: + - A primary `AgentHandle` (mirrors currently running conversation). + - A per-agent `ConversationManager` + `UnboundedSender<Op>` pair created via `ConversationManager::new_conversation` (`codex-rs/core/src/conversation_manager.rs:57`). 
+ - Result channels to stream `Event` values back to the primary UI after post-processing. + +- **`ConversationManager` reuse** + Sub-agent sessions use the same `ConversationManager` entry points. The orchestrator calls `ConversationManager::new_conversation` with the agent-specific `Config` so all persistence automatically lands in `~/.codex/agents/<agent_id>/` (per §2.2). + +- **Primary session** + Unchanged: `tui::App` (`codex-rs/tui/src/app.rs:78`) continues to own a `ConversationManager` for the main agent. The orchestrator is injected so it can spawn additional conversations on demand. + +### 1.2 Execution Flow + +1. **Delegate trigger** + - User explicitly requests delegation (see UI plan below), or the primary agent emits a structured tool call. + - We normalize the intent into `DelegateRequest`. + +2. **Agent resolution** + - `AgentOrchestrator::resolve_agent` calls `AgentConfigLoader::load` with the requested `AgentId`. + - On success, the orchestrator instantiates / reuses a `ConversationManager` scoped to that agent. Authentication stays shared (`AuthManager` from the primary session) per current design docs. + +3. **Conversation bootstrap** + - Call `ConversationManager::new_conversation` with the agent `Config`. + - The orchestrator captures the new `UnboundedSender<Op>` from `spawn_agent` (`codex-rs/tui/src/chatwidget/agent.rs:16`) or an equivalent helper in the orchestrator crate. + +4. **Task execution** + - The orchestrator forwards the translated prompt into the sub-agent conversation (`conversation.submit`). + - Streamed `Event` values are intercepted before they reach the UI. For every event: + - Persist to the sub-agent transcript as normal (handled by core). + - Convert to orchestrator messages (`DelegateProgress`, `DelegateOutput`), then forward to the primary session via a new `AppEvent::DelegateUpdate`. + +5. 
**Completion and summary** + - When `EventMsg::TaskComplete` fires, the orchestrator synthesizes a summary cell (e.g., `history_cell::AgentMessageCell`) and injects it into the primary transcript via `AppEvent::InsertHistoryCell`. + - Store a compact record (duration, exit status) for `/status` display and optional audit logging (`~/.codex/log/multi-agent.log` per `ai-temp/persistence-design.md`). + +6. **Cleanup** + - Keep the sub-agent conversation alive if the profile supports follow-up chat, otherwise call `ConversationManager::remove_conversation`. + +--- + +## 2. Control-Flow Integration + +### 2.1 Entry Points + +| Concern | File | Hook | +| --- | --- | --- | +| Orchestrator instantiation | `codex-rs/tui/src/app.rs:82` | Inject an `AgentOrchestrator` alongside the existing `ConversationManager`. | +| Slash-command parsing | `codex-rs/tui/src/slash_command.rs` & `codex-rs/tui/src/chatwidget.rs:1126` | Add `/delegate` (or `/agent`) command to open a delegate picker or dispatch a delegate request. | +| UI → orchestrator request | `codex-rs/tui/src/chatwidget.rs:773` (user submissions) | When the message starts with `#agent:` or `/delegate`, emit `AppEvent::DelegateRequest`. | +| App event handling | `codex-rs/tui/src/app.rs:247` (`while let Some(event)`) | Route new `AppEvent::DelegateRequest` to `AgentOrchestrator::handle_request`. | +| Event fan-in | `codex-rs/tui/src/app.rs:330` | Handle `AppEvent::DelegateUpdate` to mutate transcript/history cells. | +| Status card | `codex-rs/tui/src/status/card.rs:68` | Pull orchestrator metrics (active agents, last run) to display in `/status`. | + +### 2.2 Persistence + +- Sub-agent sessions reuse existing persistence automatically because `Config::codex_home` already points at `~/.codex/agents/<agent_id>` once we load through `AgentConfigLoader`. +- For the primary history: add summary inserts via `AppEvent::InsertHistoryCell` (`codex-rs/tui/src/app_event.rs:31`). No changes needed in core rollout recording. 
+ +### 2.3 Error Handling + +- Map orchestration errors to `AppEvent::InsertHistoryCell` with `history_cell::new_error_event` so failures surface in the main transcript. +- Log details with `tracing::error!` inside the orchestrator, aligning with the `ai-temp/error-handling.md` guidance. + +--- + +## 3. UI & UX Plan + +### 3.1 Invocation + +- **Slash command**: `/delegate [prompt...]` + - Add `SlashCommand::Delegate` in `codex-rs/tui/src/slash_command.rs`. + - In `ChatWidget::dispatch_command` (`codex-rs/tui/src/chatwidget.rs:1126`), call a new method `open_delegate_dialog()` that lists available agents via `AgentRegistry::list_agent_ids`. + +- **Inline mention**: allow prompts to start with `#<agent_id>` to route directly. Processing lives in `ChatWidget::submit_text_message` where we already prepare `InputItem` vectors. + - **Implemented**: the current TUI recognizes prompts beginning with `#agent_id` and hands them to the orchestrator, so progress and completion surface directly in the transcript without additional UI steps. + +### 3.2 Transcript Presentation + +- Introduce a specialized history cell (e.g., `DelegationSummaryCell`) under `codex-rs/tui/src/history_cell.rs`. + - Show a header `↳ rust_test_writer (success in 23s)` and embed the sub-agent's final answer. + - Link to the sub-agent session path using the existing `SessionHeader` styling helpers (`codex-rs/tui/src/chatwidget/session_header.rs`). + +- While the sub-agent runs, insert a “progress” cell (spinner) similar to exec command cells (`codex-rs/tui/src/exec_cell/render.rs:157`). Update via `DelegateProgress` events. + +### 3.3 Status View + +- Extend `compose_agents_summary` (`codex-rs/tui/src/status/helpers.rs:14`) to append active sub-agent counts and last-run statuses by querying the orchestrator handle cache. + +### 3.4 Keyboard & UX + +- Shortcut: `Ctrl+D` opens the delegate picker when the composer is empty. 
+- For task isolation, disable `/delegate` while another sub-agent call is running unless the selected agent supports concurrent runs (metadata flag in agent config). + +--- + +## 4. Minimal Coupling Strategy + +1. **Keep core unaware** + - No changes to `codex-rs/core/src/codex.rs` or the protocol. The orchestrator consumes the existing `Op`/`Event` API via `CodexConversation`. + +2. **Orchestrator as a library** + - Implement orchestration in `codex-multi-agent` (new module) so the CLI/TUI crates depend only on a slim API: + ```rust + pub struct AgentOrchestrator { /* … */ } + impl AgentOrchestrator { + pub async fn available_agents(&self) -> Result<Vec<AgentId>>; + pub async fn delegate(&self, request: DelegateRequest) -> Result<DelegateRunId>; + pub fn subscribe(&self) -> mpsc::UnboundedReceiver<DelegateEvent>; + } + ``` + - This keeps the TUI glue thin and defers heavy logic to the crate that already knows how to load configs. + +3. **UI changes confined to `tui/`** + - Avoid threading orchestration state through unrelated widgets. Only `ChatWidget`, `App`, and the status card interact with the orchestrator. + +4. **CLI parity** + - Other frontends (`codex exec`, `codex cloud`) can opt in later because orchestration lives behind a library boundary. No changes required now. + +--- + +## 5. Implementation Phases + +1. **Library groundwork** + - Extend `codex-multi-agent` with orchestrator types and helper methods. + - Add unit tests verifying `delegate()` spawns conversations and streams events (mock `ConversationManager`). + +2. **TUI integration** + - Instantiate orchestrator in `App::run` (`codex-rs/tui/src/app.rs:84`). + - Add new `AppEvent` variants (`codex-rs/tui/src/app_event.rs:15`). + - Update `ChatWidget` to emit delegate requests and render updates. + +3. **UI polish** + - Add history cell types and status indicators. + - Expose keyboard shortcuts and help text. + +4. **Testing** + - Snapshot tests for `/delegate` output in `tui/src/chatwidget/tests.rs`. 
+ - Integration test creating a fake agent directory and verifying the orchestrator selects the correct `Config`. + - Manual smoke test using the sample Codex home in `ai-temp/example-codex-home/`. + +--- + +## 6. Decisions & Open Questions + +- **Concurrent delegates**: Launch one delegate at a time. The orchestrator stores requests in a FIFO queue and short-circuits when the active run completes so parallelism can be enabled later without redesign (keep the queue abstraction in place). +- **Prompt hand-off semantics**: The primary agent composes the sub-agent prompt with all relevant context before invoking `delegate()`. The orchestrator forwards the prompt verbatim without trimming history. +- **Return payload**: Still open. Default plan remains to summarize results in the primary transcript while exposing a “view details” action to open the sub-agent session. +- **Auth isolation**: Shared. All agents continue to use the primary `AuthManager`; per-agent credentials are out of scope unless a future requirement emerges. + +--- + +## 7. References + +- Agent loader implementation – `codex-rs/multi-agent/src/lib.rs` +- Conversation bootstrap – `codex-rs/core/src/conversation_manager.rs:57` +- TUI spawn helpers – `codex-rs/tui/src/chatwidget/agent.rs:16` +- Slash command dispatch – `codex-rs/tui/src/chatwidget.rs:1126` +- History cell construction – `codex-rs/tui/src/history_cell.rs` +- Status card summary – `codex-rs/tui/src/status/helpers.rs:14` +- App event wiring – `codex-rs/tui/src/app.rs:212` & `codex-rs/tui/src/app_event.rs:15` + +These anchors will guide the low-impact code changes required to hook orchestration into the existing CLI. 
diff --git a/ai-temp/ui-ux-delegation.md b/ai-temp/ui-ux-delegation.md new file mode 100644 index 000000000000..720ba8aaf57d --- /dev/null +++ b/ai-temp/ui-ux-delegation.md @@ -0,0 +1,29 @@ +# Delegation UI & UX Notes + +## Current Flow +- Primary agent streaming uses `StreamController` to animate delta lines (`codex-rs/tui/src/chatwidget.rs:698`, `streaming/controller.rs:9`). The controller emits `AgentMessageCell` entries and drives the commit animation via `AppEvent::StartCommitAnimation`. +- Exec and MCP tool calls rely on dedicated history cells (`ExecCell`, `new_active_mcp_tool_call`) with live updates for begin/end events (`chatwidget.rs:633`, `chatwidget.rs:909`). +- Delegation events from the orchestrator reach `App::handle_delegate_update` (`codex-rs/tui/src/app.rs:446`). `DelegateEvent::Delta` now streams sub-agent output through the same `StreamController` pipeline, while start/completion still use `add_info_message`/`add_delegate_completion` for context. +- `DelegateEvent::Started` activates the bottom-pane status indicator with a “Delegating to #<agent_id>” header and hides it once the run finishes (`codex-rs/tui/src/chatwidget.rs:2165-2196`), reducing ambiguity about who is currently working. + +## Observed Gaps +- No transcript linking: once the delegate finishes, the TUI shows the final answer but lacks a quick way to drill into the delegate’s own session (the path is only available in logs). +- Duration is implicit: the status header flips back to “Working” when delegation ends, but we still do not surface elapsed time or a final summary chip in the transcript. + +## UX Goals +1. **Live streaming** – continue to reuse `StreamController`, but add safeguards against duplicate completions (covered by the new test) and consider showing a collapsed summary once the stream ends. +2. **Session breadcrumbs** – insert a history cell with the delegate’s session ID and an action (e.g., `/delegate-open <session-id>`) to reopen or inspect the sub-agent log. +3. 
**Status context** – enhance the existing “Delegating to #…” banner with elapsed time and/or a persistent history chip that points back to the sub-agent run. + +## Implementation Notes +- Delegation already reuses `StreamController` via `DelegateEvent::Delta`; keep the plumbing local to `ChatWidget` so other surfaces can opt-in without pulling UI dependencies into the orchestrator. +- Add new history cell types (e.g., `DelegateStartCell`, `DelegateSummaryCell`) to avoid overloading existing exec/info cells. +- Propagate failure (`DelegateEvent::Failed`) into a red error history cell and optionally a notification (see `Notification::AgentTurnComplete` in `chatwidget.rs:1871` for pattern). +- Update `status/helpers.rs` so `/status` lists active/past delegates with timestamps. + +## Outstanding Questions +- Should delegate output merge into the primary transcript, or display in a collapsible block to avoid clutter? +- Do we expose a command to jump into the delegate’s rollout file (`codex_core::ConversationManager::resume_conversation_from_rollout`) from the UI? +- How do we handle nested delegation (delegate triggering its own sub-delegate)? Requires queueing and UI affordances. + +These design notes align with the wiring in `codex-rs/tui/src/app.rs`, `codex-rs/tui/src/chatwidget.rs`, and the orchestrator in `codex-rs/multi-agent/src/orchestrator.rs`. Further iterations should focus on breadcrumbs, elapsed-time surfacing, and tighter transcript integration now that delta streaming and status updates are in place. 
diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 2578e0c1ff08..632028ab5562 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1264,8 +1264,10 @@ dependencies = [ "serde", "serde_json", "tempfile", + "thiserror 2.0.16", "tokio", "toml", + "uuid", ] [[package]] diff --git a/codex-rs/multi-agent/Cargo.toml b/codex-rs/multi-agent/Cargo.toml index 93514af898ea..2abba84b4e5b 100644 --- a/codex-rs/multi-agent/Cargo.toml +++ b/codex-rs/multi-agent/Cargo.toml @@ -9,6 +9,9 @@ codex-common = { workspace = true, features = ["cli"] } codex-core.workspace = true serde.workspace = true toml.workspace = true +thiserror.workspace = true +tokio = { workspace = true, features = ["rt", "sync"] } +uuid.workspace = true [dev-dependencies] serde_json.workspace = true diff --git a/codex-rs/multi-agent/src/lib.rs b/codex-rs/multi-agent/src/lib.rs index 89443fdc6e95..68bb79e33944 100644 --- a/codex-rs/multi-agent/src/lib.rs +++ b/codex-rs/multi-agent/src/lib.rs @@ -330,6 +330,14 @@ fn merge_toml_values(base: &mut TomlValue, overlay: &TomlValue) { } } +pub mod orchestrator; +pub use orchestrator::AgentOrchestrator; +pub use orchestrator::DelegateEvent; +pub use orchestrator::DelegatePrompt; +pub use orchestrator::DelegateRequest; +pub use orchestrator::DelegateRunId; +pub use orchestrator::OrchestratorError; + #[cfg(test)] mod tests { use super::*; diff --git a/codex-rs/multi-agent/src/orchestrator.rs b/codex-rs/multi-agent/src/orchestrator.rs new file mode 100644 index 000000000000..6a1a534b18e4 --- /dev/null +++ b/codex-rs/multi-agent/src/orchestrator.rs @@ -0,0 +1,335 @@ +use std::sync::Arc; +use std::time::SystemTime; + +use codex_common::CliConfigOverrides; +use codex_core::AuthManager; +use codex_core::ConversationManager; +use codex_core::config::ConfigOverrides; +use codex_core::protocol::EventMsg; +use codex_core::protocol::InputItem; +use codex_core::protocol::Op; +use codex_core::protocol::SessionSource; +use std::time::Duration; +use tokio::sync::Mutex; 
+use tokio::sync::mpsc; +use uuid::Uuid; + +use crate::AgentConfigLoader; +use crate::AgentId; + +/// Identifier used to correlate delegate runs. +pub type DelegateRunId = String; + +/// Request payload used when delegating work to a sub-agent. +#[derive(Debug, Clone)] +pub struct DelegateRequest { + pub agent_id: AgentId, + pub prompt: DelegatePrompt, +} + +/// The prompt content forwarded to the sub-agent. +#[derive(Debug, Clone)] +pub struct DelegatePrompt { + pub text: String, +} + +impl DelegatePrompt { + pub fn new(text: impl Into) -> Self { + Self { text: text.into() } + } +} + +/// Progress and completion updates emitted by the orchestrator. +#[derive(Debug, Clone)] +pub enum DelegateEvent { + Started { + run_id: DelegateRunId, + agent_id: AgentId, + prompt: String, + started_at: SystemTime, + }, + Delta { + run_id: DelegateRunId, + agent_id: AgentId, + chunk: String, + }, + Completed { + run_id: DelegateRunId, + agent_id: AgentId, + output: Option, + duration: Duration, + }, + Failed { + run_id: DelegateRunId, + agent_id: AgentId, + error: String, + }, +} + +/// Errors that can surface when orchestrating delegates. +#[derive(thiserror::Error, Debug)] +pub enum OrchestratorError { + #[error("another delegate is already running")] + DelegateInProgress, + #[error("agent `{0}` not found")] + AgentNotFound(String), + #[error("failed to enqueue delegate: {0}")] + DelegateSetupFailed(String), +} + +/// Lightweight controller that spins up sub-agent conversations on demand and +/// streams condensed updates back to the caller. 
+pub struct AgentOrchestrator { + loader: AgentConfigLoader, + auth_manager: Arc, + session_source: SessionSource, + cli_overrides: CliConfigOverrides, + config_overrides: ConfigOverrides, + listeners: Mutex>>, + active_run: Mutex>, +} + +impl AgentOrchestrator { + pub fn new( + global_codex_home: impl Into, + auth_manager: Arc, + session_source: SessionSource, + cli_overrides: CliConfigOverrides, + config_overrides: ConfigOverrides, + ) -> Self { + let loader = AgentConfigLoader::new(global_codex_home.into()); + Self { + loader, + auth_manager, + session_source, + cli_overrides, + config_overrides, + listeners: Mutex::new(Vec::new()), + active_run: Mutex::new(None), + } + } + + /// Register a listener that receives [`DelegateEvent`] updates. + pub async fn subscribe(self: &Arc) -> mpsc::UnboundedReceiver { + let (tx, rx) = mpsc::unbounded_channel(); + self.listeners.lock().await.push(tx); + rx + } + + /// Trigger a delegate run. Returns the run id if successfully enqueued. + pub async fn delegate( + self: &Arc, + request: DelegateRequest, + ) -> std::result::Result { + let mut active = self.active_run.lock().await; + if active.is_some() { + return Err(OrchestratorError::DelegateInProgress); + } + + let run_id = Uuid::new_v4().to_string(); + *active = Some(run_id.clone()); + drop(active); + + let prompt_text = request.prompt.text.clone(); + self.emit(DelegateEvent::Started { + run_id: run_id.clone(), + agent_id: request.agent_id.clone(), + prompt: prompt_text, + started_at: SystemTime::now(), + }) + .await; + + let loader = self.loader.clone(); + let auth_manager = self.auth_manager.clone(); + let session_source = self.session_source; + let cli_overrides = self.cli_overrides.clone(); + let config_overrides = self.config_overrides.clone(); + let orchestrator = Arc::clone(self); + let run_id_clone = run_id.clone(); + tokio::spawn(async move { + let orchestrator_for_task = Arc::clone(&orchestrator); + let result = orchestrator_for_task + .run_delegate_task( + loader, 
+ auth_manager, + session_source, + cli_overrides, + config_overrides, + run_id_clone.clone(), + request, + ) + .await; + + match result { + Ok(output) => { + orchestrator + .emit(DelegateEvent::Completed { + run_id: run_id_clone.clone(), + agent_id: output.agent_id, + output: output.message, + duration: output.duration, + }) + .await; + } + Err(err) => { + orchestrator + .emit(DelegateEvent::Failed { + run_id: run_id_clone.clone(), + agent_id: err.agent_id, + error: err.error, + }) + .await; + } + } + + let mut active = orchestrator.active_run.lock().await; + *active = None; + }); + + Ok(run_id) + } + + async fn emit(&self, event: DelegateEvent) { + let mut listeners = self.listeners.lock().await; + listeners.retain(|tx| tx.send(event.clone()).is_ok()); + } + + async fn run_delegate_task( + self: Arc, + loader: AgentConfigLoader, + auth_manager: Arc, + session_source: SessionSource, + cli_overrides: CliConfigOverrides, + config_overrides: ConfigOverrides, + run_id: DelegateRunId, + request: DelegateRequest, + ) -> std::result::Result { + let start = SystemTime::now(); + let agent_id = request.agent_id.clone(); + let context = loader + .load(Some(&agent_id), &cli_overrides, config_overrides) + .await + .map_err(|err| DelegateFailure { + agent_id: agent_id.clone(), + error: format!("failed to load agent config: {err:#}"), + })?; + + let config = context.into_config(); + let conversation_manager = Arc::new(ConversationManager::new( + auth_manager.clone(), + session_source, + )); + + let conversation_bundle = conversation_manager + .new_conversation(config) + .await + .map_err(|err| DelegateFailure { + agent_id: agent_id.clone(), + error: format!("failed to start conversation: {err:#}"), + })?; + let conversation = conversation_bundle.conversation; + + let mut items = Vec::new(); + if !request.prompt.text.trim().is_empty() { + items.push(InputItem::Text { + text: request.prompt.text.clone(), + }); + } + if items.is_empty() { + return Err(DelegateFailure { + agent_id: 
agent_id.clone(), + error: "delegated prompt is empty".to_string(), + }); + } + + conversation + .submit(Op::UserInput { items }) + .await + .map_err(|err| DelegateFailure { + agent_id: agent_id.clone(), + error: format!("failed to submit delegate prompt: {err:#}"), + })?; + + let mut aggregated = String::new(); + loop { + let event = conversation + .next_event() + .await + .map_err(|err| DelegateFailure { + agent_id: agent_id.clone(), + error: format!("failed to read delegate events: {err:#}"), + })?; + + match event.msg { + EventMsg::AgentMessage(msg) => { + if aggregated.is_empty() { + aggregated = msg.message.clone(); + self.emit(DelegateEvent::Delta { + run_id: run_id.clone(), + agent_id: agent_id.clone(), + chunk: msg.message, + }) + .await; + } else { + aggregated = msg.message; + } + } + EventMsg::AgentMessageDelta(delta) => { + aggregated.push_str(&delta.delta); + self.emit(DelegateEvent::Delta { + run_id: run_id.clone(), + agent_id: agent_id.clone(), + chunk: delta.delta, + }) + .await; + } + EventMsg::TaskComplete(task_complete) => { + let duration = start.elapsed().unwrap_or(Duration::ZERO); + let message = task_complete + .last_agent_message + .or_else(|| (!aggregated.is_empty()).then_some(aggregated.clone())); + + let _ = conversation.submit(Op::Shutdown).await; + + return Ok(DelegateSuccess { + agent_id, + message, + duration, + }); + } + EventMsg::Error(err) => { + let _ = conversation.submit(Op::Shutdown).await; + return Err(DelegateFailure { + agent_id, + error: format!("delegate reported error: {}", err.message), + }); + } + EventMsg::TurnAborted(reason) => { + let _ = conversation.submit(Op::Shutdown).await; + return Err(DelegateFailure { + agent_id, + error: format!("delegate aborted: {:?}", reason.reason), + }); + } + EventMsg::ShutdownComplete => break, + _ => {} + } + } + + Err(DelegateFailure { + agent_id, + error: "delegate ended unexpectedly".to_string(), + }) + } +} + +struct DelegateSuccess { + agent_id: AgentId, + message: Option, + 
duration: Duration, +} + +struct DelegateFailure { + agent_id: AgentId, + error: String, +} diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index cb3dea5e60d9..a59e8ac8b7e9 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -1,5 +1,6 @@ use crate::app_backtrack::BacktrackState; use crate::app_event::AppEvent; +use crate::app_event::DelegateRequestPayload; use crate::app_event_sender::AppEventSender; use crate::bottom_pane::ApprovalRequest; use crate::chatwidget::ChatWidget; @@ -21,6 +22,10 @@ use codex_core::model_family::find_family_for_model; use codex_core::protocol::SessionSource; use codex_core::protocol::TokenUsage; use codex_core::protocol_config_types::ReasoningEffort as ReasoningEffortConfig; +use codex_multi_agent::AgentOrchestrator; +use codex_multi_agent::DelegateEvent; +use codex_multi_agent::DelegateRequest; +use codex_multi_agent::OrchestratorError; use codex_protocol::ConversationId; use color_eyre::eyre::Result; use color_eyre::eyre::WrapErr; @@ -50,6 +55,7 @@ pub(crate) struct App { pub(crate) app_event_tx: AppEventSender, pub(crate) chat_widget: ChatWidget, pub(crate) auth_manager: Arc, + pub(crate) delegate_orchestrator: Arc, /// Config is stored here so we can recreate ChatWidgets as needed. 
pub(crate) config: Config, @@ -74,9 +80,11 @@ pub(crate) struct App { } impl App { + #[allow(clippy::too_many_arguments)] pub async fn run( tui: &mut tui::Tui, auth_manager: Arc, + delegate_orchestrator: Arc, config: Config, active_profile: Option, initial_prompt: Option, @@ -87,6 +95,14 @@ impl App { let (app_event_tx, mut app_event_rx) = unbounded_channel(); let app_event_tx = AppEventSender::new(app_event_tx); + let mut delegate_event_rx = delegate_orchestrator.subscribe().await; + let delegate_app_event_tx = app_event_tx.clone(); + tokio::spawn(async move { + while let Some(event) = delegate_event_rx.recv().await { + delegate_app_event_tx.send(AppEvent::DelegateUpdate(event)); + } + }); + let conversation_manager = Arc::new(ConversationManager::new( auth_manager.clone(), SessionSource::Cli, @@ -142,6 +158,7 @@ impl App { app_event_tx, chat_widget, auth_manager: auth_manager.clone(), + delegate_orchestrator, config, active_profile, file_search, @@ -232,6 +249,12 @@ impl App { self.chat_widget = ChatWidget::new(init, self.server.clone()); tui.frame_requester().schedule_frame(); } + AppEvent::DelegateRequest(payload) => { + self.handle_delegate_request(payload).await; + } + AppEvent::DelegateUpdate(update) => { + self.handle_delegate_update(update); + } AppEvent::InsertHistoryCell(cell) => { let cell: Arc = cell.into(); if let Some(Overlay::Transcript(t)) = &mut self.overlay { @@ -404,6 +427,75 @@ impl App { Ok(true) } + async fn handle_delegate_request(&mut self, payload: DelegateRequestPayload) { + let request: DelegateRequest = payload.clone().into(); + if let Err(err) = self.delegate_orchestrator.delegate(request).await { + let message = match err { + OrchestratorError::DelegateInProgress => { + "A delegate is already running.".to_string() + } + OrchestratorError::AgentNotFound(id) => format!("Agent `{id}` not found."), + OrchestratorError::DelegateSetupFailed(reason) => { + format!("Unable to start delegate: {reason}") + } + }; + 
self.chat_widget.add_error_message(message); + } + } + + fn handle_delegate_update(&mut self, event: DelegateEvent) { + match event { + DelegateEvent::Started { + run_id, + agent_id, + prompt, + .. + } => { + self.chat_widget + .on_delegate_started(&run_id, &agent_id, &prompt); + } + DelegateEvent::Delta { run_id, chunk, .. } => { + self.chat_widget.on_delegate_delta(&run_id, &chunk); + } + DelegateEvent::Completed { + run_id, + agent_id, + output, + duration, + .. + } => { + let streamed = self.chat_widget.on_delegate_completed(&run_id); + let hint = Some(format!( + "finished in {}", + Self::format_delegate_duration(duration) + )); + let response = output.as_deref().filter(|_| !streamed); + self.chat_widget + .add_delegate_completion(&agent_id, response, hint); + } + DelegateEvent::Failed { + run_id, + agent_id, + error, + } => { + self.chat_widget + .on_delegate_failed(&run_id, &agent_id, &error); + } + } + } + + fn format_delegate_duration(duration: Duration) -> String { + if duration.as_secs() >= 60 { + let mins = duration.as_secs() / 60; + let secs = duration.as_secs() % 60; + format!("{mins}m{secs:02}s") + } else if duration.as_millis() >= 1000 { + format!("{:.1}s", duration.as_secs_f32()) + } else { + format!("{:.0}ms", duration.as_millis()) + } + } + pub(crate) fn token_usage(&self) -> codex_core::protocol::TokenUsage { self.chat_widget.token_usage() } @@ -484,15 +576,21 @@ mod tests { use crate::history_cell::HistoryCell; use crate::history_cell::UserHistoryCell; use crate::history_cell::new_session_info; + use codex_common::CliConfigOverrides; use codex_core::AuthManager; use codex_core::CodexAuth; use codex_core::ConversationManager; + use codex_core::config::ConfigOverrides; use codex_core::protocol::SessionConfiguredEvent; + use codex_core::protocol::SessionSource; use codex_protocol::ConversationId; use ratatui::prelude::Line; use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::AtomicBool; + use tempfile::tempdir; + + use 
codex_multi_agent::AgentOrchestrator; fn make_test_app() -> App { let (chat_widget, app_event_tx, _rx, _op_rx) = make_chatwidget_manual_with_sender(); @@ -503,6 +601,15 @@ mod tests { ))); let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); + let auth_manager_arc = auth_manager.clone(); + let temp_home = tempdir().expect("tempdir"); + let delegate_orchestrator = Arc::new(AgentOrchestrator::new( + temp_home.path().to_path_buf(), + auth_manager_arc, + SessionSource::Cli, + CliConfigOverrides::default(), + ConfigOverrides::default(), + )); let file_search = FileSearchManager::new(config.cwd.clone(), app_event_tx.clone()); App { @@ -510,6 +617,7 @@ mod tests { app_event_tx, chat_widget, auth_manager, + delegate_orchestrator, config, active_profile: None, file_search, diff --git a/codex-rs/tui/src/app_event.rs b/codex-rs/tui/src/app_event.rs index 9d79c8ae1325..9c55a451e6e4 100644 --- a/codex-rs/tui/src/app_event.rs +++ b/codex-rs/tui/src/app_event.rs @@ -4,6 +4,10 @@ use codex_common::model_presets::ModelPreset; use codex_core::protocol::ConversationPathResponseEvent; use codex_core::protocol::Event; use codex_file_search::FileMatch; +use codex_multi_agent::AgentId; +use codex_multi_agent::DelegateEvent; +use codex_multi_agent::DelegatePrompt; +use codex_multi_agent::DelegateRequest; use crate::bottom_pane::ApprovalRequest; use crate::history_cell::HistoryCell; @@ -27,6 +31,12 @@ pub(crate) enum AppEvent { /// bubbling channels through layers of widgets. CodexOp(codex_core::protocol::Op), + /// Request delegation to a sub-agent. + DelegateRequest(DelegateRequestPayload), + + /// Update emitted from the orchestrator about delegate progress/completion. + DelegateUpdate(DelegateEvent), + /// Kick off an asynchronous file search for the given query (text after /// the `@`). Previous searches may be cancelled by the app layer so there /// is at most one in-flight search. 
@@ -88,3 +98,18 @@ pub(crate) enum AppEvent { /// Open the approval popup. FullScreenApprovalRequest(ApprovalRequest), } + +#[derive(Debug, Clone)] +pub(crate) struct DelegateRequestPayload { + pub agent_id: AgentId, + pub prompt: DelegatePrompt, +} + +impl From for DelegateRequest { + fn from(value: DelegateRequestPayload) -> Self { + DelegateRequest { + agent_id: value.agent_id, + prompt: value.prompt, + } + } +} diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 787e09809386..c0e9fbdff25d 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -59,6 +59,7 @@ use tokio::sync::mpsc::UnboundedSender; use tracing::debug; use crate::app_event::AppEvent; +use crate::app_event::DelegateRequestPayload; use crate::app_event_sender::AppEventSender; use crate::bottom_pane::ApprovalRequest; use crate::bottom_pane::BottomPane; @@ -111,6 +112,8 @@ use codex_git_tooling::GhostCommit; use codex_git_tooling::GitToolingError; use codex_git_tooling::create_ghost_commit; use codex_git_tooling::restore_ghost_commit; +use codex_multi_agent::AgentId; +use codex_multi_agent::DelegatePrompt; use codex_protocol::plan_tool::UpdatePlanArgs; use strum::IntoEnumIterator; @@ -263,6 +266,11 @@ pub(crate) struct ChatWidget { // Whether to add a final message separator after the last message needs_final_message_separator: bool, + delegate_run: Option, + delegate_had_stream: bool, + delegate_status_claimed: bool, + delegate_previous_status_header: Option, + last_rendered_width: std::cell::Cell>, } @@ -954,6 +962,10 @@ impl ChatWidget { ghost_snapshots: Vec::new(), ghost_snapshots_disabled: true, needs_final_message_separator: false, + delegate_run: None, + delegate_had_stream: false, + delegate_status_claimed: false, + delegate_previous_status_header: None, last_rendered_width: std::cell::Cell::new(None), } } @@ -1019,6 +1031,10 @@ impl ChatWidget { ghost_snapshots: Vec::new(), ghost_snapshots_disabled: true, 
needs_final_message_separator: false, + delegate_run: None, + delegate_had_stream: false, + delegate_status_claimed: false, + delegate_previous_status_header: None, last_rendered_width: std::cell::Cell::new(None), } } @@ -1270,6 +1286,11 @@ impl ChatWidget { return; } + // Intercept explicit delegation commands (only support text-only submissions). + if image_paths.is_empty() && !text.is_empty() && self.try_delegate_shortcut(&text) { + return; + } + self.capture_ghost_snapshot(); let mut items: Vec = Vec::new(); @@ -2107,6 +2128,9 @@ impl ChatWidget { if text.is_empty() { return; } + if self.try_delegate_shortcut(&text) { + return; + } self.submit_user_message(text.into()); } @@ -2121,6 +2145,126 @@ impl ChatWidget { self.conversation_id } + pub(crate) fn add_delegate_completion( + &mut self, + agent_id: &AgentId, + response: Option<&str>, + duration_hint: Option, + ) { + let header = format!("↳ #{agent} completed", agent = agent_id.as_str()); + self.add_info_message(header, duration_hint); + + let Some(text) = response.map(str::trim).filter(|s| !s.is_empty()) else { + return; + }; + + self.flush_answer_stream_with_separator(); + self.flush_active_cell(); + + let mut lines: Vec> = Vec::new(); + append_markdown(text, None, &mut lines, &self.config); + let cell = AgentMessageCell::new(lines, true); + self.add_to_history(cell); + self.request_redraw(); + } + + pub(crate) fn on_delegate_started(&mut self, run_id: &str, agent_id: &AgentId, prompt: &str) { + self.delegate_run = Some(run_id.to_string()); + self.delegate_had_stream = false; + self.delegate_previous_status_header = Some(self.current_status_header.clone()); + if self.bottom_pane.status_widget().is_none() { + self.bottom_pane.set_task_running(true); + self.delegate_status_claimed = true; + } else { + self.delegate_status_claimed = false; + } + self.set_status_header(format!("Delegating to #{}", agent_id.as_str())); + let trimmed = prompt.trim(); + let hint = if trimmed.is_empty() { + None + } else { + 
Some(trimmed.to_string()) + }; + self.add_info_message(format!("↳ #{agent}…", agent = agent_id.as_str()), hint); + self.request_redraw(); + } + + pub(crate) fn on_delegate_delta(&mut self, run_id: &str, chunk: &str) { + if self.delegate_run.as_deref() != Some(run_id) { + return; + } + self.delegate_had_stream = true; + self.handle_streaming_delta(chunk.to_string()); + } + + pub(crate) fn on_delegate_completed(&mut self, run_id: &str) -> bool { + if self.delegate_run.as_deref() != Some(run_id) { + return false; + } + let had_stream = self.delegate_had_stream; + if had_stream { + self.flush_answer_stream_with_separator(); + self.handle_stream_finished(); + self.app_event_tx.send(AppEvent::StopCommitAnimation); + } + if let Some(previous) = self.delegate_previous_status_header.take() { + self.set_status_header(previous); + } + if self.delegate_status_claimed { + self.bottom_pane.set_task_running(false); + self.delegate_status_claimed = false; + } + self.delegate_run = None; + self.delegate_had_stream = false; + had_stream + } + + pub(crate) fn on_delegate_failed(&mut self, run_id: &str, agent_id: &AgentId, error: &str) { + let _ = self.on_delegate_completed(run_id); + self.add_error_message(format!( + "Delegation to #{} failed: {}", + agent_id.as_str(), + error + )); + } + + fn try_delegate_shortcut(&mut self, text: &str) -> bool { + let trimmed = text.trim(); + if !trimmed.starts_with('#') { + return false; + } + + let mut parts = trimmed.splitn(2, char::is_whitespace); + let tag = parts.next().unwrap_or(""); + let prompt = parts.next().unwrap_or("").trim(); + + let raw_id = tag.trim_start_matches('#').trim_end_matches(':'); + if raw_id.is_empty() { + self.add_error_message("Specify an agent after '#'.".to_string()); + return true; + } + + let agent_id = match AgentId::parse(raw_id) { + Ok(id) => id, + Err(err) => { + self.add_error_message(format!("Invalid agent id `{raw_id}`: {err}")); + return true; + } + }; + + if prompt.is_empty() { + 
self.add_error_message("Delegate requests need a prompt.".to_string()); + return true; + } + + let payload = DelegateRequestPayload { + agent_id, + prompt: DelegatePrompt::new(prompt.to_string()), + }; + self.app_event_tx.send(AppEvent::DelegateRequest(payload)); + true + } + /// Return a reference to the widget's current config (includes any /// runtime overrides applied via TUI, e.g., model or approval policy). pub(crate) fn config_ref(&self) -> &Config { diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index 3bfdf243bc90..0a37cddc6df5 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -35,6 +35,7 @@ use codex_core::protocol::StreamErrorEvent; use codex_core::protocol::TaskCompleteEvent; use codex_core::protocol::TaskStartedEvent; use codex_core::protocol::ViewImageToolCallEvent; +use codex_multi_agent::AgentId; use codex_protocol::ConversationId; use codex_protocol::plan_tool::PlanItemArg; use codex_protocol::plan_tool::StepStatus; @@ -288,6 +289,10 @@ fn make_chatwidget_manual() -> ( ghost_snapshots: Vec::new(), ghost_snapshots_disabled: false, needs_final_message_separator: false, + delegate_run: None, + delegate_had_stream: false, + delegate_status_claimed: false, + delegate_previous_status_header: None, last_rendered_width: std::cell::Cell::new(None), }; (widget, rx, op_rx) @@ -378,6 +383,95 @@ fn test_rate_limit_warnings_monthly() { ); } +#[test] +fn hash_prefix_routes_to_delegate() { + let (mut chat, mut rx, mut op_rx) = make_chatwidget_manual(); + + chat.submit_user_message(UserMessage { + text: "#ideas_provider explore parser modularization".to_string(), + image_paths: Vec::new(), + }); + + assert!( + op_rx.try_recv().is_err(), + "delegated prompts must not reach the primary conversation" + ); + + let mut saw_delegate = false; + while let Ok(event) = rx.try_recv() { + if matches!(event, AppEvent::DelegateRequest(_)) { + saw_delegate = true; + break; + } + } + + assert!( 
+ saw_delegate, + "expected delegate request event for #ideas_provider" + ); +} + +#[test] +fn delegate_stream_deltas_and_restore_status() { + let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(); + let agent = AgentId::parse("ideas_provider").expect("valid agent id"); + + assert!(chat.bottom_pane.status_widget().is_none()); + assert_eq!(chat.current_status_header, "Working"); + + chat.on_delegate_started("run-1", &agent, "sketch integration points"); + assert_eq!(chat.delegate_run.as_deref(), Some("run-1")); + assert!(chat.delegate_status_claimed); + assert!(chat.bottom_pane.status_widget().is_some()); + assert_eq!(chat.current_status_header, "Delegating to #ideas_provider"); + + chat.on_delegate_delta("run-1", "First idea\n"); + let mut saw_start = false; + while let Ok(event) = rx.try_recv() { + if matches!(event, AppEvent::StartCommitAnimation) { + saw_start = true; + } + } + assert!( + saw_start, + "expected commit animation when streaming delegate output" + ); + + chat.on_commit_tick(); + let mut saw_history_line = false; + while let Ok(event) = rx.try_recv() { + if let AppEvent::InsertHistoryCell(cell) = event { + let text = lines_to_single_string(&cell.display_lines(80)); + if text.contains("First idea") { + saw_history_line = true; + } + } + } + assert!( + saw_history_line, + "expected streamed delegate output in history" + ); + + let streamed = chat.on_delegate_completed("run-1"); + assert!( + streamed, + "delegate completion should report streaming output" + ); + + let mut saw_stop = false; + while let Ok(event) = rx.try_recv() { + if let AppEvent::StopCommitAnimation = event { saw_stop = true } + } + assert!( + saw_stop, + "expected commit animation to stop after delegate completion" + ); + assert!(chat.delegate_run.is_none()); + assert!(!chat.delegate_status_claimed); + assert!(chat.bottom_pane.status_widget().is_none()); + assert_eq!(chat.current_status_header, "Working"); +} + // (removed experimental resize snapshot test) #[test] diff --git 
a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index 6ebbfba1e4ff..473864cf7dfb 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -6,6 +6,7 @@ use app::App; pub use app::AppExitInfo; use codex_app_server_protocol::AuthMode; +use codex_common::CliConfigOverrides; use codex_core::AuthManager; use codex_core::BUILT_IN_OSS_MODEL_PROVIDER_ID; use codex_core::CodexAuth; @@ -17,11 +18,14 @@ use codex_core::config::ConfigToml; use codex_core::find_conversation_path_by_id_str; use codex_core::protocol::AskForApproval; use codex_core::protocol::SandboxPolicy; +use codex_core::protocol::SessionSource; +use codex_multi_agent::AgentOrchestrator; use codex_ollama::DEFAULT_OSS_MODEL; use codex_protocol::config_types::SandboxMode; use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; use std::fs::OpenOptions; use std::path::PathBuf; +use std::sync::Arc; use tracing::error; use tracing_appender::non_blocking; use tracing_subscriber::EnvFilter; @@ -147,6 +151,8 @@ pub async fn run_main( show_raw_agent_reasoning: cli.oss.then_some(true), tools_web_search_request: cli.web_search.then_some(true), }; + let delegate_config_overrides = overrides.clone(); + let delegate_cli_overrides = cli.config_overrides.clone(); #[allow(clippy::print_stderr)] let agent_context = match codex_multi_agent::load_agent_context( cli.agent.as_deref(), @@ -161,6 +167,7 @@ pub async fn run_main( std::process::exit(1); } }; + let global_codex_home = agent_context.global_codex_home().to_path_buf(); let config_toml = agent_context.config_toml().clone(); let mut config = agent_context.into_config(); @@ -242,9 +249,17 @@ pub async fn run_main( let _ = tracing_subscriber::registry().with(file_layer).try_init(); }; - run_ratatui_app(cli, config, active_profile, should_show_trust_screen) - .await - .map_err(|err| std::io::Error::other(err.to_string())) + run_ratatui_app( + cli, + config, + active_profile, + should_show_trust_screen, + global_codex_home, + delegate_cli_overrides, 
+ delegate_config_overrides, + ) + .await + .map_err(|err| std::io::Error::other(err.to_string())) } async fn run_ratatui_app( @@ -252,6 +267,9 @@ async fn run_ratatui_app( config: Config, active_profile: Option, should_show_trust_screen: bool, + global_codex_home: PathBuf, + delegate_cli_overrides: CliConfigOverrides, + delegate_config_overrides: ConfigOverrides, ) -> color_eyre::Result { let mut config = config; color_eyre::install()?; @@ -336,6 +354,13 @@ async fn run_ratatui_app( session_log::maybe_init(&config); let auth_manager = AuthManager::shared(config.codex_home.clone(), false); + let delegate_orchestrator = Arc::new(AgentOrchestrator::new( + global_codex_home, + auth_manager.clone(), + SessionSource::Cli, + delegate_cli_overrides, + delegate_config_overrides, + )); let login_status = get_login_status(&config); let should_show_windows_wsl_screen = cfg!(target_os = "windows") && !config.windows_wsl_setup_acknowledged; @@ -425,6 +450,7 @@ async fn run_ratatui_app( let app_result = App::run( &mut tui, auth_manager, + delegate_orchestrator, config, active_profile, prompt, From d894ca8c896ca04f150b083e66ea60888db536d3 Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Tue, 14 Oct 2025 21:18:09 +0000 Subject: [PATCH 03/17] finalised docs for agent invoke approach --- ai-temp/CHANGELOG.md | 1 + ai-temp/tool-implementation-patterns.md | 71 +++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 ai-temp/tool-implementation-patterns.md diff --git a/ai-temp/CHANGELOG.md b/ai-temp/CHANGELOG.md index 91aee050b6e2..c5d514727191 100644 --- a/ai-temp/CHANGELOG.md +++ b/ai-temp/CHANGELOG.md @@ -16,3 +16,4 @@ - Delegated runs now stream live output (`DelegateEvent::Delta`) through the TUI, and remaining UX follow-ups are tracked in `ai-temp/ui-ux-delegation.md`. - Added a dedicated status indicator while a delegate runs, restored the idle header on completion, and regression-tested streaming to prevent animation regressions.
- Updated the sample Codex home instructions/README, ensured the critic agent uses `gpt-5-nano`, and documented the new delegation UX in `ai-temp/ui-ux-delegation.md`. +- Documented plan-tool implementation patterns and how they inform future delegation tools (`ai-temp/tool-implementation-patterns.md`). diff --git a/ai-temp/tool-implementation-patterns.md b/ai-temp/tool-implementation-patterns.md new file mode 100644 index 000000000000..963ce943fd05 --- /dev/null +++ b/ai-temp/tool-implementation-patterns.md @@ -0,0 +1,71 @@ +# Tool Implementation Patterns (Plan Tool Case Study) + +This note captures the patterns we observed while digging into the built-in plan tool (`update_plan`). It now also outlines how we will apply those lessons to the multi-agent delegation tool so the primary assistant invokes sub-agents via structured tool calls. + +## 1. Spec + Handler Separation +- Specification and handler live under `codex_core::tools`. +- `LazyLock` builds the JSON schema for arguments, mirroring the MCP declaration. +- Registration happens via `ToolRegistryBuilder` only when the active config sets `plan_tool = true`. + +## 2. Schema-First Validation +- The spec enumerates the allowed statuses (`pending`, `in_progress`, `completed`) and requires each plan item to declare both `step` and `status`. +- Parsing uses `serde_json::from_str` into `UpdatePlanArgs`, so malformed payloads fail before any state is mutated. + +## 3. Stateless Server Handler +- The handler simply converts the payload into a `PlanUpdate` event on the session bus and returns `"Plan updated"`. +- All “real” state (plan rendering, history, undo) stays in the client layers. + +## 4. Config-Driven Inclusion +- `Config.include_plan_tool` toggles availability. Front ends (CLI, TUI, app server) set this flag through `ConfigOverrides`. +- When disabled, the tool spec and handler never register, preventing accidental invocation. + +## 5. 
Client-Side Presentation +- The TUI listens for `EventMsg::PlanUpdate` to render a checklist-style history cell. +- Tests assert the event-to-UI path (`codex-rs/core/tests/suite/tool_harness.rs`, `codex-rs/tui/src/chatwidget/tests.rs`) so regressions surface quickly. + +## 6. Takeaways for Delegation Tools +- Reuse the same pattern: declare a schema-rich `ToolSpec`, keep the handler stateless, and emit structured events for the UI. +- Guard inclusion with config or profile flags so we can stage features safely. +- Keep UX logic (streaming, history cells) in the client; server code just transports structured data. +- Treat delegation as an AI-triggered capability: the user cannot directly execute sub-agents; instead, the main model decides when to call the delegation tool, with `#agent_id` serving only as a hint in prompts and autocomplete. + +## 7. Multi-Agent Delegate Tool Blueprint + +### 7.1 Invocation Model +- The primary assistant issues a tool call (working name: `delegate_agent`) whenever it wants help from a sub-agent. Users supply plain language requests; the model chooses whether delegation is appropriate. +- The frontend passes user text verbatim. It never intercepts hash-prefixed tokens—`#critic`, `#ideas_provider`, etc.—beyond offering them in autocomplete. Instructions explain that these tags hint which agent to choose. + +### 7.2 Tool Spec Shape +```json +{ + "type": "object", + "required": ["agent_id", "prompt"], + "properties": { + "agent_id": { "type": "string", "pattern": "^[a-z0-9_\\-]+$" }, + "prompt": { "type": "string" }, + "context": { + "type": "object", + "properties": { + "working_directory": { "type": "string" }, + "hints": { "type": "array", "items": { "type": "string" } } + }, + "additionalProperties": true + } + } +} +``` +- We can add optional fields later (timeouts, resource budgets) without breaking the schema. 
+- The handler validates `agent_id` with `AgentRegistry`, loads the merged `Config`, and passes the prompt/context into the orchestrator. + +### 7.3 Handler Responsibilities +- Mirror the exec tool: enqueue the delegate run, stream progress via `DelegateEvent::Started/Delta/Completed/Failed`, and return a compact JSON result (e.g., `{ "status": "ok", "summary": "...", "session": "session-id" }`). +- Errors reuse the same shape with `status: "error"` so the UI can surface them consistently. +- The handler itself remains thin—after scheduling the work, it hands control back to the runtime. + +### 7.4 Client Integration +- The TUI reuses `StreamController` to show delegate streaming, and inserts a summary history cell once the tool completes (agent id, elapsed time, link to the sub-agent session). +- Because users cannot trigger the tool directly, slash commands and message preprocessing stay untouched; guidance lives in instructions and autocomplete metadata. + +### 7.5 Instruction Updates +- Primary instructions clarify that `#agent_id` tags are hints encouraging the assistant to invoke the delegation tool, not commands. +- Sub-agent instructions remain focused on their specialised roles; the orchestrator constructs the prompt passed through the tool payload. 
From 542f590950a4c7d6b1dd3d32aae2bcbb7c0767f7 Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Tue, 14 Oct 2025 23:19:40 +0000 Subject: [PATCH 04/17] integrate sub-agent as tool call --- ai-temp/example-codex-home/AGENTS.md | 7 +- ai-temp/example-codex-home/README.md | 7 +- codex-rs/Cargo.lock | 1 + codex-rs/app-server-protocol/src/protocol.rs | 5 + .../app-server/src/codex_message_processor.rs | 2 + codex-rs/core/src/codex.rs | 23 ++ codex-rs/core/src/config.rs | 10 + codex-rs/core/src/conversation_manager.rs | 30 ++- codex-rs/core/src/delegate_tool.rs | 84 +++++++ codex-rs/core/src/lib.rs | 1 + codex-rs/core/src/state/service.rs | 3 + codex-rs/core/src/tools/handlers/delegate.rs | 234 ++++++++++++++++++ codex-rs/core/src/tools/handlers/mod.rs | 3 + codex-rs/core/src/tools/spec.rs | 46 ++++ codex-rs/core/tests/suite/model_tools.rs | 1 + codex-rs/core/tests/suite/prompt_caching.rs | 1 + codex-rs/core/tests/suite/tool_harness.rs | 2 + codex-rs/exec/src/lib.rs | 1 + codex-rs/mcp-server/src/codex_tool_config.rs | 1 + codex-rs/multi-agent/Cargo.toml | 1 + codex-rs/multi-agent/src/lib.rs | 7 + codex-rs/multi-agent/src/orchestrator.rs | 110 ++++++++ codex-rs/tui/src/app.rs | 49 +--- codex-rs/tui/src/app_event.rs | 21 -- codex-rs/tui/src/chatwidget.rs | 39 +-- codex-rs/tui/src/chatwidget/tests.rs | 32 +-- codex-rs/tui/src/lib.rs | 4 +- 27 files changed, 589 insertions(+), 136 deletions(-) create mode 100644 codex-rs/core/src/delegate_tool.rs create mode 100644 codex-rs/core/src/tools/handlers/delegate.rs diff --git a/ai-temp/example-codex-home/AGENTS.md b/ai-temp/example-codex-home/AGENTS.md index 65d7644d7d2f..928dc1393cbb 100644 --- a/ai-temp/example-codex-home/AGENTS.md +++ b/ai-temp/example-codex-home/AGENTS.md @@ -3,12 +3,13 @@ This directory demonstrates a multi-agent workflow. You are the coordinator: 1. **Understand the request.** Gather context, restate the goal, and identify missing details. -2. 
**Choose when to delegate.** If the task needs options, architecture, or brainstorming, craft a detailed prompt for the ideas provider and hand it off immediately. The orchestrator picks up messages that start with `#ideas_provider …`, so include the full context it needs without waiting for the user to ask. -3. **Critique before action.** Once you have a leading option, summarize it for the critic via `#critic …`. Ask for risks, missing tests, and edge cases so you surface blockers early. +2. **Choose when to delegate.** If the task needs options, architecture, or brainstorming, craft a detailed prompt for the ideas provider and call the delegation tool. Use `#ideas_provider` inside your own reasoning or reply when it helps you remember which agent to call—the orchestrator treats the hash tag purely as a hint. +3. **Critique before action.** Once you have a leading option, summarize it for the critic via the same tool. Highlight the context and goals; `#critic` remains a hint for the tool, not a literal command. 4. **Synthesize next steps.** Combine ideation and critique into a concise plan, call out open questions, and suggest the single most sensible next action. General rules: - Keep your own replies short unless the user explicitly wants depth; link to `ai-temp/` docs when the user needs background. - If a request clearly doesn’t benefit from delegation, note why you’re handling it solo. -- For manual testing you can always type `#ideas_provider …` or `#critic …` yourself—the orchestrator uses the same pathway either way. +- The `delegate_agent` tool is AI-only. Treat `#ideas_provider` / `#critic` as guidance in instructions; the user cannot execute sub-agents directly. +- When testing manually, focus on describing the problem clearly so the assistant chooses the right delegate—it will ignore bare `#agent` commands unless they’re part of a well-formed instruction. 
diff --git a/ai-temp/example-codex-home/README.md b/ai-temp/example-codex-home/README.md index 26b62d775cf0..38982a9626ac 100644 --- a/ai-temp/example-codex-home/README.md +++ b/ai-temp/example-codex-home/README.md @@ -19,12 +19,13 @@ cargo build -p codex-cli # Launch the TUI against this sample Codex home CODEX_HOME="$(pwd)/ai-temp/example-codex-home" target/debug/codex -# Launch a specific sub-agent directly +# Launch a specific sub-agent directly (skips the orchestrator) CODEX_HOME="$(pwd)/ai-temp/example-codex-home" target/debug/codex --agent ideas_provider CODEX_HOME="$(pwd)/ai-temp/example-codex-home" target/debug/codex --agent critic -# Inside the primary session you can delegate manually: -# type: '#ideas_provider outline parser refactors' +# Inside the primary session, describe the task as usual. +# The main assistant decides when to call the `delegate_agent` tool. +# Use tags like `#ideas_provider` in your prompts only as hints for the AI. # Watch logs in log/codex-tui.log to confirm delegation activity. ``` diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 632028ab5562..475383465337 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1259,6 +1259,7 @@ name = "codex-multi-agent" version = "0.0.0" dependencies = [ "anyhow", + "async-trait", "codex-common", "codex-core", "serde", diff --git a/codex-rs/app-server-protocol/src/protocol.rs b/codex-rs/app-server-protocol/src/protocol.rs index 845a2431f443..077b3abb8454 100644 --- a/codex-rs/app-server-protocol/src/protocol.rs +++ b/codex-rs/app-server-protocol/src/protocol.rs @@ -232,6 +232,10 @@ pub struct NewConversationParams { #[serde(skip_serializing_if = "Option::is_none")] pub include_plan_tool: Option, + /// Whether to include the delegate tool that invokes sub-agents. + #[serde(skip_serializing_if = "Option::is_none")] + pub include_delegate_tool: Option, + /// Whether to include the apply patch tool in the conversation. 
#[serde(skip_serializing_if = "Option::is_none")] pub include_apply_patch_tool: Option, @@ -842,6 +846,7 @@ mod tests { config: None, base_instructions: None, include_plan_tool: None, + include_delegate_tool: None, include_apply_patch_tool: None, }, }; diff --git a/codex-rs/app-server/src/codex_message_processor.rs b/codex-rs/app-server/src/codex_message_processor.rs index cb894a51abe1..21f460c6b71c 100644 --- a/codex-rs/app-server/src/codex_message_processor.rs +++ b/codex-rs/app-server/src/codex_message_processor.rs @@ -1334,6 +1334,7 @@ async fn derive_config_from_params( config: cli_overrides, base_instructions, include_plan_tool, + include_delegate_tool, include_apply_patch_tool, } = params; let overrides = ConfigOverrides { @@ -1347,6 +1348,7 @@ async fn derive_config_from_params( codex_linux_sandbox_exe, base_instructions, include_plan_tool, + include_delegate_tool, include_apply_patch_tool, include_view_image_tool: None, show_raw_agent_reasoning: None, diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index fe352f0103f4..342bc34582f9 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -6,6 +6,7 @@ use std::sync::atomic::AtomicU64; use crate::AuthManager; use crate::client_common::REVIEW_PROMPT; +use crate::delegate_tool::DelegateToolAdapter; use crate::event_mapping::map_response_item_to_event_messages; use crate::function_tool::FunctionCallError; use crate::review_format::format_review_findings_block; @@ -149,6 +150,7 @@ impl Codex { pub async fn spawn( config: Config, auth_manager: Arc, + delegate_adapter: Option>, conversation_history: InitialHistory, session_source: SessionSource, ) -> CodexResult { @@ -177,6 +179,7 @@ impl Codex { configure_session, config.clone(), auth_manager.clone(), + delegate_adapter, tx_event.clone(), conversation_history, session_source, @@ -312,6 +315,7 @@ impl Session { configure_session: ConfigureSession, config: Arc, auth_manager: Arc, + delegate_adapter: Option>, tx_event: Sender, 
initial_history: InitialHistory, session_source: SessionSource, @@ -442,11 +446,14 @@ impl Session { model_reasoning_summary, conversation_id, ); + let delegate_enabled = config.include_delegate_tool && delegate_adapter.is_some(); + let turn_context = TurnContext { client, tools_config: ToolsConfig::new(&ToolsConfigParams { model_family: &config.model_family, include_plan_tool: config.include_plan_tool, + include_delegate_tool: delegate_enabled, include_apply_patch_tool: config.include_apply_patch_tool, include_web_search_request: config.tools_web_search_request, use_streamable_shell_tool: config.use_experimental_streamable_shell_tool, @@ -475,6 +482,7 @@ impl Session { turn_context.cwd.clone(), config.codex_linux_sandbox_exe.clone(), )), + delegate_adapter, }; let sess = Arc::new(Session { @@ -516,6 +524,10 @@ impl Session { self.tx_event.clone() } + pub(crate) fn delegate_adapter(&self) -> Option> { + self.services.delegate_adapter.as_ref().map(Arc::clone) + } + fn next_internal_sub_id(&self) -> String { let id = self .next_internal_sub_id @@ -1193,9 +1205,12 @@ async fn submission_loop( .unwrap_or(prev.sandbox_policy.clone()); let new_cwd = cwd.clone().unwrap_or_else(|| prev.cwd.clone()); + let delegate_enabled = + config.include_delegate_tool && sess.delegate_adapter().is_some(); let tools_config = ToolsConfig::new(&ToolsConfigParams { model_family: &effective_family, include_plan_tool: config.include_plan_tool, + include_delegate_tool: delegate_enabled, include_apply_patch_tool: config.include_apply_patch_tool, include_web_search_request: config.tools_web_search_request, use_streamable_shell_tool: config.use_experimental_streamable_shell_tool, @@ -1293,11 +1308,14 @@ async fn submission_loop( sess.conversation_id, ); + let delegate_enabled = + config.include_delegate_tool && sess.delegate_adapter().is_some(); let fresh_turn_context = TurnContext { client, tools_config: ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, include_plan_tool: 
config.include_plan_tool, + include_delegate_tool: delegate_enabled, include_apply_patch_tool: config.include_apply_patch_tool, include_web_search_request: config.tools_web_search_request, use_streamable_shell_tool: config @@ -1539,6 +1557,7 @@ async fn spawn_review_thread( let tools_config = ToolsConfig::new(&ToolsConfigParams { model_family: &review_model_family, include_plan_tool: false, + include_delegate_tool: false, include_apply_patch_tool: config.include_apply_patch_tool, include_web_search_request: false, use_streamable_shell_tool: false, @@ -2749,6 +2768,7 @@ mod tests { let tools_config = ToolsConfig::new(&ToolsConfigParams { model_family: &config.model_family, include_plan_tool: config.include_plan_tool, + include_delegate_tool: config.include_delegate_tool, include_apply_patch_tool: config.include_apply_patch_tool, include_web_search_request: config.tools_web_search_request, use_streamable_shell_tool: config.use_experimental_streamable_shell_tool, @@ -2780,6 +2800,7 @@ mod tests { turn_context.cwd.clone(), None, )), + delegate_adapter: None, }; let session = Session { conversation_id, @@ -2822,6 +2843,7 @@ mod tests { let tools_config = ToolsConfig::new(&ToolsConfigParams { model_family: &config.model_family, include_plan_tool: config.include_plan_tool, + include_delegate_tool: config.include_delegate_tool, include_apply_patch_tool: config.include_apply_patch_tool, include_web_search_request: config.tools_web_search_request, use_streamable_shell_tool: config.use_experimental_streamable_shell_tool, @@ -2853,6 +2875,7 @@ mod tests { config.cwd.clone(), None, )), + delegate_adapter: None, }; let session = Arc::new(Session { conversation_id, diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs index c715651851ee..2deb70cc6ceb 100644 --- a/codex-rs/core/src/config.rs +++ b/codex-rs/core/src/config.rs @@ -199,6 +199,9 @@ pub struct Config { /// Include an experimental plan tool that the model can use to update its current plan and status of 
each step. pub include_plan_tool: bool, + /// Include the delegate tool that lets the primary agent route work to sub-agents. + pub include_delegate_tool: bool, + /// Include the `apply_patch` tool for models that benefit from invoking /// file edits as a structured tool call. When unset, this falls back to the /// model family's default preference. @@ -953,6 +956,7 @@ pub struct ConfigOverrides { pub codex_linux_sandbox_exe: Option, pub base_instructions: Option, pub include_plan_tool: Option, + pub include_delegate_tool: Option, pub include_apply_patch_tool: Option, pub include_view_image_tool: Option, pub show_raw_agent_reasoning: Option, @@ -981,6 +985,7 @@ impl Config { codex_linux_sandbox_exe, base_instructions, include_plan_tool, + include_delegate_tool, include_apply_patch_tool, include_view_image_tool, show_raw_agent_reasoning, @@ -1165,6 +1170,7 @@ impl Config { .or(cfg.chatgpt_base_url) .unwrap_or("https://chatgpt.com/backend-api/".to_string()), include_plan_tool: include_plan_tool.unwrap_or(false), + include_delegate_tool: include_delegate_tool.unwrap_or(false), include_apply_patch_tool: include_apply_patch_tool .or(cfg.experimental_use_freeform_apply_patch) .unwrap_or(false), @@ -2114,6 +2120,7 @@ model_verbosity = "high" chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), base_instructions: None, include_plan_tool: false, + include_delegate_tool: false, include_apply_patch_tool: false, tools_web_search_request: false, use_experimental_streamable_shell_tool: false, @@ -2177,6 +2184,7 @@ model_verbosity = "high" chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), base_instructions: None, include_plan_tool: false, + include_delegate_tool: false, include_apply_patch_tool: false, tools_web_search_request: false, use_experimental_streamable_shell_tool: false, @@ -2255,6 +2263,7 @@ model_verbosity = "high" chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), base_instructions: None, include_plan_tool: false, + 
include_delegate_tool: false, include_apply_patch_tool: false, tools_web_search_request: false, use_experimental_streamable_shell_tool: false, @@ -2319,6 +2328,7 @@ model_verbosity = "high" chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), base_instructions: None, include_plan_tool: false, + include_delegate_tool: false, include_apply_patch_tool: false, tools_web_search_request: false, use_experimental_streamable_shell_tool: false, diff --git a/codex-rs/core/src/conversation_manager.rs b/codex-rs/core/src/conversation_manager.rs index aeb0780725b6..19a2bef5d4d9 100644 --- a/codex-rs/core/src/conversation_manager.rs +++ b/codex-rs/core/src/conversation_manager.rs @@ -7,6 +7,7 @@ use crate::codex::compact::content_items_to_text; use crate::codex::compact::is_session_prefix_message; use crate::codex_conversation::CodexConversation; use crate::config::Config; +use crate::delegate_tool::DelegateToolAdapter; use crate::error::CodexErr; use crate::error::Result as CodexResult; use crate::protocol::Event; @@ -37,14 +38,24 @@ pub struct ConversationManager { conversations: Arc>>>, auth_manager: Arc, session_source: SessionSource, + delegate_adapter: Option>, } impl ConversationManager { pub fn new(auth_manager: Arc, session_source: SessionSource) -> Self { + Self::with_delegate(auth_manager, session_source, None) + } + + pub fn with_delegate( + auth_manager: Arc, + session_source: SessionSource, + delegate_adapter: Option>, + ) -> Self { Self { conversations: Arc::new(RwLock::new(HashMap::new())), auth_manager, session_source, + delegate_adapter, } } @@ -73,6 +84,7 @@ impl ConversationManager { } = Codex::spawn( config, auth_manager, + self.delegate_adapter.clone(), InitialHistory::New, self.session_source, ) @@ -133,7 +145,14 @@ impl ConversationManager { let CodexSpawnOk { codex, conversation_id, - } = Codex::spawn(config, auth_manager, initial_history, self.session_source).await?; + } = Codex::spawn( + config, + auth_manager, + 
self.delegate_adapter.clone(), + initial_history, + self.session_source, + ) + .await?; self.finalize_spawn(codex, conversation_id).await } @@ -167,7 +186,14 @@ impl ConversationManager { let CodexSpawnOk { codex, conversation_id, - } = Codex::spawn(config, auth_manager, history, self.session_source).await?; + } = Codex::spawn( + config, + auth_manager, + self.delegate_adapter.clone(), + history, + self.session_source, + ) + .await?; self.finalize_spawn(codex, conversation_id).await } diff --git a/codex-rs/core/src/delegate_tool.rs b/codex-rs/core/src/delegate_tool.rs new file mode 100644 index 000000000000..80380c7c2d84 --- /dev/null +++ b/codex-rs/core/src/delegate_tool.rs @@ -0,0 +1,84 @@ +use async_trait::async_trait; +use serde::Deserialize; +use serde::Serialize; +use std::time::Duration; +use std::time::SystemTime; +use tokio::sync::mpsc::UnboundedReceiver; + +/// Identifier assigned to a delegate run. Mirrors the orchestrator's run id. +pub type DelegateRunId = String; + +/// Additional hints the primary agent can pass to a delegate tool invocation. +#[derive(Debug, Clone, Default, Deserialize, Serialize)] +pub struct DelegateToolContext { + #[serde(skip_serializing_if = "Option::is_none")] + pub working_directory: Option, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub hints: Vec, +} + +/// Payload sent by the primary agent when invoking the delegate tool. +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct DelegateToolRequest { + pub agent_id: String, + pub prompt: String, + #[serde(default)] + pub context: DelegateToolContext, +} + +/// Event emitted while a delegate run is in flight. 
+#[derive(Debug, Clone)] +pub enum DelegateToolEvent { + Started { + run_id: DelegateRunId, + agent_id: String, + prompt: String, + started_at: SystemTime, + }, + Delta { + run_id: DelegateRunId, + agent_id: String, + chunk: String, + }, + Completed { + run_id: DelegateRunId, + agent_id: String, + output: Option, + duration: Duration, + }, + Failed { + run_id: DelegateRunId, + agent_id: String, + error: String, + }, +} + +/// Result returned when a delegate request is accepted. +#[derive(Debug, Clone)] +pub struct DelegateToolRun { + pub run_id: DelegateRunId, + pub agent_id: String, +} + +#[derive(thiserror::Error, Debug)] +pub enum DelegateToolError { + #[error("another delegate is already running")] + DelegateInProgress, + #[error("agent `{0}` not found")] + AgentNotFound(String), + #[error("delegate setup failed: {0}")] + SetupFailed(String), +} + +pub type DelegateEventReceiver = UnboundedReceiver; + +/// Adapter abstraction that lets front-ends wire their orchestrator into the core tool handler. 
+#[async_trait] +pub trait DelegateToolAdapter: Send + Sync { + async fn subscribe(&self) -> DelegateEventReceiver; + + async fn delegate( + &self, + request: DelegateToolRequest, + ) -> Result; +} diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 201d8feb4d92..b5f0815ca2d4 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -23,6 +23,7 @@ pub mod config_profile; pub mod config_types; mod conversation_history; pub mod custom_prompts; +pub mod delegate_tool; mod environment_context; pub mod error; pub mod exec; diff --git a/codex-rs/core/src/state/service.rs b/codex-rs/core/src/state/service.rs index 994352eddfa9..3133c009b3ab 100644 --- a/codex-rs/core/src/state/service.rs +++ b/codex-rs/core/src/state/service.rs @@ -1,9 +1,11 @@ use crate::RolloutRecorder; +use crate::delegate_tool::DelegateToolAdapter; use crate::exec_command::ExecSessionManager; use crate::executor::Executor; use crate::mcp_connection_manager::McpConnectionManager; use crate::unified_exec::UnifiedExecSessionManager; use crate::user_notification::UserNotifier; +use std::sync::Arc; use tokio::sync::Mutex; pub(crate) struct SessionServices { @@ -15,4 +17,5 @@ pub(crate) struct SessionServices { pub(crate) user_shell: crate::shell::Shell, pub(crate) show_raw_agent_reasoning: bool, pub(crate) executor: Executor, + pub(crate) delegate_adapter: Option>, } diff --git a/codex-rs/core/src/tools/handlers/delegate.rs b/codex-rs/core/src/tools/handlers/delegate.rs new file mode 100644 index 000000000000..459690ab460b --- /dev/null +++ b/codex-rs/core/src/tools/handlers/delegate.rs @@ -0,0 +1,234 @@ +use crate::client_common::tools::ResponsesApiTool; +use crate::client_common::tools::ToolSpec; +use crate::delegate_tool::DelegateEventReceiver; +use crate::delegate_tool::DelegateToolContext; +use crate::delegate_tool::DelegateToolError; +use crate::delegate_tool::DelegateToolEvent; +use crate::delegate_tool::DelegateToolRequest; +use 
crate::delegate_tool::DelegateToolRun; +use crate::function_tool::FunctionCallError; +use crate::openai_tools::JsonSchema; +use crate::tools::context::ToolInvocation; +use crate::tools::context::ToolOutput; +use crate::tools::context::ToolPayload; +use crate::tools::registry::ToolHandler; +use crate::tools::registry::ToolKind; +use async_trait::async_trait; +use serde::Deserialize; +use serde::Serialize; +use std::collections::BTreeMap; +use std::sync::LazyLock; +use std::time::Duration; + +pub struct DelegateToolHandler; + +pub static DELEGATE_TOOL: LazyLock = LazyLock::new(|| { + let mut context_props = BTreeMap::new(); + context_props.insert( + "working_directory".to_string(), + JsonSchema::String { + description: Some("Override the delegate's working directory".to_string()), + }, + ); + context_props.insert( + "hints".to_string(), + JsonSchema::Array { + items: Box::new(JsonSchema::String { + description: Some("Optional high-level hints to guide the delegate".to_string()), + }), + description: Some("Additional hints for the delegate".to_string()), + }, + ); + + let mut properties = BTreeMap::new(); + properties.insert( + "agent_id".to_string(), + JsonSchema::String { + description: Some("Identifier of the sub-agent to invoke".to_string()), + }, + ); + properties.insert( + "prompt".to_string(), + JsonSchema::String { + description: Some("Instructions passed to the sub-agent".to_string()), + }, + ); + properties.insert( + "context".to_string(), + JsonSchema::Object { + properties: context_props, + required: None, + additional_properties: Some(false.into()), + }, + ); + + ToolSpec::Function(ResponsesApiTool { + name: "delegate_agent".to_string(), + description: r#"Delegates work to a configured sub-agent. +Provide the agent id, a prompt, and optional context such as working directory overrides. 
+"# + .to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["agent_id".to_string(), "prompt".to_string()]), + additional_properties: Some(false.into()), + }, + }) +}); + +#[derive(Debug, Deserialize)] +struct DelegateToolArgs { + agent_id: String, + prompt: String, + #[serde(default)] + context: Option, +} + +#[derive(Debug, Default, Deserialize)] +struct DelegateToolArgsContext { + working_directory: Option, + #[serde(default)] + hints: Vec, +} + +impl From for DelegateToolContext { + fn from(value: DelegateToolArgsContext) -> Self { + Self { + working_directory: value.working_directory, + hints: value.hints, + } + } +} + +#[derive(Debug, Serialize)] +struct DelegateToolResponse { + status: &'static str, + agent_id: String, + run_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + summary: Option, + #[serde(skip_serializing_if = "Option::is_none")] + duration_ms: Option, +} + +#[async_trait] +impl ToolHandler for DelegateToolHandler { + fn kind(&self) -> ToolKind { + ToolKind::Function + } + + async fn handle(&self, invocation: ToolInvocation) -> Result { + let ToolInvocation { + session, payload, .. 
+ } = invocation; + + let arguments = match payload { + ToolPayload::Function { arguments } => arguments, + _ => { + return Err(FunctionCallError::RespondToModel( + "delegate_agent handler received unsupported payload".to_string(), + )); + } + }; + + let args: DelegateToolArgs = serde_json::from_str(&arguments).map_err(|e| { + FunctionCallError::RespondToModel(format!("failed to parse function arguments: {e}")) + })?; + + let adapter = session.delegate_adapter().ok_or_else(|| { + FunctionCallError::RespondToModel("delegate tool is not available".to_string()) + })?; + + let mut events = adapter.subscribe().await; + + let request = DelegateToolRequest { + agent_id: args.agent_id.clone(), + prompt: args.prompt.clone(), + context: args.context.unwrap_or_default().into(), + }; + + let run = adapter.delegate(request).await.map_err(map_adapter_error)?; + + let (summary, duration) = wait_for_completion(&mut events, &run) + .await + .map_err(FunctionCallError::RespondToModel)?; + + let response = DelegateToolResponse { + status: "ok", + agent_id: run.agent_id, + run_id: run.run_id, + summary, + duration_ms: duration.map(|d| d.as_millis() as u64), + }; + + let content = serde_json::to_string(&response) + .map_err(|e| FunctionCallError::Fatal(format!("failed to serialize response: {e}")))?; + + Ok(ToolOutput::Function { + content, + success: Some(true), + }) + } +} + +async fn wait_for_completion( + events: &mut DelegateEventReceiver, + run: &DelegateToolRun, +) -> Result<(Option, Option), String> { + let mut collected = String::new(); + + while let Some(event) = events.recv().await { + if event_run_id(&event) != run.run_id { + continue; + } + + match event { + DelegateToolEvent::Delta { chunk, .. } => { + collected.push_str(&chunk); + } + DelegateToolEvent::Completed { + output, duration, .. 
+ } => { + let summary = output.or_else(|| { + if collected.trim().is_empty() { + None + } else { + Some(collected.clone()) + } + }); + return Ok((summary, Some(duration))); + } + DelegateToolEvent::Failed { error, .. } => { + return Err(error); + } + _ => {} + } + } + + Err("delegate run ended unexpectedly".to_string()) +} + +fn event_run_id(event: &DelegateToolEvent) -> &str { + match event { + DelegateToolEvent::Started { run_id, .. } + | DelegateToolEvent::Delta { run_id, .. } + | DelegateToolEvent::Completed { run_id, .. } + | DelegateToolEvent::Failed { run_id, .. } => run_id, + } +} + +fn map_adapter_error(err: DelegateToolError) -> FunctionCallError { + match err { + DelegateToolError::DelegateInProgress => FunctionCallError::RespondToModel( + "another delegate is already running; wait for it to finish before delegating again" + .to_string(), + ), + DelegateToolError::AgentNotFound(agent_id) => FunctionCallError::RespondToModel(format!( + "delegate agent `{agent_id}` is not configured" + )), + DelegateToolError::SetupFailed(reason) => { + FunctionCallError::RespondToModel(format!("failed to start delegate: {reason}")) + } + } +} diff --git a/codex-rs/core/src/tools/handlers/mod.rs b/codex-rs/core/src/tools/handlers/mod.rs index 9bff9fd5e7d5..3d02cad09e27 100644 --- a/codex-rs/core/src/tools/handlers/mod.rs +++ b/codex-rs/core/src/tools/handlers/mod.rs @@ -1,4 +1,5 @@ pub mod apply_patch; +mod delegate; mod exec_stream; mod grep_files; mod list_dir; @@ -13,6 +14,8 @@ mod view_image; pub use plan::PLAN_TOOL; pub use apply_patch::ApplyPatchHandler; +pub use delegate::DELEGATE_TOOL; +pub use delegate::DelegateToolHandler; pub use exec_stream::ExecStreamHandler; pub use grep_files::GrepFilesHandler; pub use list_dir::ListDirHandler; diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index c10f8e22f9ad..4ece1c8f9277 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -24,6 +24,7 @@ pub enum 
ConfigShellToolType { pub(crate) struct ToolsConfig { pub shell_type: ConfigShellToolType, pub plan_tool: bool, + pub delegate_tool: bool, pub apply_patch_tool_type: Option, pub web_search_request: bool, pub include_view_image_tool: bool, @@ -34,6 +35,7 @@ pub(crate) struct ToolsConfig { pub(crate) struct ToolsConfigParams<'a> { pub(crate) model_family: &'a ModelFamily, pub(crate) include_plan_tool: bool, + pub(crate) include_delegate_tool: bool, pub(crate) include_apply_patch_tool: bool, pub(crate) include_web_search_request: bool, pub(crate) use_streamable_shell_tool: bool, @@ -46,6 +48,7 @@ impl ToolsConfig { let ToolsConfigParams { model_family, include_plan_tool, + include_delegate_tool, include_apply_patch_tool, include_web_search_request, use_streamable_shell_tool, @@ -75,6 +78,7 @@ impl ToolsConfig { Self { shell_type, plan_tool: *include_plan_tool, + delegate_tool: *include_delegate_tool, apply_patch_tool_type, web_search_request: *include_web_search_request, include_view_image_tool: *include_view_image_tool, @@ -720,6 +724,8 @@ pub(crate) fn build_specs( use crate::exec_command::create_exec_command_tool_for_responses_api; use crate::exec_command::create_write_stdin_tool_for_responses_api; use crate::tools::handlers::ApplyPatchHandler; + use crate::tools::handlers::DELEGATE_TOOL; + use crate::tools::handlers::DelegateToolHandler; use crate::tools::handlers::ExecStreamHandler; use crate::tools::handlers::GrepFilesHandler; use crate::tools::handlers::ListDirHandler; @@ -738,6 +744,7 @@ pub(crate) fn build_specs( let exec_stream_handler = Arc::new(ExecStreamHandler); let unified_exec_handler = Arc::new(UnifiedExecHandler); let plan_handler = Arc::new(PlanHandler); + let delegate_handler = Arc::new(DelegateToolHandler); let apply_patch_handler = Arc::new(ApplyPatchHandler); let view_image_handler = Arc::new(ViewImageHandler); let mcp_handler = Arc::new(McpHandler); @@ -776,6 +783,11 @@ pub(crate) fn build_specs( builder.register_handler("update_plan", 
plan_handler); } + if config.delegate_tool { + builder.push_spec(DELEGATE_TOOL.clone()); + builder.register_handler("delegate_agent", delegate_handler); + } + if let Some(apply_patch_tool_type) = &config.apply_patch_tool_type { match apply_patch_tool_type { ApplyPatchToolType::Freeform => { @@ -909,6 +921,7 @@ mod tests { let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, include_plan_tool: true, + include_delegate_tool: false, include_apply_patch_tool: false, include_web_search_request: true, use_streamable_shell_tool: false, @@ -929,6 +942,7 @@ mod tests { let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, include_plan_tool: true, + include_delegate_tool: false, include_apply_patch_tool: false, include_web_search_request: true, use_streamable_shell_tool: false, @@ -951,6 +965,7 @@ mod tests { let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, include_plan_tool: false, + include_delegate_tool: false, include_apply_patch_tool: false, include_web_search_request: false, use_streamable_shell_tool: false, @@ -972,6 +987,7 @@ mod tests { let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, include_plan_tool: false, + include_delegate_tool: false, include_apply_patch_tool: false, include_web_search_request: false, use_streamable_shell_tool: false, @@ -1004,6 +1020,7 @@ mod tests { let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, include_plan_tool: false, + include_delegate_tool: false, include_apply_patch_tool: false, include_web_search_request: true, use_streamable_shell_tool: false, @@ -1109,6 +1126,7 @@ mod tests { let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, include_plan_tool: false, + include_delegate_tool: false, include_apply_patch_tool: false, include_web_search_request: false, use_streamable_shell_tool: false, @@ -1186,6 +1204,7 @@ mod tests { let config = 
ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, include_plan_tool: false, + include_delegate_tool: false, include_apply_patch_tool: false, include_web_search_request: true, use_streamable_shell_tool: false, @@ -1255,6 +1274,7 @@ mod tests { let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, include_plan_tool: false, + include_delegate_tool: false, include_apply_patch_tool: false, include_web_search_request: true, use_streamable_shell_tool: false, @@ -1319,6 +1339,7 @@ mod tests { let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, include_plan_tool: false, + include_delegate_tool: false, include_apply_patch_tool: true, include_web_search_request: true, use_streamable_shell_tool: false, @@ -1386,6 +1407,7 @@ mod tests { let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, include_plan_tool: false, + include_delegate_tool: false, include_apply_patch_tool: false, include_web_search_request: true, use_streamable_shell_tool: false, @@ -1465,6 +1487,7 @@ mod tests { let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, include_plan_tool: false, + include_delegate_tool: false, include_apply_patch_tool: false, include_web_search_request: true, use_streamable_shell_tool: false, @@ -1583,4 +1606,27 @@ mod tests { }) ); } + + #[test] + fn delegate_tool_enabled_by_flag() { + let model_family = find_family_for_model("gpt-5-codex") + .expect("gpt-5-codex should be a valid model family"); + let config = ToolsConfig::new(&ToolsConfigParams { + model_family: &model_family, + include_plan_tool: false, + include_delegate_tool: true, + include_apply_patch_tool: false, + include_web_search_request: false, + use_streamable_shell_tool: false, + include_view_image_tool: false, + experimental_unified_exec_tool: true, + }); + let (tools, _) = build_specs(&config, None).build(); + + assert!( + tools + .iter() + .any(|tool| tool_name(&tool.spec) == "delegate_agent") 
+ ); + } } diff --git a/codex-rs/core/tests/suite/model_tools.rs b/codex-rs/core/tests/suite/model_tools.rs index ee7b44d4b0d9..3f05c933bc57 100644 --- a/codex-rs/core/tests/suite/model_tools.rs +++ b/codex-rs/core/tests/suite/model_tools.rs @@ -57,6 +57,7 @@ async fn collect_tool_identifiers_for_model(model: &str) -> Vec { config.model_family = find_family_for_model(model).unwrap_or_else(|| panic!("unknown model family for {model}")); config.include_plan_tool = false; + config.include_delegate_tool = false; config.include_apply_patch_tool = false; config.include_view_image_tool = false; config.tools_web_search_request = false; diff --git a/codex-rs/core/tests/suite/prompt_caching.rs b/codex-rs/core/tests/suite/prompt_caching.rs index 9ca0cc9369a7..c4f509682672 100644 --- a/codex-rs/core/tests/suite/prompt_caching.rs +++ b/codex-rs/core/tests/suite/prompt_caching.rs @@ -186,6 +186,7 @@ async fn prompt_tools_are_consistent_across_requests() { config.model_provider = model_provider; config.user_instructions = Some("be consistent and helpful".to_string()); config.include_plan_tool = true; + config.include_delegate_tool = false; let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key")); diff --git a/codex-rs/core/tests/suite/tool_harness.rs b/codex-rs/core/tests/suite/tool_harness.rs index eaefe7d9dc96..b70f0b547687 100644 --- a/codex-rs/core/tests/suite/tool_harness.rs +++ b/codex-rs/core/tests/suite/tool_harness.rs @@ -105,6 +105,7 @@ async fn update_plan_tool_emits_plan_update_event() -> anyhow::Result<()> { let mut builder = test_codex().with_config(|config| { config.include_plan_tool = true; + config.include_delegate_tool = false; }); let TestCodex { codex, @@ -192,6 +193,7 @@ async fn update_plan_tool_rejects_malformed_payload() -> anyhow::Result<()> { let mut builder = test_codex().with_config(|config| { config.include_plan_tool = true; + config.include_delegate_tool = false; }); let TestCodex { codex, diff --git 
a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs index 967da52b8a69..7f78669f486e 100644 --- a/codex-rs/exec/src/lib.rs +++ b/codex-rs/exec/src/lib.rs @@ -177,6 +177,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any codex_linux_sandbox_exe, base_instructions: None, include_plan_tool: Some(include_plan_tool), + include_delegate_tool: Some(false), include_apply_patch_tool: None, include_view_image_tool: None, show_raw_agent_reasoning: oss.then_some(true), diff --git a/codex-rs/mcp-server/src/codex_tool_config.rs b/codex-rs/mcp-server/src/codex_tool_config.rs index 3cd2cccdce86..744a6a97c130 100644 --- a/codex-rs/mcp-server/src/codex_tool_config.rs +++ b/codex-rs/mcp-server/src/codex_tool_config.rs @@ -160,6 +160,7 @@ impl CodexToolCallParam { codex_linux_sandbox_exe, base_instructions, include_plan_tool, + include_delegate_tool: None, include_apply_patch_tool: None, include_view_image_tool: None, show_raw_agent_reasoning: None, diff --git a/codex-rs/multi-agent/Cargo.toml b/codex-rs/multi-agent/Cargo.toml index 2abba84b4e5b..2fec46a362dc 100644 --- a/codex-rs/multi-agent/Cargo.toml +++ b/codex-rs/multi-agent/Cargo.toml @@ -4,6 +4,7 @@ version.workspace = true edition.workspace = true [dependencies] +async-trait.workspace = true anyhow.workspace = true codex-common = { workspace = true, features = ["cli"] } codex-core.workspace = true diff --git a/codex-rs/multi-agent/src/lib.rs b/codex-rs/multi-agent/src/lib.rs index 68bb79e33944..7bbbf0c48849 100644 --- a/codex-rs/multi-agent/src/lib.rs +++ b/codex-rs/multi-agent/src/lib.rs @@ -10,6 +10,7 @@ use std::fmt; use std::fs; use std::path::Path; use std::path::PathBuf; +use std::sync::Arc; use anyhow::Context; use anyhow::Result; @@ -20,6 +21,7 @@ use codex_core::config::ConfigOverrides; use codex_core::config::ConfigToml; use codex_core::config::find_codex_home; use codex_core::config_loader; +use codex_core::delegate_tool::DelegateToolAdapter; use serde::Deserialize; use serde::Serialize; 
use toml::Value as TomlValue; @@ -336,8 +338,13 @@ pub use orchestrator::DelegateEvent; pub use orchestrator::DelegatePrompt; pub use orchestrator::DelegateRequest; pub use orchestrator::DelegateRunId; +use orchestrator::MultiAgentDelegateAdapter; pub use orchestrator::OrchestratorError; +pub fn delegate_tool_adapter(orchestrator: Arc) -> Arc { + Arc::new(MultiAgentDelegateAdapter::new(orchestrator)) +} + #[cfg(test)] mod tests { use super::*; diff --git a/codex-rs/multi-agent/src/orchestrator.rs b/codex-rs/multi-agent/src/orchestrator.rs index 6a1a534b18e4..d89f3c1d18a3 100644 --- a/codex-rs/multi-agent/src/orchestrator.rs +++ b/codex-rs/multi-agent/src/orchestrator.rs @@ -1,10 +1,17 @@ use std::sync::Arc; use std::time::SystemTime; +use async_trait::async_trait; use codex_common::CliConfigOverrides; use codex_core::AuthManager; use codex_core::ConversationManager; use codex_core::config::ConfigOverrides; +use codex_core::delegate_tool::DelegateEventReceiver as CoreDelegateEventReceiver; +use codex_core::delegate_tool::DelegateToolAdapter; +use codex_core::delegate_tool::DelegateToolError; +use codex_core::delegate_tool::DelegateToolEvent as CoreDelegateToolEvent; +use codex_core::delegate_tool::DelegateToolRequest; +use codex_core::delegate_tool::DelegateToolRun; use codex_core::protocol::EventMsg; use codex_core::protocol::InputItem; use codex_core::protocol::Op; @@ -333,3 +340,106 @@ struct DelegateFailure { agent_id: AgentId, error: String, } + +pub struct MultiAgentDelegateAdapter { + orchestrator: Arc, +} + +impl MultiAgentDelegateAdapter { + pub fn new(orchestrator: Arc) -> Self { + Self { orchestrator } + } + + fn map_event(event: DelegateEvent) -> CoreDelegateToolEvent { + match event { + DelegateEvent::Started { + run_id, + agent_id, + prompt, + started_at, + } => CoreDelegateToolEvent::Started { + run_id, + agent_id: agent_id.as_str().to_string(), + prompt, + started_at, + }, + DelegateEvent::Delta { + run_id, + agent_id, + chunk, + } => 
CoreDelegateToolEvent::Delta { + run_id, + agent_id: agent_id.as_str().to_string(), + chunk, + }, + DelegateEvent::Completed { + run_id, + agent_id, + output, + duration, + } => CoreDelegateToolEvent::Completed { + run_id, + agent_id: agent_id.as_str().to_string(), + output, + duration, + }, + DelegateEvent::Failed { + run_id, + agent_id, + error, + } => CoreDelegateToolEvent::Failed { + run_id, + agent_id: agent_id.as_str().to_string(), + error, + }, + } + } + + fn map_error(err: OrchestratorError) -> DelegateToolError { + match err { + OrchestratorError::DelegateInProgress => DelegateToolError::DelegateInProgress, + OrchestratorError::AgentNotFound(agent) => DelegateToolError::AgentNotFound(agent), + OrchestratorError::DelegateSetupFailed(reason) => { + DelegateToolError::SetupFailed(reason) + } + } + } +} + +#[async_trait] +impl DelegateToolAdapter for MultiAgentDelegateAdapter { + async fn subscribe(&self) -> CoreDelegateEventReceiver { + let mut source = self.orchestrator.subscribe().await; + let (tx, rx) = mpsc::unbounded_channel(); + tokio::spawn(async move { + while let Some(event) = source.recv().await { + if tx.send(Self::map_event(event)).is_err() { + break; + } + } + }); + rx + } + + async fn delegate( + &self, + request: DelegateToolRequest, + ) -> Result { + let agent_id = AgentId::parse(request.agent_id.as_str()) + .map_err(|_| DelegateToolError::AgentNotFound(request.agent_id.clone()))?; + + let run_id = self + .orchestrator + .delegate(DelegateRequest { + agent_id: agent_id.clone(), + prompt: DelegatePrompt::new(request.prompt), + }) + .await + .map_err(Self::map_error)?; + + Ok(DelegateToolRun { + run_id, + agent_id: request.agent_id, + }) + } +} diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index a59e8ac8b7e9..a54611f78f25 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -1,6 +1,5 @@ use crate::app_backtrack::BacktrackState; use crate::app_event::AppEvent; -use crate::app_event::DelegateRequestPayload; use 
crate::app_event_sender::AppEventSender; use crate::bottom_pane::ApprovalRequest; use crate::chatwidget::ChatWidget; @@ -24,8 +23,7 @@ use codex_core::protocol::TokenUsage; use codex_core::protocol_config_types::ReasoningEffort as ReasoningEffortConfig; use codex_multi_agent::AgentOrchestrator; use codex_multi_agent::DelegateEvent; -use codex_multi_agent::DelegateRequest; -use codex_multi_agent::OrchestratorError; +use codex_multi_agent::delegate_tool_adapter; use codex_protocol::ConversationId; use color_eyre::eyre::Result; use color_eyre::eyre::WrapErr; @@ -55,7 +53,6 @@ pub(crate) struct App { pub(crate) app_event_tx: AppEventSender, pub(crate) chat_widget: ChatWidget, pub(crate) auth_manager: Arc, - pub(crate) delegate_orchestrator: Arc, /// Config is stored here so we can recreate ChatWidgets as needed. pub(crate) config: Config, @@ -95,6 +92,7 @@ impl App { let (app_event_tx, mut app_event_rx) = unbounded_channel(); let app_event_tx = AppEventSender::new(app_event_tx); + let delegate_adapter = delegate_tool_adapter(delegate_orchestrator.clone()); let mut delegate_event_rx = delegate_orchestrator.subscribe().await; let delegate_app_event_tx = app_event_tx.clone(); tokio::spawn(async move { @@ -103,9 +101,10 @@ impl App { } }); - let conversation_manager = Arc::new(ConversationManager::new( + let conversation_manager = Arc::new(ConversationManager::with_delegate( auth_manager.clone(), SessionSource::Cli, + Some(delegate_adapter.clone()), )); let enhanced_keys_supported = tui.enhanced_keys_supported(); @@ -158,7 +157,6 @@ impl App { app_event_tx, chat_widget, auth_manager: auth_manager.clone(), - delegate_orchestrator, config, active_profile, file_search, @@ -249,9 +247,6 @@ impl App { self.chat_widget = ChatWidget::new(init, self.server.clone()); tui.frame_requester().schedule_frame(); } - AppEvent::DelegateRequest(payload) => { - self.handle_delegate_request(payload).await; - } AppEvent::DelegateUpdate(update) => { self.handle_delegate_update(update); } @@ 
-427,22 +422,6 @@ impl App { Ok(true) } - async fn handle_delegate_request(&mut self, payload: DelegateRequestPayload) { - let request: DelegateRequest = payload.clone().into(); - if let Err(err) = self.delegate_orchestrator.delegate(request).await { - let message = match err { - OrchestratorError::DelegateInProgress => { - "A delegate is already running.".to_string() - } - OrchestratorError::AgentNotFound(id) => format!("Agent `{id}` not found."), - OrchestratorError::DelegateSetupFailed(reason) => { - format!("Unable to start delegate: {reason}") - } - }; - self.chat_widget.add_error_message(message); - } - } - fn handle_delegate_update(&mut self, event: DelegateEvent) { match event { DelegateEvent::Started { @@ -576,21 +555,21 @@ mod tests { use crate::history_cell::HistoryCell; use crate::history_cell::UserHistoryCell; use crate::history_cell::new_session_info; - use codex_common::CliConfigOverrides; + use codex_core::AuthManager; use codex_core::CodexAuth; use codex_core::ConversationManager; - use codex_core::config::ConfigOverrides; + use codex_core::protocol::SessionConfiguredEvent; - use codex_core::protocol::SessionSource; + use codex_protocol::ConversationId; use ratatui::prelude::Line; use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::AtomicBool; - use tempfile::tempdir; + - use codex_multi_agent::AgentOrchestrator; + fn make_test_app() -> App { let (chat_widget, app_event_tx, _rx, _op_rx) = make_chatwidget_manual_with_sender(); @@ -601,15 +580,6 @@ mod tests { ))); let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); - let auth_manager_arc = auth_manager.clone(); - let temp_home = tempdir().expect("tempdir"); - let delegate_orchestrator = Arc::new(AgentOrchestrator::new( - temp_home.path().to_path_buf(), - auth_manager_arc, - SessionSource::Cli, - CliConfigOverrides::default(), - ConfigOverrides::default(), - )); let file_search = FileSearchManager::new(config.cwd.clone(), 
app_event_tx.clone()); App { @@ -617,7 +587,6 @@ mod tests { app_event_tx, chat_widget, auth_manager, - delegate_orchestrator, config, active_profile: None, file_search, diff --git a/codex-rs/tui/src/app_event.rs b/codex-rs/tui/src/app_event.rs index 9c55a451e6e4..0dc5225e4729 100644 --- a/codex-rs/tui/src/app_event.rs +++ b/codex-rs/tui/src/app_event.rs @@ -4,10 +4,7 @@ use codex_common::model_presets::ModelPreset; use codex_core::protocol::ConversationPathResponseEvent; use codex_core::protocol::Event; use codex_file_search::FileMatch; -use codex_multi_agent::AgentId; use codex_multi_agent::DelegateEvent; -use codex_multi_agent::DelegatePrompt; -use codex_multi_agent::DelegateRequest; use crate::bottom_pane::ApprovalRequest; use crate::history_cell::HistoryCell; @@ -31,9 +28,6 @@ pub(crate) enum AppEvent { /// bubbling channels through layers of widgets. CodexOp(codex_core::protocol::Op), - /// Request delegation to a sub-agent. - DelegateRequest(DelegateRequestPayload), - /// Update emitted from the orchestrator about delegate progress/completion. DelegateUpdate(DelegateEvent), @@ -98,18 +92,3 @@ pub(crate) enum AppEvent { /// Open the approval popup. 
FullScreenApprovalRequest(ApprovalRequest), } - -#[derive(Debug, Clone)] -pub(crate) struct DelegateRequestPayload { - pub agent_id: AgentId, - pub prompt: DelegatePrompt, -} - -impl From for DelegateRequest { - fn from(value: DelegateRequestPayload) -> Self { - DelegateRequest { - agent_id: value.agent_id, - prompt: value.prompt, - } - } -} diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index c0e9fbdff25d..07e6679c0936 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -59,7 +59,6 @@ use tokio::sync::mpsc::UnboundedSender; use tracing::debug; use crate::app_event::AppEvent; -use crate::app_event::DelegateRequestPayload; use crate::app_event_sender::AppEventSender; use crate::bottom_pane::ApprovalRequest; use crate::bottom_pane::BottomPane; @@ -113,7 +112,6 @@ use codex_git_tooling::GitToolingError; use codex_git_tooling::create_ghost_commit; use codex_git_tooling::restore_ghost_commit; use codex_multi_agent::AgentId; -use codex_multi_agent::DelegatePrompt; use codex_protocol::plan_tool::UpdatePlanArgs; use strum::IntoEnumIterator; @@ -2228,41 +2226,8 @@ impl ChatWidget { )); } - fn try_delegate_shortcut(&mut self, text: &str) -> bool { - let trimmed = text.trim(); - if !trimmed.starts_with('#') { - return false; - } - - let mut parts = trimmed.splitn(2, char::is_whitespace); - let tag = parts.next().unwrap_or(""); - let prompt = parts.next().unwrap_or("").trim(); - - let raw_id = tag.trim_start_matches('#').trim_end_matches(':'); - if raw_id.is_empty() { - self.add_error_message("Specify an agent after '#'.".to_string()); - return true; - } - - let agent_id = match AgentId::parse(raw_id) { - Ok(id) => id, - Err(err) => { - self.add_error_message(format!("Invalid agent id `{raw_id}`: {err}")); - return true; - } - }; - - if prompt.is_empty() { - self.add_error_message("Delegate requests need a prompt.".to_string()); - return true; - } - - let payload = DelegateRequestPayload { - agent_id, - prompt: 
DelegatePrompt::new(prompt.to_string()), - }; - self.app_event_tx.send(AppEvent::DelegateRequest(payload)); - true + fn try_delegate_shortcut(&mut self, _text: &str) -> bool { + false } /// Return a reference to the widget's current config (includes any diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index 0a37cddc6df5..b1fa500159ec 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -383,34 +383,6 @@ fn test_rate_limit_warnings_monthly() { ); } -#[test] -fn hash_prefix_routes_to_delegate() { - let (mut chat, mut rx, mut op_rx) = make_chatwidget_manual(); - - chat.submit_user_message(UserMessage { - text: "#ideas_provider explore parser modularization".to_string(), - image_paths: Vec::new(), - }); - - assert!( - op_rx.try_recv().is_err(), - "delegated prompts must not reach the primary conversation" - ); - - let mut saw_delegate = false; - while let Ok(event) = rx.try_recv() { - if matches!(event, AppEvent::DelegateRequest(_)) { - saw_delegate = true; - break; - } - } - - assert!( - saw_delegate, - "expected delegate request event for #ideas_provider" - ); -} - #[test] fn delegate_stream_deltas_and_restore_status() { let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(); @@ -460,7 +432,9 @@ fn delegate_stream_deltas_and_restore_status() { let mut saw_stop = false; while let Ok(event) = rx.try_recv() { - if let AppEvent::StopCommitAnimation = event { saw_stop = true } + if let AppEvent::StopCommitAnimation = event { + saw_stop = true + } } assert!( saw_stop, diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index 473864cf7dfb..4bcbdfe6a4db 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -146,12 +146,14 @@ pub async fn run_main( codex_linux_sandbox_exe, base_instructions: None, include_plan_tool: Some(true), + include_delegate_tool: Some(true), include_apply_patch_tool: None, include_view_image_tool: None, show_raw_agent_reasoning: 
cli.oss.then_some(true), tools_web_search_request: cli.web_search.then_some(true), }; - let delegate_config_overrides = overrides.clone(); + let mut delegate_config_overrides = overrides.clone(); + delegate_config_overrides.include_delegate_tool = Some(false); let delegate_cli_overrides = cli.config_overrides.clone(); #[allow(clippy::print_stderr)] let agent_context = match codex_multi_agent::load_agent_context( From 23c6370a26f6617511ac333d3e4813423ad159a4 Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Tue, 14 Oct 2025 23:20:21 +0000 Subject: [PATCH 05/17] update example codex-home to be simpler --- ai-temp/example-codex-home/AGENTS.md | 7 +- ai-temp/example-codex-home/README.md | 6 + .../agents/critic/AGENTS.md | 6 +- .../agents/critic/config.toml | 2 +- .../agents/ideas_provider/AGENTS.md | 6 +- ai-temp/example-conversation.md | 370 ++++++++++++++++++ 6 files changed, 387 insertions(+), 10 deletions(-) create mode 100644 ai-temp/example-conversation.md diff --git a/ai-temp/example-codex-home/AGENTS.md b/ai-temp/example-codex-home/AGENTS.md index 928dc1393cbb..3b5393808aa4 100644 --- a/ai-temp/example-codex-home/AGENTS.md +++ b/ai-temp/example-codex-home/AGENTS.md @@ -3,13 +3,14 @@ This directory demonstrates a multi-agent workflow. You are the coordinator: 1. **Understand the request.** Gather context, restate the goal, and identify missing details. -2. **Choose when to delegate.** If the task needs options, architecture, or brainstorming, craft a detailed prompt for the ideas provider and call the delegation tool. Use `#ideas_provider` inside your own reasoning or reply when it helps you remember which agent to call—the orchestrator treats the hash tag purely as a hint. -3. **Critique before action.** Once you have a leading option, summarize it for the critic via the same tool. Highlight the context and goals; `#critic` remains a hint for the tool, not a literal command. -4. 
**Synthesize next steps.** Combine ideation and critique into a concise plan, call out open questions, and suggest the single most sensible next action. +2. **Delegate ideation first.** When the user wants exploration, send a text-only brief to the ideas provider. Ask for multiple approaches, high-level steps, pros/cons, and explicit mention that no commands should run. +3. **Pass the leading option to the critic.** Summarize the approach you favor (include constraints or assumptions) and ask the critic to surface risks, blind spots, or missing tests. Remind them this is still a read-only evaluation. +4. **Synthesize the dialogue.** After both delegates reply, weave their insights into a short recommendation: highlight consensus, note any blockers, and propose what the user should decide next. General rules: - Keep your own replies short unless the user explicitly wants depth; link to `ai-temp/` docs when the user needs background. - If a request clearly doesn’t benefit from delegation, note why you’re handling it solo. - The `delegate_agent` tool is AI-only. Treat `#ideas_provider` / `#critic` as guidance in instructions; the user cannot execute sub-agents directly. +- Keep the entire flow read-only: no file writes, shell commands, or code patches—just guidance and analysis. - When testing manually, focus on describing the problem clearly so the assistant chooses the right delegate—it will ignore bare `#agent` commands unless they’re part of a well-formed instruction. diff --git a/ai-temp/example-codex-home/README.md b/ai-temp/example-codex-home/README.md index 38982a9626ac..14530a1e9257 100644 --- a/ai-temp/example-codex-home/README.md +++ b/ai-temp/example-codex-home/README.md @@ -29,4 +29,10 @@ CODEX_HOME="$(pwd)/ai-temp/example-codex-home" target/debug/codex --agent critic # Watch logs in log/codex-tui.log to confirm delegation activity. ``` +### Suggested Prompt for the Read-Only Flow + +``` +We’re scoping a read-only refactor of the parser—no code yet. 
Please gather three approaches from the ideas provider, pick the leading option, run it by the critic for risks, and then give me a combined recommendation. +``` + Unset `CODEX_HOME` (or point it back to your real path) once you're done experimenting. diff --git a/ai-temp/example-codex-home/agents/critic/AGENTS.md b/ai-temp/example-codex-home/agents/critic/AGENTS.md index a55ea71fe9c4..54516f8594e6 100644 --- a/ai-temp/example-codex-home/agents/critic/AGENTS.md +++ b/ai-temp/example-codex-home/agents/critic/AGENTS.md @@ -1,5 +1,5 @@ # Critic Agent -- Review proposals produced by other agents or the user and highlight risks, gaps, and missing tests. -- Rate each idea on a 1–5 scale for feasibility and impact. -- Recommend one actionable next step or clarifying question; avoid producing final code. +- Review the coordinator’s summarized approach and surface risks, blind spots, and missing validation. +- Stay in read-only mode: respond with analysis only—no commands, code, or approvals. +- Conclude with a short checklist of what should be clarified or prototyped next before implementation begins. diff --git a/ai-temp/example-codex-home/agents/critic/config.toml b/ai-temp/example-codex-home/agents/critic/config.toml index 19392ad4e280..3569c45a14b4 100644 --- a/ai-temp/example-codex-home/agents/critic/config.toml +++ b/ai-temp/example-codex-home/agents/critic/config.toml @@ -1,4 +1,4 @@ -model = "gpt-5-nano" +model = "gpt-5" sandbox_mode = "read-only" [history] diff --git a/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md b/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md index 86f37e6f8456..3631507bac49 100644 --- a/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md +++ b/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md @@ -1,5 +1,5 @@ # Ideas Provider Agent -- Generate creative implementation approaches, step-by-step outlines, and draft code snippets. 
-- Focus on breadth: offer at least three distinct ideas before recommending one. -- Avoid running commands; defer execution to other agents or the user. +- Produce three read-only strategies plus trade-offs for the requested change (no commands, no file edits). +- For each strategy, outline the main steps, key dependencies, and testing considerations. +- Close with a brief comparison that names the most promising option and why, leaving final judgment to the coordinator. diff --git a/ai-temp/example-conversation.md b/ai-temp/example-conversation.md new file mode 100644 index 000000000000..509d70686674 --- /dev/null +++ b/ai-temp/example-conversation.md @@ -0,0 +1,370 @@ +❯ CODEX*HOME="$(pwd)/../ai-temp/example-codex-home" target/debug/codex +╭────────────────────────────────────────────╮ +│ >* OpenAI Codex (v0.0.0) │ +│ │ +│ model: gpt-5 /model to change │ +│ directory: ~/projects/forks/codex/codex-rs │ +╰────────────────────────────────────────────╯ + +To get started, describe a task or try one of these commands: + +/init - create an AGENTS.md file with instructions for Codex +/status - show current session configuration +/approvals - choose what Codex can do without approval +/model - choose what model and reasoning effort to use +/review - review any changes and find issues + +› • We need to add a bubble-sort implementation in Python, but let’s keep it read-only for +now. Please: 1. Ask the ideas provider for three alternative designs (iterative, optimized variations, +test scaffolding) with pros/cons—text only, no code yet. 2. Pick the most promising idea, summarize it in your own words, and send that summary to +the critic asking for risks, missing tests, or edge cases. 3. Finish by combining their feedback into a short recommendation, including what we +should +verify before writing any code. + +• I’ll start by drafting a brief to the ideas provider for three text-only designs. 
+ +• Updated Plan +└ □ Gather three design options +□ Critique leading approach +□ Synthesize recommendation + +• ↳ #ideas_provider… You are the ideas provider. Text-only, no code, no commands. We need +three alternative designs to implement bubble sort in Python (read-only exploration). Please +provide: + +1. Iterative baseline: a straightforward bubble sort that repeatedly passes through the list + and swaps adjacent out-of-order elements. +2. Optimized variations: include at least early-exit (stop if a full pass makes zero swaps) + and a shrinking boundary using the last-swapped index to avoid re-checking the sorted suffix. +3. Test scaffolding strategy: how to structure tests for correctness, edge cases, property- + based checks, and simple performance sanity checks. + For each item, include: + +- High-level steps (3–6 steps) +- Pros and cons +- Complexity (time best/avg/worst, space), and stability +- Edge cases to guarantee coverage (e.g., empty list, single element, already sorted, reverse + sorted, duplicates, negative numbers, mixed types and how to treat them) +- Any assumptions or constraints (e.g., in-place vs new list, mutability, type comparability) + Keep it concise but complete. Explicitly state that no commands or code should be run. Only + produce text, suitable for planning. + +─ Worked for 56s ──────────────────────────────────────────────────────────────────────────── + +• No commands or code should be run. Text-only planning follows. + +Iterative Baseline + +- High-Level Steps + - Loop n-1 passes over the list. + - For each pass, compare adjacent pairs left-to-right. + - Swap if out of order to “bubble” larger items rightward. + - After each pass, the largest among the unsorted portion is in place at the end. + - Repeat until all passes complete. +- Pros + - Easiest to understand and implement. + - Predictable behavior for teaching and debugging. + - In-place; stable if swapping only when strictly necessary. 
+- Cons + - Always does O(n^2) comparisons even when already sorted. + - Slow for large inputs; uncompetitive vs built-ins. +- Complexity & Stability + - Time: best O(n^2), average O(n^2), worst O(n^2). + - Space: O(1) auxiliary (in-place). + - Stability: stable if swapping only when > (not >=). +- Edge Cases + - Empty list: return as-is without error. + - Single element: no swaps, return as-is. + - Already sorted: still performs full n-1 passes. + - Reverse sorted: maximum swaps; still correct. + - Duplicates: maintain original relative order. + - Negative numbers: compare normally. + - Mixed types: in Python 3, cross-type comparisons may raise TypeError; either reject + or require a key/comparator. +- Assumptions/Constraints + - In-place sort of a mutable sequence. + - Elements must be mutually comparable under the comparator (default ). + - Optional: accept a key or cmp-like callable to control ordering; if absent, rely on + native comparisons. + +Optimized Variations + +- High-Level Steps + - Early-Exit: Track whether any swap occurred in a pass; if none, stop. + - Shrinking Boundary: Track the last swapped index in each pass; limit the next pass to + this boundary (suffix already sorted). + - Left-Bubble Variant (optional): Bubble smallest leftward to handle some distributions + (not necessary if using last-swapped boundary). + - Combine early-exit and last-swapped for best practical effect. +- Pros + - Early-exit yields O(n) best-case on sorted/almost-sorted inputs. + - Last-swapped boundary cuts unnecessary comparisons for trailing sorted suffixes. + - Still simple, in-place, and stable. +- Cons + - Slightly more bookkeeping than the baseline. + - Still O(n^2) average/worst; not competitive for large n. +- Complexity & Stability + - Time: best O(n) (early-exit on sorted), average O(n^2), worst O(n^2). + - Space: O(1). + - Stability: stable when swapping only on strict comparisons. 
+- Edge Cases + - Empty, single-element: perform zero or one pass; early-exit triggers. + - Already sorted: early-exit on first pass. + - Reverse sorted: still worst-case; boundary will still shrink per pass. + - Duplicates: ensure no swaps on equal keys; verify relative order preserved. + - Negative numbers: normal behavior. + - Mixed types: as above—define policy (reject or require key). + - Pathological cases: Many equal elements; confirm stability and no unnecessary swaps + on equality. +- Assumptions/Constraints + - In-place mutation of a list-like sequence. + - Elements comparable, or a key function provided. + - Early-exit flag and last-swapped index are correctly reset each pass. + +Test Scaffolding Strategy + +- High-Level Steps + - Structure unit tests around table-driven cases (input → expected). + - Add property-based tests comparing output to Python’s sorted (reference) for random + arrays. + - Include stability tests by pairing values with original indices. + - Add light performance sanity checks on scaling (not microbenchmarks). +- Pros + - High confidence via deterministic and generative checks. + - Catches stability regressions that value-only checks miss. + - Scales coverage without verbose hand-written cases. +- Cons + - Property-based tests need care with data domains (e.g., disallow mixed incomparable + types unless explicitly supported). + - Performance checks can be noisy; keep thresholds coarse. +- Scope & Coverage + - Correctness: Verify exact equality to sorted for various distributions and sizes (0, + 1, small, medium). + - Stability: Use tuples (value, original_index) with repeated values, sort by value + only and assert original order preserved when values equal. + - Edge Cases: + - Empty list. + - Single element. + - Already sorted. + - Reverse sorted. + - All equal elements. + - Duplicates with mixed positions. + - Negative values; large magnitude values. 
+ - Floats including -0.0 and potential nan (if supporting floats, define how to + treat nan; otherwise exclude). + - Mixed types: either assert TypeError or skip if unsupported; test both policy + paths explicitly. + - Property-Based: + - Random integers across ranges; varying sizes (0–N), skewed distributions (many + duplicates). + - Optional: random floats excluding NaN if not supported; or targeted NaN tests for + defined policy. + - Compare to sorted with the same key if keys are supported. + - Performance Sanity: + - Measure rough time for sizes n in {100, 300, 900}; confirm ~9x time growth when n + triples (quadratic indication). + - Confirm early-exit: sorted input scales near linearly; time for sorted vs random + shows large gap. + - Keep wide tolerances; avoid flaky thresholds. +- Assumptions/Constraints + - Tests do not depend on external resources; deterministic seeds for random generators. + - No commands or code should be run in this planning; when implemented, use local test + runner. + - If a key function is supported, test with simple projections (e.g., abs) and + stability under equal keys. + +Comparison + +- Most promising: Optimized bubble sort with early-exit and shrinking boundary. It + preserves simplicity and stability, provides O(n) best-case on sorted/almost-sorted + inputs, and reduces unnecessary comparisons via last-swapped boundary, while remaining + in-place and easy to test. The baseline is useful for teaching and as a reference + implementation. The test scaffolding applies to both and should be adopted regardless. + Final choice can weigh readability needs (baseline) versus practical speed on nearly- + sorted data (optimized). + +• ↳ #ideas_provider completed finished in 47.4s + +───────────────────────────────────────────────────────────────────────────────────────────── + +• Great, I’ll pass the leading optimized approach to the critic to probe risks and missing +tests. 
+ +• Updated Plan +└ ✔ Gather three design options +□ Critique leading approach +□ Synthesize recommendation + +• ↳ #critic… You are the critic. Read-only evaluation, text-only, no code, no commands. +Summary of the leading approach (my words): + +- Implement an in-place, stable bubble sort in Python using two optimizations: + 1. Early-exit: track if any swap occurred in a pass; if none, terminate for O(n) best case + on already-sorted or nearly-sorted inputs. + 2. Shrinking boundary via last-swapped index: record the last index where a swap occurred; + the next pass only scans up to that index because the suffix beyond is already sorted. +- Comparisons are strict (swap only when a[i] > a[i+1]) to preserve stability. Do not swap + on equality. +- API assumptions: operate on a mutable sequence (list) in place. Accept an optional `key` + callable to enable sorting objects or custom projections; if provided, compare key(a[i]) and + key(a[i+1]). If `key` is None, use native comparisons. +- Type policy: mixed or incomparable types raise TypeError (align with Python 3 behavior). + For floats with NaN, either exclude from inputs or document behavior (Python’s `sorted` + places NaNs at the end but they compare unordered); we will either follow Python semantics or + explicitly reject NaN values—TBD. +- Complexity: best O(n), average/worst O(n^2); space O(1). Stability: stable by construction. +- Tests: table-driven unit tests across edge cases; property-based tests comparing to + Python’s `sorted` with the same `key`; explicit stability tests using pairs (value, original + index) to confirm equal keys preserve order; error-handling tests for incomparable mixes; + light performance sanity checks (quadratic scaling on random inputs; much faster on already- + sorted due to early-exit), with loose tolerances. 
+ Please surface: +- Risks, blind spots, or tricky parts (e.g., off-by-one errors in boundary logic, stability + pitfalls if equality handling drifts, behavior with NaN/partial orders, key function caching + vs recomputation trade-offs, cost of repeated key calls). +- Missing tests or edge cases we should add (e.g., many equal elements, nearly-sorted with + a late inversion, alternating patterns, large lists, custom objects with flaky comparators, + exceptions thrown inside key). +- Any assumptions that should be tightened or clarified before implementation. Keep it + concise but thorough. Still read-only; no code or commands. + +─ Worked for 1m 12s ───────────────────────────────────────────────────────────────────────── + +• Risks and Tricky Parts + +- Boundary shrink off-by-one: define the new scan limit precisely. Using the right index + of the last swap vs. left index changes the next-pass upper bound by 1; an error here can + miss needed comparisons or over-scan. +- Early-exit correctness: ensure the outer loop exits immediately when no swaps occur; + don’t accidentally run an extra empty pass. +- Stability drift: swapping on equality or using a non-strict comparator breaks stability. + Verify comparisons are strictly “greater-than” on the chosen projection. +- Key recomputation cost: naive bubble sort calls the key repeatedly per element per pass + (O(n^2) key calls). If keys are expensive, this dominates runtime. +- Key caching trade-off: full precompute gives O(n) extra space (violates stated O(1)), + while sliding-window caching keeps O(1) space but requires careful updates on swaps to + avoid stale keys. +- Partial orders/NaN: float NaN causes both x>y and y>x to be False; items may remain in + original positions and the array may not be totally ordered. Behavior will differ from + “intuitive” numeric sorting unless explicitly defined. 
+- Mixed/incomparable types: comparisons raise TypeError mid-sort and leave the list + partially mutated; document and test this. +- Comparator method choice: relying on “>” triggers gt or falls back to the right operand’s + lt. Some custom objects implement only lt; confirm behavior is acceptable and consistent + with expectations. +- Edge bounds: when the last swap occurs at index 0, the next bound becomes minimal; ensure + the loop doesn’t underflow or skip necessary comparisons when the bound is 0 or 1. +- Input mutability: only in-place sortable, index-assignable sequences are supported. + Tuples, deques, and read-only sequences should be rejected or documented. +- Best-case O(n) hinges on both early-exit and last-swapped index; if either is implemented + incorrectly, nearly-sorted inputs degrade to quadratic. +- Test flakiness from timing: “performance sanity” based on wall-clock is brittle; prefer + counting comparisons/key calls or passes. +- Exception in key: exceptions should propagate; document that the list may be partially + reordered at the point of failure. +- Large-N tests: quadratic algorithms can make tests slow/unreliable; keep sizes modest + and targeted. + +Missing Tests and Edge Cases + +- Trivial sizes: empty, single element, two elements already sorted and reversed. +- Many equals: large runs of equal keys to confirm stability and early-exit behavior. +- Nearly sorted with a late inversion (e.g., one small item near the end). +- Alternating high/low (worst-case bubbling). +- Reverse-sorted list. +- Mixed but comparable types (e.g., ints and bools) vs. incomparable mixes (e.g., ints and + strings) to validate TypeError. +- Floats: with infinities; with NaN (either assert a documented policy or skip/expect a + specific outcome). +- Custom objects: only lt implemented; flaky/non-transitive comparators (document undefined + behavior but verify no crashes). 
+- Key function: expensive key with a call counter to verify call count scaling; key raising + exceptions. +- Large-ish list within safe bounds to avoid timeouts (e.g., a few thousand elements) to + sanity-check quadratic scaling without timing assertions. +- Stability check using (value, original_index) pairs under many-equal-key scenarios. + +Assumptions to Tighten + +- Accepted input types: require collections.abc.MutableSequence (or explicitly “list + only”). Define behavior for sequences that don’t support item assignment. +- API shape: in-place, return None (align with list.sort), document no reverse or cmp args + unless intentionally supported. +- Key semantics: key must be pure and consistent; define whether keys are recomputed vs. + cached (and resulting space complexity). +- Ordering semantics: clarify stance on NaN/partial orders (reject, document stability- + preserving but not totally ordered, or emulate sorted behavior). +- Comparator direction: commit to “>” on values/keys for ascending order and state how + custom types are expected to behave. +- Complexity statement: reflect chosen key strategy (O(1) space if no caching; O(n) if + precomputed keys). + +Pre-Implementation Checklist + +- Finalize boundary-update rule (exact index used) and loop limits. +- Decide and document NaN/partial-order handling; add tests accordingly. +- Choose key strategy: recompute vs. sliding-window cache vs. full precompute; update + space-complexity note. +- Lock API details: accepted sequence types, return value, absence/presence of reverse. +- Define exception behavior (TypeError and key exceptions) and add tests. +- Replace timing checks with instrumentation (count passes/comparisons/key calls) in tests. +- Add stability stress tests (many equal keys, varied patterns). +- Confirm comparator direction and behavior with custom objects implementing only lt. 
+ +• ↳ #critic completed finished in 1m25s + +• Updated Plan +└ ✔ Gather three design options +✔ Critique leading approach +✔ Synthesize recommendation + +───────────────────────────────────────────────────────────────────────────────────────────── + +• Recommendation + +- Choose the optimized bubble sort: in-place, stable, with early-exit and a shrinking + boundary using the last-swapped index. It retains simplicity while improving best-case + behavior on nearly-sorted inputs. +- API: operate on a Python list in place; return None (list.sort-style). Support an + optional key callable; skip reverse/comp for now. +- Comparisons: swap only when strictly greater to preserve stability. When key is provided, + compare key(a[i]) to key(a[i+1]). +- Types: require mutually comparable elements under the chosen key. Mixed/incomparable + types raise TypeError and may leave partial reordering at the failure point (document). +- NaN/partial orders: exclude NaN from supported inputs to keep behavior predictable and + consistent with tests vs sorted; document explicitly. +- Key strategy: recompute keys (O(1) space) for simplicity; tests should track call counts + to avoid timing flakiness. +- Boundary update: use the right index of the last swap as the exclusive upper bound for + the next pass, to avoid off-by-one errors. + +What to verify before coding + +- Finalize loop limits and boundary rule: exact indices and termination when no swaps + occur. +- Lock API details: list-only, in-place, return None, optional key, no reverse. +- Clarify errors: TypeError on incomparable mixes; exceptions from key propagate; note + possible partial reordering on failure. +- Decide NaN policy: explicitly unsupported and tested as such. +- Tests to include: + - Table-driven: empty, one element, two elements (sorted/reversed), already sorted, + reverse sorted, many equals, alternating high/low, nearly sorted with a late + inversion, negative numbers, large magnitudes. 
+ - Stability: repeated equal keys using (value, original_index) to assert order + preservation. + - Key behavior: with/without key; expensive key with a counter; key that raises. + - Type policy: ints+strings triggering TypeError; ints+bools (comparable) behave as + expected. + - Floats: infinities allowed; NaN explicitly rejected or skipped per policy. + - Instrumentation-based performance sanity: count passes/comparisons/key calls (avoid + wall-clock). + +Rationale and consensus + +- Both delegates agree the optimized variant gives meaningful best-case gains with minimal + complexity, remains stable and in-place, and benefits from robust test scaffolding. +- Primary risks: off-by-one in the boundary logic, accidental stability regressions + (swapping on equality), undefined behavior around NaN/partial orders, and noisy timing- + based tests. +- Blockers to resolve up front: NaN policy, exact boundary rule, key recomputation vs + caching (we recommend recompute), and explicit API/exception semantics. From 525671b6b434b5801bdcc5d1adf0c1f4b8cd0ee2 Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Wed, 15 Oct 2025 14:51:46 +0000 Subject: [PATCH 06/17] doc: note delegate transition silence --- ai-temp/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ai-temp/CHANGELOG.md b/ai-temp/CHANGELOG.md index c5d514727191..1d326f99abd8 100644 --- a/ai-temp/CHANGELOG.md +++ b/ai-temp/CHANGELOG.md @@ -17,3 +17,4 @@ - Added a dedicated status indicator while a delegate runs, restored the idle header on completion, and regression-tested streaming to prevent animation regressions. - Updated the sample Codex home instructions/README, ensured the critic agent uses `gpt-5-nano`, and documented the new delegation UX in `ai-temp/ui-ux-delegation.md`. - Documented plan-tool implementation patterns and how they inform future delegation tools (`ai-temp/tool-implementation-patterns.md`). 
+- Observed that the coordinator remains silent between delegate runs (e.g., between `#ideas_provider` completion and the `#critic` request) because tool composition happens model-side without emitting UI events; leaving this behaviour in place for now. From 172f9f45799725c860bc8d394593f64c8a3b52ed Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Wed, 15 Oct 2025 17:06:47 +0000 Subject: [PATCH 07/17] plan doc for agent-switching feat --- ai-temp/agent-switching.md | 77 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 ai-temp/agent-switching.md diff --git a/ai-temp/agent-switching.md b/ai-temp/agent-switching.md new file mode 100644 index 000000000000..24056d07e46b --- /dev/null +++ b/ai-temp/agent-switching.md @@ -0,0 +1,77 @@ +# Agent Switching Flow + +## Goal +- Let users temporarily leave the main assistant, talk directly to the delegate that just ran, and then return to the primary conversation with the new context automatically folded in. +- Preserve the sub-agent’s own history/logs while also giving the main agent enough summary data to continue the original task without manual copy/paste. +- Keep this behaviour additive to the existing delegation pipeline (`ai-temp/orchestration-integration.md`) so we do not fork separate orchestration code paths. + +## Interaction Flow +1. **Primary delegation** – User asks the main agent for work; it invokes `AgentOrchestrator::delegate(...)` (`codex-rs/multi-agent/src/orchestrator.rs`) and streams the sub-agent result as today. +2. **Switch request** – Once the delegate finishes, the UI surfaces an affordance (button, slash command, or keyboard shortcut) to “enter” that delegate session. The request references the agent id plus the conversation/session handle held by the orchestrator. +3. 
**Direct conversation** – While switched, user prompts are routed straight to the sub-agent’s `ConversationManager` (`codex-rs/core/src/conversation_manager.rs:57`), writing to `~/.codex/agents/<agent_id>/history.jsonl` and `sessions/` per `ai-temp/persistence-design.md`.
Unless a sub-agent overrides its working directory, it edits the same workspace as the main agent, so users should rely on git/review tooling to resolve overlapping changes. +- Each `AgentContext` already persists the delegate’s working directory (`ConfigOverrides::cwd`). When the sub-agent runs with a non-default cwd, the return summary should echo that path so the main agent understands where the edits landed. +- Consider storing a “since marker” (session id + line number) inside the orchestrator so re-entry picks up where the user left off. +- When returning, append a history cell in the main transcript citing the sub-agent session path and summarizing the net changes. + +## UI Considerations +- Extend the delegation UI (`ai-temp/ui-ux-delegation.md`) with: + - A status banner showing `In #` while switched, with a shortcut to return to the primary agent. + - History cells that log switch events (`Entered #critic`, `Returned from #critic – applied adjustments`). + - Optional shortcut `/agent return` to exit quickly. +- Provide an autocomplete (`#agent_id · YYYY-MM-DD HH:MM`) that lists only delegates invoked during the current main-agent session so users can target the right sub-agent before switching. +- Hide picker entries whose conversations are no longer resumable (e.g., cleanup, failure). If the user attempts to switch into a stale handle, surface an error toast and keep them in the current context while logging the failure. +- While switched, the prompt input should clearly identify the active agent (e.g., placeholder text, accent color) to avoid accidental edits. +- Surface breadcrumbs in `/status` showing the current agent stack (`Main → #ideas_provider → #critic`), making nested switches easier to follow later. +- Scope out a history browser for now; we do not surface delegate sessions from previous main-agent runs. 
+ +## Edge Cases & Safeguards +- **Aborted delegate sessions** – If the orchestrator or sub-agent errors while you are switched in, emit a `DelegateEvent::Failed`, append an error history cell in the main transcript, and automatically return the user to the main agent. Also write the detailed failure to `codex-tui.log`. +- **Active-run guard** – Maintain the existing single-run invariant: attempts to start a fresh delegate while another is active (including while switched) should yield `DelegateInProgress` and a user-facing notice. +- **Multi-agent hopping** – Switching among multiple delegates is hub-and-spoke: you can move main ↔ #ideas ↔ main ↔ #critic freely. Future “delegate chains” (sub-agents invoking their own sub-agents) remain out of scope; note this in breadcrumbs/help text so expectations stay clear. +- **Undo/redo** – Codex does not provide an orchestrator-level undo stack. Any manual file edits a user performs while switched should be managed through their VCS tooling. +- **Tool overlap** – Each sub-agent carries its own tool registry (e.g., plan tool). Streaming results during the switch stay in the sub-agent transcript; summaries injected on return should mention any plan updates so the main agent context is accurate. + +## Code Impact + +### Multi-Agent Feature Surfaces +- `codex-rs/multi-agent/src/orchestrator.rs` – extend state to track active delegate sessions, add `enter_agent`/`exit_agent` helpers, and retain offsets so we know which turns to summarize when the user returns. +- `codex-rs/multi-agent/src/lib.rs` – re-export the switching API and plumb new structs/enums (e.g., switch summaries, session handles). We may add a dedicated `switching.rs` module for bookkeeping. +- `codex-rs/multi-agent/src/tests/` (new) – cover enter/exit flows, ensuring we capture only newly added turns and that summaries are produced correctly. 
+ +### Core Runtime +- `codex-rs/core/src/conversation_manager.rs` – expose APIs to hand out existing `CodexConversation` handles (or resume by rollout) so the orchestrator can park and resume sub-agent sessions. We may need a lightweight “since marker” abstraction here. +- `codex-rs/core/src/delegate_tool.rs` – extend `DelegateToolEvent`/`DelegateToolRun` to serialize manual switch summaries back to the client. +- `codex-rs/core/src/tools/handlers/delegate.rs` – accept the richer payload, surface switch-specific metadata to the model, and ensure the handler stops streaming once the user exits the sub-agent. +- `codex-rs/core/src/codex.rs` – thread the orchestrator’s switch adapter into new conversations (similar to how the delegate adapter is wired today). + +### TUI Integration +- `codex-rs/tui/src/app_event.rs` & `codex-rs/tui/src/app.rs` – introduce `AppEvent` variants for “enter agent”, “exit agent”, and “switch summaries”; drive the event loop transitions. +- `codex-rs/tui/src/chatwidget.rs` (plus `chatwidget/agent.rs`) – route user input to the active sub-agent while switched, render banners/breadcrumbs, and rehydrate the main transcript when returning. +- `codex-rs/tui/src/history_cell.rs` – add cell types for “entered delegate” / “returned from delegate” entries with session links. +- `codex-rs/tui/src/status/helpers.rs` & `/status` widgets – surface the active agent stack and recent switch history. +- `codex-rs/tui/src/slash_command.rs` – wire `/agent enter ` / `/agent return` (or similar) commands if we expose keyboard-driven switching. +- `codex-rs/tui/src/tests/` – update snapshot/unit tests to cover the new event stream and UI affordances. +- Autocomplete picker (similar to the file search UI) that lists delegates invoked during the current main-agent session as `#agent_id · YYYY-MM-DD HH:MM`, making it easy to choose the desired session before switching. +- We explicitly skip building a “replay” browser for older delegate sessions in this iteration. 
+ +### CLI & Configuration +- `codex-rs/cli/src/main.rs` – ensure the CLI still constructs the orchestrator once, passing the new switching adapter into the TUI bootstrapper. +- `docs/` – update user-facing documentation (e.g., `docs/tui.md`, `docs/multi-agent.md`) to describe how to enter/exit a delegate session. From fee47b6d603238799238117991a523be4ac207cf Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Wed, 15 Oct 2025 20:37:48 +0000 Subject: [PATCH 08/17] Tighten example agent prompts for lean delegation --- ai-temp/example-codex-home/AGENTS.md | 20 +++++++++---------- .../agents/critic/AGENTS.md | 7 ++++--- .../agents/ideas_provider/AGENTS.md | 7 ++++--- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/ai-temp/example-codex-home/AGENTS.md b/ai-temp/example-codex-home/AGENTS.md index 3b5393808aa4..bb1889ca5fb7 100644 --- a/ai-temp/example-codex-home/AGENTS.md +++ b/ai-temp/example-codex-home/AGENTS.md @@ -1,16 +1,16 @@ # Sample Primary Agent Instructions -This directory demonstrates a multi-agent workflow. You are the coordinator: +This directory demonstrates a multi-agent workflow. You coordinate any kind of request—software, product, research, storytelling, etc.—by delegating analysis to specialists: -1. **Understand the request.** Gather context, restate the goal, and identify missing details. -2. **Delegate ideation first.** When the user wants exploration, send a text-only brief to the ideas provider. Ask for multiple approaches, high-level steps, pros/cons, and explicit mention that no commands should run. -3. **Pass the leading option to the critic.** Summarize the approach you favor (include constraints or assumptions) and ask the critic to surface risks, blind spots, or missing tests. Remind them this is still a read-only evaluation. -4. **Synthesize the dialogue.** After both delegates reply, weave their insights into a short recommendation: highlight consensus, note any blockers, and propose what the user should decide next. 
+1. **Understand the request.** Restate the goal, gather missing context, and note constraints. +2. **Delegate ideation first.** When exploration is useful, brief the ideas provider (text-only) to generate concise alternative directions. +3. **Pass the leading option to the critic.** Summarize the approach you favor (include assumptions) and ask the critic to surface risks or validation gaps. +4. **Synthesize the dialogue.** After both delegates reply, produce **exactly one paragraph** (≤75 words) tying together the insights and recommending next steps—no headings, bullets, or follow-up notes. General rules: -- Keep your own replies short unless the user explicitly wants depth; link to `ai-temp/` docs when the user needs background. -- If a request clearly doesn’t benefit from delegation, note why you’re handling it solo. -- The `delegate_agent` tool is AI-only. Treat `#ideas_provider` / `#critic` as guidance in instructions; the user cannot execute sub-agents directly. -- Keep the entire flow read-only: no file writes, shell commands, or code patches—just guidance and analysis. -- When testing manually, focus on describing the problem clearly so the assistant chooses the right delegate—it will ignore bare `#agent` commands unless they’re part of a well-formed instruction. +- Keep replies short unless the user explicitly requests depth; cite `ai-temp/` docs when needed for background. +- If delegation adds no value, explain why you are handling the request directly. +- The `delegate_agent` tool is AI-only. Treat `#ideas_provider` / `#critic` tags as hints for the model; the user cannot invoke sub-agents directly. +- Stay read-only: no file writes, shell commands, or code edits—only guidance and analysis. +- When manually testing, describe the problem clearly so the coordinator chooses the right delegate; bare `#agent` commands alone are ignored. 
diff --git a/ai-temp/example-codex-home/agents/critic/AGENTS.md b/ai-temp/example-codex-home/agents/critic/AGENTS.md index 54516f8594e6..1f4b04d11890 100644 --- a/ai-temp/example-codex-home/agents/critic/AGENTS.md +++ b/ai-temp/example-codex-home/agents/critic/AGENTS.md @@ -1,5 +1,6 @@ # Critic Agent -- Review the coordinator’s summarized approach and surface risks, blind spots, and missing validation. -- Stay in read-only mode: respond with analysis only—no commands, code, or approvals. -- Conclude with a short checklist of what should be clarified or prototyped next before implementation begins. +- Stay strictly read-only: analysis comments only—no commands, code, or approvals. +- Return **exactly five** bullet points. +- Each bullet must be a **single sentence** (≤20 words) calling out a risk, blind spot, or missing validation. +- Do not add introductions, conclusions, or extra narrative—only the five bullets. diff --git a/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md b/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md index 3631507bac49..cc2714a1dd6d 100644 --- a/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md +++ b/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md @@ -1,5 +1,6 @@ # Ideas Provider Agent -- Produce three read-only strategies plus trade-offs for the requested change (no commands, no file edits). -- For each strategy, outline the main steps, key dependencies, and testing considerations. -- Close with a brief comparison that names the most promising option and why, leaving final judgment to the coordinator. +- Respond in read-only mode: no commands, code, or file edits. +- Return **exactly five** bullet points. +- Each bullet must be a **single sentence** (≤20 words) describing a distinct approach and its key testing focus. +- Do not add intros, outros, or extra commentary—just the five bullets. 
From 410e9400cfeb8dfb1cc1ef17b79a8b681b20762a Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Wed, 15 Oct 2025 20:40:31 +0000 Subject: [PATCH 09/17] Improve agent switching flow, show active delegate indicator, and constrain example prompts --- ai-temp/agent-switching.md | 2 + codex-rs/multi-agent/src/lib.rs | 2 + codex-rs/multi-agent/src/orchestrator.rs | 137 +++++++- codex-rs/tui/src/app.rs | 239 +++++++++++++ codex-rs/tui/src/app_event.rs | 11 + codex-rs/tui/src/bottom_pane/chat_composer.rs | 319 +++++++++++++++++- .../tui/src/bottom_pane/delegate_popup.rs | 153 +++++++++ codex-rs/tui/src/bottom_pane/footer.rs | 61 +++- codex-rs/tui/src/bottom_pane/mod.rs | 23 ++ ...ter__tests__footer_shortcuts_delegate.snap | 5 + codex-rs/tui/src/chatwidget.rs | 251 +++++++++++++- codex-rs/tui/src/chatwidget/tests.rs | 4 + codex-rs/tui/src/lib.rs | 66 +++- codex-rs/tui/src/slash_command.rs | 3 + 14 files changed, 1221 insertions(+), 55 deletions(-) create mode 100644 codex-rs/tui/src/bottom_pane/delegate_popup.rs create mode 100644 codex-rs/tui/src/bottom_pane/snapshots/codex_tui__bottom_pane__footer__tests__footer_shortcuts_delegate.snap diff --git a/ai-temp/agent-switching.md b/ai-temp/agent-switching.md index 24056d07e46b..cc10158439c5 100644 --- a/ai-temp/agent-switching.md +++ b/ai-temp/agent-switching.md @@ -37,6 +37,8 @@ - History cells that log switch events (`Entered #critic`, `Returned from #critic – applied adjustments`). - Optional shortcut `/agent return` to exit quickly. - Provide an autocomplete (`#agent_id · YYYY-MM-DD HH:MM`) that lists only delegates invoked during the current main-agent session so users can target the right sub-agent before switching. +- The composer now surfaces this autocomplete automatically when typing `#`; selecting an entry both inserts `#agent_id` into the prompt and switches context to that agent. +- While switched, show an inline footer indicator (`In #agent`) next to the context meter so the active delegate is always visible. 
- Hide picker entries whose conversations are no longer resumable (e.g., cleanup, failure). If the user attempts to switch into a stale handle, surface an error toast and keep them in the current context while logging the failure. - While switched, the prompt input should clearly identify the active agent (e.g., placeholder text, accent color) to avoid accidental edits. - Surface breadcrumbs in `/status` showing the current agent stack (`Main → #ideas_provider → #critic`), making nested switches easier to follow later. diff --git a/codex-rs/multi-agent/src/lib.rs b/codex-rs/multi-agent/src/lib.rs index 7bbbf0c48849..5d9b7c9374d8 100644 --- a/codex-rs/multi-agent/src/lib.rs +++ b/codex-rs/multi-agent/src/lib.rs @@ -333,11 +333,13 @@ fn merge_toml_values(base: &mut TomlValue, overlay: &TomlValue) { } pub mod orchestrator; +pub use orchestrator::ActiveDelegateSession; pub use orchestrator::AgentOrchestrator; pub use orchestrator::DelegateEvent; pub use orchestrator::DelegatePrompt; pub use orchestrator::DelegateRequest; pub use orchestrator::DelegateRunId; +pub use orchestrator::DelegateSessionSummary; use orchestrator::MultiAgentDelegateAdapter; pub use orchestrator::OrchestratorError; diff --git a/codex-rs/multi-agent/src/orchestrator.rs b/codex-rs/multi-agent/src/orchestrator.rs index d89f3c1d18a3..ebef7afe8253 100644 --- a/codex-rs/multi-agent/src/orchestrator.rs +++ b/codex-rs/multi-agent/src/orchestrator.rs @@ -1,10 +1,14 @@ +use std::collections::HashMap; +use std::path::PathBuf; use std::sync::Arc; use std::time::SystemTime; use async_trait::async_trait; use codex_common::CliConfigOverrides; use codex_core::AuthManager; +use codex_core::CodexConversation; use codex_core::ConversationManager; +use codex_core::config::Config; use codex_core::config::ConfigOverrides; use codex_core::delegate_tool::DelegateEventReceiver as CoreDelegateEventReceiver; use codex_core::delegate_tool::DelegateToolAdapter; @@ -15,6 +19,7 @@ use 
codex_core::delegate_tool::DelegateToolRun; use codex_core::protocol::EventMsg; use codex_core::protocol::InputItem; use codex_core::protocol::Op; +use codex_core::protocol::SessionConfiguredEvent; use codex_core::protocol::SessionSource; use std::time::Duration; use tokio::sync::Mutex; @@ -32,6 +37,7 @@ pub type DelegateRunId = String; pub struct DelegateRequest { pub agent_id: AgentId, pub prompt: DelegatePrompt, + pub user_initial: Vec, } /// The prompt content forwarded to the sub-agent. @@ -82,6 +88,26 @@ pub enum OrchestratorError { AgentNotFound(String), #[error("failed to enqueue delegate: {0}")] DelegateSetupFailed(String), + #[error("delegate session `{0}` not found")] + SessionNotFound(String), +} + +/// High-level metadata describing a delegate session available for switching. +#[derive(Debug, Clone)] +pub struct DelegateSessionSummary { + pub conversation_id: String, + pub agent_id: AgentId, + pub last_interacted_at: SystemTime, + pub cwd: PathBuf, +} + +/// Payload returned when entering an existing delegate session. 
+#[derive(Clone)] +pub struct ActiveDelegateSession { + pub summary: DelegateSessionSummary, + pub conversation: Arc, + pub session_configured: Arc, + pub config: Config, } /// Lightweight controller that spins up sub-agent conversations on demand and @@ -94,6 +120,7 @@ pub struct AgentOrchestrator { config_overrides: ConfigOverrides, listeners: Mutex>>, active_run: Mutex>, + sessions: Mutex>, } impl AgentOrchestrator { @@ -113,6 +140,7 @@ impl AgentOrchestrator { config_overrides, listeners: Mutex::new(Vec::new()), active_run: Mutex::new(None), + sessions: Mutex::new(HashMap::new()), } } @@ -169,12 +197,16 @@ impl AgentOrchestrator { match result { Ok(output) => { + orchestrator.store_session(&output).await; + let agent_id = output.agent_id.clone(); + let message = output.message.clone(); + let duration = output.duration; orchestrator .emit(DelegateEvent::Completed { run_id: run_id_clone.clone(), - agent_id: output.agent_id, - output: output.message, - duration: output.duration, + agent_id, + output: message, + duration, }) .await; } @@ -201,6 +233,68 @@ impl AgentOrchestrator { listeners.retain(|tx| tx.send(event.clone()).is_ok()); } + /// Return all active delegate sessions ordered by most recent interaction. + pub async fn active_sessions(&self) -> Vec { + let sessions = self.sessions.lock().await; + let mut summaries: Vec<_> = sessions + .values() + .map(|entry| entry.summary.clone()) + .collect(); + summaries.sort_by(|a, b| b.last_interacted_at.cmp(&a.last_interacted_at)); + summaries + } + + /// Enter an existing delegate session for direct interaction. 
+ pub async fn enter_session( + &self, + conversation_id: &str, + ) -> Result { + let mut sessions = self.sessions.lock().await; + let entry = sessions + .get_mut(conversation_id) + .ok_or_else(|| OrchestratorError::SessionNotFound(conversation_id.to_string()))?; + entry.summary.last_interacted_at = SystemTime::now(); + Ok(ActiveDelegateSession { + summary: entry.summary.clone(), + conversation: entry.conversation.clone(), + session_configured: entry.session_configured.clone(), + config: entry.config.clone(), + }) + } + + /// Remove a delegate session – used when the conversation is closed or no longer usable. + pub async fn remove_session(&self, conversation_id: &str) { + let mut sessions = self.sessions.lock().await; + sessions.remove(conversation_id); + } + + /// Refresh the session's last-interacted timestamp without opening it. + pub async fn touch_session(&self, conversation_id: &str) { + let mut sessions = self.sessions.lock().await; + if let Some(entry) = sessions.get_mut(conversation_id) { + entry.summary.last_interacted_at = SystemTime::now(); + } + } + + async fn store_session(&self, success: &DelegateSuccess) { + let mut sessions = self.sessions.lock().await; + let summary = DelegateSessionSummary { + conversation_id: success.conversation_id.clone(), + agent_id: success.agent_id.clone(), + last_interacted_at: SystemTime::now(), + cwd: success.cwd.clone(), + }; + sessions.insert( + success.conversation_id.clone(), + StoredDelegateSession { + summary, + conversation: success.conversation.clone(), + session_configured: success.session_configured.clone(), + config: success.config.clone(), + }, + ); + } + async fn run_delegate_task( self: Arc, loader: AgentConfigLoader, @@ -222,6 +316,8 @@ impl AgentOrchestrator { })?; let config = context.into_config(); + let cwd = config.cwd.clone(); + let config_clone = config.clone(); let conversation_manager = Arc::new(ConversationManager::new( auth_manager.clone(), session_source, @@ -234,21 +330,17 @@ impl 
AgentOrchestrator { agent_id: agent_id.clone(), error: format!("failed to start conversation: {err:#}"), })?; + let conversation_id = conversation_bundle.conversation_id.to_string(); + let session_configured = Arc::new(conversation_bundle.session_configured); let conversation = conversation_bundle.conversation; let mut items = Vec::new(); + items.extend(request.user_initial.clone()); if !request.prompt.text.trim().is_empty() { items.push(InputItem::Text { text: request.prompt.text.clone(), }); } - if items.is_empty() { - return Err(DelegateFailure { - agent_id: agent_id.clone(), - error: "delegated prompt is empty".to_string(), - }); - } - conversation .submit(Op::UserInput { items }) .await @@ -296,23 +388,24 @@ impl AgentOrchestrator { .last_agent_message .or_else(|| (!aggregated.is_empty()).then_some(aggregated.clone())); - let _ = conversation.submit(Op::Shutdown).await; - return Ok(DelegateSuccess { agent_id, + conversation_id, + conversation: conversation.clone(), + session_configured: session_configured.clone(), + cwd: cwd.clone(), + config: config_clone.clone(), message, duration, }); } EventMsg::Error(err) => { - let _ = conversation.submit(Op::Shutdown).await; return Err(DelegateFailure { agent_id, error: format!("delegate reported error: {}", err.message), }); } EventMsg::TurnAborted(reason) => { - let _ = conversation.submit(Op::Shutdown).await; return Err(DelegateFailure { agent_id, error: format!("delegate aborted: {:?}", reason.reason), @@ -332,6 +425,11 @@ impl AgentOrchestrator { struct DelegateSuccess { agent_id: AgentId, + conversation_id: String, + conversation: Arc, + session_configured: Arc, + cwd: PathBuf, + config: Config, message: Option, duration: Duration, } @@ -341,6 +439,13 @@ struct DelegateFailure { error: String, } +struct StoredDelegateSession { + summary: DelegateSessionSummary, + conversation: Arc, + session_configured: Arc, + config: Config, +} + pub struct MultiAgentDelegateAdapter { orchestrator: Arc, } @@ -402,6 +507,9 @@ impl 
MultiAgentDelegateAdapter { OrchestratorError::DelegateSetupFailed(reason) => { DelegateToolError::SetupFailed(reason) } + OrchestratorError::SessionNotFound(session_id) => { + DelegateToolError::SetupFailed(format!("session not found: {session_id}")) + } } } } @@ -433,6 +541,7 @@ impl DelegateToolAdapter for MultiAgentDelegateAdapter { .delegate(DelegateRequest { agent_id: agent_id.clone(), prompt: DelegatePrompt::new(request.prompt), + user_initial: Vec::new(), }) .await .map_err(Self::map_error)?; diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index 06b63d00ba71..5f4d4b96d7d5 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -3,6 +3,7 @@ use crate::app_event::AppEvent; use crate::app_event_sender::AppEventSender; use crate::bottom_pane::ApprovalRequest; use crate::chatwidget::ChatWidget; +use crate::chatwidget::ChatWidgetInit; use crate::diff_render::DiffSummary; use crate::exec_command::strip_bash_lc_and_escape; use crate::file_search::FileSearchManager; @@ -18,11 +19,13 @@ use codex_core::ConversationManager; use codex_core::config::Config; use codex_core::config::persist_model_selection; use codex_core::model_family::find_family_for_model; +use codex_core::protocol::SessionConfiguredEvent; use codex_core::protocol::SessionSource; use codex_core::protocol::TokenUsage; use codex_core::protocol_config_types::ReasoningEffort as ReasoningEffortConfig; use codex_multi_agent::AgentOrchestrator; use codex_multi_agent::DelegateEvent; +use codex_multi_agent::DelegateSessionSummary; use codex_multi_agent::delegate_tool_adapter; use codex_protocol::ConversationId; use color_eyre::eyre::Result; @@ -32,12 +35,14 @@ use crossterm::event::KeyEvent; use crossterm::event::KeyEventKind; use ratatui::style::Stylize; use ratatui::text::Line; +use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; use std::thread; use std::time::Duration; +use 
std::time::SystemTime; use tokio::select; use tokio::sync::mpsc::unbounded_channel; // use uuid::Uuid; @@ -53,6 +58,7 @@ pub(crate) struct App { pub(crate) app_event_tx: AppEventSender, pub(crate) chat_widget: ChatWidget, pub(crate) auth_manager: Arc, + pub(crate) delegate_orchestrator: Arc, /// Config is stored here so we can recreate ChatWidgets as needed. pub(crate) config: Config, @@ -74,6 +80,11 @@ pub(crate) struct App { // Esc-backtracking state grouped pub(crate) backtrack: crate::app_backtrack::BacktrackState, + + delegate_sessions: HashMap, + active_delegate: Option, + active_delegate_summary: Option, + primary_chat_backup: Option, } impl App { @@ -157,6 +168,7 @@ impl App { app_event_tx, chat_widget, auth_manager: auth_manager.clone(), + delegate_orchestrator, config, active_profile, file_search, @@ -167,6 +179,10 @@ impl App { has_emitted_history_lines: false, commit_anim_running: Arc::new(AtomicBool::new(false)), backtrack: BacktrackState::default(), + delegate_sessions: HashMap::new(), + active_delegate: None, + active_delegate_summary: None, + primary_chat_backup: None, }; let tui_events = tui.event_stream(); @@ -329,6 +345,24 @@ impl App { self.file_search.on_user_query(query); } } + AppEvent::StartDelegateSearch(query) => { + let all_sessions = self.delegate_orchestrator.active_sessions().await; + let query_lower = query.to_lowercase(); + let mut matches: Vec<_> = all_sessions + .into_iter() + .filter(|summary| { + query_lower.is_empty() + || summary + .agent_id + .as_str() + .to_lowercase() + .contains(&query_lower) + }) + .collect(); + matches.sort_by(|a, b| b.last_interacted_at.cmp(&a.last_interacted_at)); + self.chat_widget + .apply_delegate_search_result(query, matches); + } AppEvent::FileSearchResult { query, matches } => { self.chat_widget.apply_file_search_result(query, matches); } @@ -390,6 +424,32 @@ impl App { AppEvent::UpdateSandboxPolicy(policy) => { self.chat_widget.set_sandbox_policy(policy); } + AppEvent::OpenDelegatePicker => { + 
let sessions = self.delegate_orchestrator.active_sessions().await; + if sessions.is_empty() { + self.chat_widget.add_info_message( + "No delegate sessions available.".to_string(), + Some("Ask the main agent to delegate a task first.".to_string()), + ); + } else { + self.chat_widget + .open_delegate_picker(sessions, self.active_delegate.as_deref()); + } + } + AppEvent::EnterDelegateSession(conversation_id) => { + if let Err(err) = self.activate_delegate_session(tui, conversation_id).await { + tracing::error!("failed to enter delegate session: {err}"); + self.chat_widget + .add_error_message(format!("Failed to open delegate: {err}")); + } + } + AppEvent::ExitDelegateSession => { + if let Err(err) = self.return_to_primary(tui).await { + tracing::error!("failed to return to primary agent: {err}"); + self.chat_widget + .add_error_message(format!("Failed to return to main agent: {err}")); + } + } AppEvent::OpenReviewBranchPicker(cwd) => { self.chat_widget.show_review_branch_picker(&cwd).await; } @@ -463,6 +523,121 @@ impl App { } } + async fn activate_delegate_session( + &mut self, + tui: &mut tui::Tui, + conversation_id: String, + ) -> Result<(), String> { + if self.active_delegate.as_deref() == Some(conversation_id.as_str()) { + return Ok(()); + } + + if self.active_delegate.is_some() { + self.stash_active_delegate(); + } + + let state = if let Some(state) = self.delegate_sessions.remove(&conversation_id) { + state + } else { + let session = self + .delegate_orchestrator + .enter_session(&conversation_id) + .await + .map_err(|err| format!("{err}"))?; + let init = ChatWidgetInit { + config: session.config.clone(), + frame_requester: tui.frame_requester(), + app_event_tx: self.app_event_tx.clone(), + initial_prompt: None, + initial_images: Vec::new(), + enhanced_keys_supported: self.enhanced_keys_supported, + auth_manager: self.auth_manager.clone(), + }; + let session_configured = expect_unique_session_configured(session.session_configured); + let mut chat_widget = + 
ChatWidget::new_from_existing(init, session.conversation, session_configured); + chat_widget.set_delegate_context(Some(session.summary.clone())); + DelegateSessionState { + summary: session.summary, + chat_widget, + } + }; + + let DelegateSessionState { + summary, + mut chat_widget, + } = state; + chat_widget.set_delegate_context(Some(summary.clone())); + let mut previous = std::mem::replace(&mut self.chat_widget, chat_widget); + previous.set_delegate_context(None); + self.primary_chat_backup = Some(previous); + self.active_delegate = Some(conversation_id.clone()); + self.active_delegate_summary = Some(summary.clone()); + self.chat_widget.set_delegate_context(Some(summary.clone())); + self.delegate_orchestrator + .touch_session(&conversation_id) + .await; + tui.frame_requester().schedule_frame(); + Ok(()) + } + + fn stash_active_delegate(&mut self) { + if let Some(active_id) = self.active_delegate.take() { + let mut summary = self + .active_delegate_summary + .take() + .expect("delegate summary missing"); + let main_chat = self + .primary_chat_backup + .take() + .expect("primary chat missing when stashing delegate"); + summary.last_interacted_at = SystemTime::now(); + let mut delegate_chat = std::mem::replace(&mut self.chat_widget, main_chat); + delegate_chat.set_delegate_context(Some(summary.clone())); + self.chat_widget.set_delegate_context(None); + self.delegate_sessions.insert( + active_id, + DelegateSessionState { + summary, + chat_widget: delegate_chat, + }, + ); + } + } + + async fn return_to_primary(&mut self, tui: &mut tui::Tui) -> Result<(), String> { + if let Some(active_id) = self.active_delegate.take() { + let mut summary = self + .active_delegate_summary + .take() + .expect("delegate summary missing"); + let capture = self.chat_widget.take_delegate_capture(); + let main_chat = self + .primary_chat_backup + .take() + .ok_or_else(|| "primary conversation unavailable".to_string())?; + summary.last_interacted_at = SystemTime::now(); + let mut 
delegate_chat = std::mem::replace(&mut self.chat_widget, main_chat); + delegate_chat.set_delegate_context(Some(summary.clone())); + self.chat_widget.set_delegate_context(None); + self.delegate_sessions.insert( + active_id.clone(), + DelegateSessionState { + summary: summary.clone(), + chat_widget: delegate_chat, + }, + ); + self.delegate_orchestrator.touch_session(&active_id).await; + self.primary_chat_backup = None; + self.active_delegate_summary = None; + if let Some(capture) = capture { + self.chat_widget.apply_delegate_summary(&summary, capture); + } + tui.frame_requester().schedule_frame(); + } + Ok(()) + } + fn format_delegate_duration(duration: Duration) -> String { if duration.as_secs() >= 60 { let mins = duration.as_secs() / 60; @@ -544,6 +719,17 @@ impl App { } } +struct DelegateSessionState { + summary: DelegateSessionSummary, + chat_widget: ChatWidget, +} + +fn expect_unique_session_configured( + session_configured: Arc, +) -> SessionConfiguredEvent { + Arc::unwrap_or_clone(session_configured) +} + #[cfg(test)] mod tests { use super::*; @@ -556,11 +742,14 @@ mod tests { use crate::history_cell::UserHistoryCell; use crate::history_cell::new_session_info; + use codex_common::CliConfigOverrides; use codex_core::AuthManager; use codex_core::CodexAuth; use codex_core::ConversationManager; + use codex_core::config::ConfigOverrides; use codex_core::protocol::SessionConfiguredEvent; + use codex_core::protocol::SessionSource; use codex_protocol::ConversationId; use ratatui::prelude::Line; @@ -578,12 +767,36 @@ mod tests { let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); let file_search = FileSearchManager::new(config.cwd.clone(), app_event_tx.clone()); + let delegate_orchestrator = Arc::new(AgentOrchestrator::new( + config.codex_home.clone(), + auth_manager.clone(), + SessionSource::Cli, + CliConfigOverrides::default(), + ConfigOverrides { + model: None, + review_model: None, + cwd: None, + approval_policy: None, + 
sandbox_mode: None, + model_provider: None, + config_profile: None, + codex_linux_sandbox_exe: None, + base_instructions: None, + include_plan_tool: None, + include_delegate_tool: None, + include_apply_patch_tool: None, + include_view_image_tool: None, + show_raw_agent_reasoning: None, + tools_web_search_request: None, + }, + )); App { server, app_event_tx, chat_widget, auth_manager, + delegate_orchestrator, config, active_profile: None, file_search, @@ -594,6 +807,10 @@ mod tests { enhanced_keys_supported: false, commit_anim_running: Arc::new(AtomicBool::new(false)), backtrack: BacktrackState::default(), + delegate_sessions: HashMap::new(), + active_delegate: None, + active_delegate_summary: None, + primary_chat_backup: None, } } @@ -677,4 +894,26 @@ mod tests { assert_eq!(nth, 1); assert_eq!(prefill, "follow-up (edited)"); } + + #[test] + fn expect_unique_session_configured_clones_when_shared() { + let event = SessionConfiguredEvent { + session_id: ConversationId::new(), + model: "gpt-test".to_string(), + reasoning_effort: None, + history_log_id: 0, + history_entry_count: 0, + initial_messages: None, + rollout_path: PathBuf::new(), + }; + + let shared = Arc::new(event.clone()); + let _other_owner = Arc::clone(&shared); + + let resolved = expect_unique_session_configured(shared); + + assert_eq!(resolved.model, event.model); + assert_eq!(resolved.history_log_id, event.history_log_id); + assert_eq!(resolved.history_entry_count, event.history_entry_count); + } } diff --git a/codex-rs/tui/src/app_event.rs b/codex-rs/tui/src/app_event.rs index 0dc5225e4729..5e97609e2bba 100644 --- a/codex-rs/tui/src/app_event.rs +++ b/codex-rs/tui/src/app_event.rs @@ -35,6 +35,8 @@ pub(crate) enum AppEvent { /// the `@`). Previous searches may be cancelled by the app layer so there /// is at most one in-flight search. StartFileSearch(String), + /// Kick off delegate suggestions for the current `#` token. + StartDelegateSearch(String), /// Result of a completed asynchronous file search. 
The `query` echoes the /// original search term so the UI can decide whether the results are @@ -77,6 +79,15 @@ pub(crate) enum AppEvent { /// Update the current sandbox policy in the running app and widget. UpdateSandboxPolicy(SandboxPolicy), + /// Request to open the delegate session picker. + OpenDelegatePicker, + + /// Switch into the provided delegate session. + EnterDelegateSession(String), + + /// Return from the active delegate session to the main agent. + ExitDelegateSession, + /// Forwarded conversation history snapshot from the current conversation. ConversationHistory(ConversationPathResponseEvent), diff --git a/codex-rs/tui/src/bottom_pane/chat_composer.rs b/codex-rs/tui/src/bottom_pane/chat_composer.rs index 0d1395d51d9f..2769104da0cc 100644 --- a/codex-rs/tui/src/bottom_pane/chat_composer.rs +++ b/codex-rs/tui/src/bottom_pane/chat_composer.rs @@ -18,6 +18,7 @@ use ratatui::widgets::WidgetRef; use super::chat_composer_history::ChatComposerHistory; use super::command_popup::CommandItem; use super::command_popup::CommandPopup; +use super::delegate_popup::DelegatePopup; use super::file_search_popup::FileSearchPopup; use super::footer::FooterMode; use super::footer::FooterProps; @@ -50,6 +51,7 @@ use crate::clipboard_paste::pasted_image_format; use crate::history_cell; use crate::ui_consts::LIVE_PREFIX_COLS; use codex_file_search::FileMatch; +use codex_multi_agent::DelegateSessionSummary; use std::cell::RefCell; use std::collections::HashMap; use std::path::Path; @@ -96,6 +98,8 @@ pub(crate) struct ChatComposer { use_shift_enter_hint: bool, dismissed_file_popup_token: Option, current_file_query: Option, + dismissed_delegate_popup_token: Option, + current_delegate_query: Option, pending_pastes: Vec<(String, String)>, has_focus: bool, attached_images: Vec, @@ -109,6 +113,7 @@ pub(crate) struct ChatComposer { footer_mode: FooterMode, footer_hint_override: Option>, context_window_percent: Option, + delegate_label: Option, } /// Popup state – at most one can 
be visible at any time. @@ -116,6 +121,7 @@ enum ActivePopup { None, Command(CommandPopup), File(FileSearchPopup), + Delegate(DelegatePopup), } const FOOTER_SPACING_HEIGHT: u16 = 0; @@ -141,6 +147,8 @@ impl ChatComposer { use_shift_enter_hint, dismissed_file_popup_token: None, current_file_query: None, + dismissed_delegate_popup_token: None, + current_delegate_query: None, pending_pastes: Vec::new(), has_focus: has_input_focus, attached_images: Vec::new(), @@ -152,6 +160,7 @@ impl ChatComposer { footer_mode: FooterMode::ShortcutSummary, footer_hint_override: None, context_window_percent: None, + delegate_label: None, }; // Apply configuration via the setter to keep side-effects centralized. this.set_disable_paste_burst(disable_paste_burst); @@ -162,7 +171,7 @@ impl ChatComposer { let footer_props = self.footer_props(); let footer_hint_height = self .custom_footer_height() - .unwrap_or_else(|| footer_height(footer_props)); + .unwrap_or_else(|| footer_height(&footer_props)); let footer_spacing = Self::footer_spacing(footer_hint_height); let footer_total_height = footer_hint_height + footer_spacing; const COLS_WITH_MARGIN: u16 = LIVE_PREFIX_COLS + 1; @@ -173,6 +182,7 @@ impl ChatComposer { ActivePopup::None => footer_total_height, ActivePopup::Command(c) => c.calculate_required_height(width), ActivePopup::File(c) => c.calculate_required_height(), + ActivePopup::Delegate(c) => c.calculate_required_height(), } } @@ -180,7 +190,7 @@ impl ChatComposer { let footer_props = self.footer_props(); let footer_hint_height = self .custom_footer_height() - .unwrap_or_else(|| footer_height(footer_props)); + .unwrap_or_else(|| footer_height(&footer_props)); let footer_spacing = Self::footer_spacing(footer_hint_height); let footer_total_height = footer_hint_height + footer_spacing; let popup_constraint = match &self.active_popup { @@ -188,6 +198,7 @@ impl ChatComposer { Constraint::Max(popup.calculate_required_height(area.width)) } ActivePopup::File(popup) => 
Constraint::Max(popup.calculate_required_height()), + ActivePopup::Delegate(popup) => Constraint::Max(popup.calculate_required_height()), ActivePopup::None => Constraint::Max(footer_total_height), }; let mut area = area; @@ -265,8 +276,10 @@ impl ChatComposer { self.sync_command_popup(); if matches!(self.active_popup, ActivePopup::Command(_)) { self.dismissed_file_popup_token = None; + self.dismissed_delegate_popup_token = None; } else { self.sync_file_search_popup(); + self.sync_delegate_popup(); } true } @@ -314,6 +327,7 @@ impl ChatComposer { self.textarea.set_cursor(0); self.sync_command_popup(); self.sync_file_search_popup(); + self.sync_delegate_popup(); } /// Get the current composer text. @@ -369,6 +383,25 @@ impl ChatComposer { } } + pub(crate) fn on_delegate_search_result( + &mut self, + query: String, + matches: Vec, + ) { + let current_opt = Self::current_hash_token(&self.textarea); + let Some(current_token) = current_opt else { + return; + }; + + if !current_token.starts_with(&query) { + return; + } + + if let ActivePopup::Delegate(popup) = &mut self.active_popup { + popup.set_matches(&query, matches); + } + } + pub fn set_ctrl_c_quit_hint(&mut self, show: bool, has_focus: bool) { self.ctrl_c_quit_hint = show; if show { @@ -383,6 +416,7 @@ impl ChatComposer { self.textarea.insert_str(text); self.sync_command_popup(); self.sync_file_search_popup(); + self.sync_delegate_popup(); } /// Handle a key event coming from the main UI. @@ -390,15 +424,29 @@ impl ChatComposer { let result = match &mut self.active_popup { ActivePopup::Command(_) => self.handle_key_event_with_slash_popup(key_event), ActivePopup::File(_) => self.handle_key_event_with_file_popup(key_event), + ActivePopup::Delegate(_) => self.handle_key_event_with_delegate_popup(key_event), ActivePopup::None => self.handle_key_event_without_popup(key_event), }; // Update (or hide/show) popup after processing the key. 
self.sync_command_popup(); - if matches!(self.active_popup, ActivePopup::Command(_)) { - self.dismissed_file_popup_token = None; - } else { - self.sync_file_search_popup(); + match self.active_popup { + ActivePopup::Command(_) => { + self.dismissed_file_popup_token = None; + self.dismissed_delegate_popup_token = None; + } + ActivePopup::File(_) => { + self.sync_file_search_popup(); + self.sync_delegate_popup(); + } + ActivePopup::Delegate(_) => { + self.sync_delegate_popup(); + self.sync_file_search_popup(); + } + ActivePopup::None => { + self.sync_file_search_popup(); + self.sync_delegate_popup(); + } } result @@ -688,6 +736,70 @@ impl ChatComposer { } } + fn handle_key_event_with_delegate_popup(&mut self, key_event: KeyEvent) -> (InputResult, bool) { + if self.handle_shortcut_overlay_key(&key_event) { + return (InputResult::None, true); + } + if key_event.code == KeyCode::Esc { + let next_mode = esc_hint_mode(self.footer_mode, self.is_task_running); + if next_mode != self.footer_mode { + self.footer_mode = next_mode; + return (InputResult::None, true); + } + } else { + self.footer_mode = reset_mode_after_activity(self.footer_mode); + } + let ActivePopup::Delegate(popup) = &mut self.active_popup else { + unreachable!(); + }; + + match key_event { + KeyEvent { + code: KeyCode::Up, .. + } => { + popup.move_up(); + (InputResult::None, true) + } + KeyEvent { + code: KeyCode::Down, + .. + } => { + popup.move_down(); + (InputResult::None, true) + } + KeyEvent { + code: KeyCode::Esc, .. + } => { + if let Some(tok) = Self::current_hash_token(&self.textarea) { + self.dismissed_delegate_popup_token = Some(tok); + } + self.active_popup = ActivePopup::None; + (InputResult::None, true) + } + KeyEvent { + code: KeyCode::Tab, .. + } + | KeyEvent { + code: KeyCode::Enter, + modifiers: KeyModifiers::NONE, + .. 
+ } => { + let Some(session) = popup.selected_session().cloned() else { + self.active_popup = ActivePopup::None; + return (InputResult::None, true); + }; + let agent_token = session.agent_id.as_str().to_owned(); + let conversation_id = session.conversation_id.clone(); + self.insert_selected_delegate(agent_token.as_str()); + self.app_event_tx + .send(AppEvent::EnterDelegateSession(conversation_id)); + self.active_popup = ActivePopup::None; + (InputResult::None, true) + } + input => self.handle_input_basic(input), + } + } + fn is_image_path(path: &str) -> bool { let lower = path.to_ascii_lowercase(); lower.ends_with(".png") || lower.ends_with(".jpg") || lower.ends_with(".jpeg") @@ -795,6 +907,90 @@ impl ChatComposer { left_at.or(right_at) } + fn current_hash_token(textarea: &TextArea) -> Option { + let cursor_offset = textarea.cursor(); + let text = textarea.text(); + + let mut safe_cursor = cursor_offset.min(text.len()); + if safe_cursor < text.len() && !text.is_char_boundary(safe_cursor) { + safe_cursor = text + .char_indices() + .map(|(i, _)| i) + .take_while(|&i| i <= cursor_offset) + .last() + .unwrap_or(0); + } + + let before_cursor = &text[..safe_cursor]; + let after_cursor = &text[safe_cursor..]; + + let at_whitespace = if safe_cursor < text.len() { + text[safe_cursor..] 
+ .chars() + .next() + .map(char::is_whitespace) + .unwrap_or(false) + } else { + false + }; + + let start_left = before_cursor + .char_indices() + .rfind(|(_, c)| c.is_whitespace()) + .map(|(idx, c)| idx + c.len_utf8()) + .unwrap_or(0); + let end_left_rel = after_cursor + .char_indices() + .find(|(_, c)| c.is_whitespace()) + .map(|(idx, _)| idx) + .unwrap_or(after_cursor.len()); + let end_left = safe_cursor + end_left_rel; + let token_left = if start_left < end_left { + Some(&text[start_left..end_left]) + } else { + None + }; + + let ws_len_right: usize = after_cursor + .chars() + .take_while(|c| c.is_whitespace()) + .map(char::len_utf8) + .sum(); + let start_right = safe_cursor + ws_len_right; + let end_right_rel = text[start_right..] + .char_indices() + .find(|(_, c)| c.is_whitespace()) + .map(|(idx, _)| idx) + .unwrap_or(text.len() - start_right); + let end_right = start_right + end_right_rel; + let token_right = if start_right < end_right { + Some(&text[start_right..end_right]) + } else { + None + }; + + let left_hash = token_left + .filter(|t| t.starts_with('#')) + .map(|t| t[1..].to_string()); + let right_hash = token_right + .filter(|t| t.starts_with('#')) + .map(|t| t[1..].to_string()); + + if at_whitespace { + if right_hash.is_some() { + return right_hash; + } + if token_left.is_some_and(|t| t == "#") { + return None; + } + return left_hash; + } + if after_cursor.starts_with('#') { + return right_hash.or(left_hash); + } + left_hash.or(right_hash) + } + /// Replace the active `@token` (the one under the cursor) with `path`. 
/// /// The algorithm mirrors `current_at_token` so replacement works no matter @@ -846,6 +1042,42 @@ impl ChatComposer { self.textarea.set_cursor(new_cursor); } + fn insert_selected_delegate(&mut self, agent_id: &str) { + let cursor_offset = self.textarea.cursor(); + let text = self.textarea.text(); + let safe_cursor = Self::clamp_to_char_boundary(text, cursor_offset); + + let before_cursor = &text[..safe_cursor]; + let after_cursor = &text[safe_cursor..]; + + let start_idx = before_cursor + .char_indices() + .rfind(|(_, c)| c.is_whitespace()) + .map(|(idx, c)| idx + c.len_utf8()) + .unwrap_or(0); + let end_rel_idx = after_cursor + .char_indices() + .find(|(_, c)| c.is_whitespace()) + .map(|(idx, _)| idx) + .unwrap_or(after_cursor.len()); + let end_idx = safe_cursor + end_rel_idx; + + let replacement = format!("#{agent_id}"); + + let mut new_text = + String::with_capacity(text.len() - (end_idx - start_idx) + replacement.len() + 1); + new_text.push_str(&text[..start_idx]); + new_text.push_str(&replacement); + new_text.push(' '); + new_text.push_str(&text[end_idx..]); + + self.textarea.set_text(&new_text); + let new_cursor = start_idx + .saturating_add(replacement.len()) + .saturating_add(1); + self.textarea.set_cursor(new_cursor); + } + /// Handle key event when no popup is visible. 
fn handle_key_event_without_popup(&mut self, key_event: KeyEvent) -> (InputResult, bool) { if self.handle_shortcut_overlay_key(&key_event) { @@ -1374,6 +1606,7 @@ impl ChatComposer { use_shift_enter_hint: self.use_shift_enter_hint, is_task_running: self.is_task_running, context_window_percent: self.context_window_percent, + delegate_label: self.delegate_label.clone(), } } @@ -1496,6 +1729,63 @@ impl ChatComposer { self.dismissed_file_popup_token = None; } + fn sync_delegate_popup(&mut self) { + let query = match Self::current_hash_token(&self.textarea) { + Some(token) => token, + None => { + if matches!(self.active_popup, ActivePopup::Delegate(_)) { + self.active_popup = ActivePopup::None; + } + self.current_delegate_query = None; + self.dismissed_delegate_popup_token = None; + return; + } + }; + + if self.dismissed_delegate_popup_token.as_ref() == Some(&query) { + return; + } + + if !matches!( + self.active_popup, + ActivePopup::Delegate(_) | ActivePopup::None + ) { + self.current_delegate_query = Some(query); + return; + } + + if !query.is_empty() { + self.app_event_tx + .send(AppEvent::StartDelegateSearch(query.clone())); + } + + match &mut self.active_popup { + ActivePopup::Delegate(popup) => { + if query.is_empty() { + popup.set_empty_prompt(); + } else { + popup.set_query(&query); + } + } + ActivePopup::None => { + let mut popup = DelegatePopup::new(); + if query.is_empty() { + popup.set_empty_prompt(); + } else { + popup.set_query(&query); + } + self.active_popup = ActivePopup::Delegate(popup); + } + ActivePopup::Command(_) | ActivePopup::File(_) => { + // handled by outer guard + return; + } + } + + self.current_delegate_query = Some(query); + self.dismissed_delegate_popup_token = None; + } + fn set_has_focus(&mut self, has_focus: bool) { self.has_focus = has_focus; } @@ -1510,6 +1800,14 @@ impl ChatComposer { } } + pub(crate) fn set_delegate_label(&mut self, label: Option) -> bool { + if self.delegate_label == label { + return false; + } + 
self.delegate_label = label; + true + } + pub(crate) fn set_esc_backtrack_hint(&mut self, show: bool) { self.esc_backtrack_hint = show; if show { @@ -1530,11 +1828,14 @@ impl WidgetRef for ChatComposer { ActivePopup::File(popup) => { popup.render_ref(popup_rect, buf); } + ActivePopup::Delegate(popup) => { + popup.render_ref(popup_rect, buf); + } ActivePopup::None => { let footer_props = self.footer_props(); let custom_height = self.custom_footer_height(); let footer_hint_height = - custom_height.unwrap_or_else(|| footer_height(footer_props)); + custom_height.unwrap_or_else(|| footer_height(&footer_props)); let footer_spacing = Self::footer_spacing(footer_hint_height); let hint_rect = if footer_spacing > 0 && footer_hint_height > 0 { let [_, hint_rect] = Layout::vertical([ @@ -1565,7 +1866,7 @@ impl WidgetRef for ChatComposer { Line::from(spans).render_ref(custom_rect, buf); } } else { - render_footer(hint_rect, buf, footer_props); + render_footer(hint_rect, buf, &footer_props); } } } @@ -1730,7 +2031,7 @@ mod tests { ); setup(&mut composer); let footer_props = composer.footer_props(); - let footer_lines = footer_height(footer_props); + let footer_lines = footer_height(&footer_props); let footer_spacing = ChatComposer::footer_spacing(footer_lines); let height = footer_lines + footer_spacing + 8; let mut terminal = Terminal::new(TestBackend::new(width, height)).unwrap(); diff --git a/codex-rs/tui/src/bottom_pane/delegate_popup.rs b/codex-rs/tui/src/bottom_pane/delegate_popup.rs new file mode 100644 index 000000000000..cbed58b7f305 --- /dev/null +++ b/codex-rs/tui/src/bottom_pane/delegate_popup.rs @@ -0,0 +1,153 @@ +use std::fmt::Write; + +use chrono::DateTime; +use chrono::Local; +use chrono::Utc; +use codex_multi_agent::DelegateSessionSummary; +use ratatui::buffer::Buffer; +use ratatui::layout::Rect; +use ratatui::widgets::WidgetRef; + +use crate::render::Insets; +use crate::render::RectExt; + +use super::popup_consts::MAX_POPUP_ROWS; +use 
super::scroll_state::ScrollState; +use super::selection_popup_common::GenericDisplayRow; +use super::selection_popup_common::render_rows; + +/// Visual state for the delegate selection popup (triggered via `#` tokens). +pub(crate) struct DelegatePopup { + display_query: String, + pending_query: String, + waiting: bool, + matches: Vec, + state: ScrollState, +} + +impl DelegatePopup { + pub(crate) fn new() -> Self { + Self { + display_query: String::new(), + pending_query: String::new(), + waiting: true, + matches: Vec::new(), + state: ScrollState::new(), + } + } + + pub(crate) fn set_query(&mut self, query: &str) { + if query == self.pending_query { + return; + } + + let keep_existing = query.starts_with(&self.display_query); + + self.pending_query.clear(); + self.pending_query.push_str(query); + self.waiting = true; + + if !keep_existing { + self.matches.clear(); + self.state.reset(); + } + } + + pub(crate) fn set_empty_prompt(&mut self) { + self.display_query.clear(); + self.pending_query.clear(); + self.waiting = false; + self.matches.clear(); + self.state.reset(); + } + + pub(crate) fn set_matches(&mut self, query: &str, matches: Vec) { + if query != self.pending_query { + return; + } + + self.display_query = query.to_string(); + self.matches = matches; + self.waiting = false; + let len = self.matches.len(); + self.state.clamp_selection(len); + self.state.ensure_visible(len, len.min(MAX_POPUP_ROWS)); + } + + pub(crate) fn move_up(&mut self) { + let len = self.matches.len(); + self.state.move_up_wrap(len); + self.state.ensure_visible(len, len.min(MAX_POPUP_ROWS)); + } + + pub(crate) fn move_down(&mut self) { + let len = self.matches.len(); + self.state.move_down_wrap(len); + self.state.ensure_visible(len, len.min(MAX_POPUP_ROWS)); + } + + pub(crate) fn selected_session(&self) -> Option<&DelegateSessionSummary> { + self.state + .selected_idx + .and_then(|idx| self.matches.get(idx)) + } + + pub(crate) fn calculate_required_height(&self) -> u16 { + 
self.matches.len().clamp(1, MAX_POPUP_ROWS) as u16 + } + + fn rows(&self) -> Vec { + if self.matches.is_empty() { + return Vec::new(); + } + + self.matches + .iter() + .map(|summary| { + let mut description = String::new(); + let _ = write!( + description, + "{} · {}", + format_timestamp(summary.last_interacted_at), + summary.cwd.display() + ); + + GenericDisplayRow { + name: format!("#{}", summary.agent_id.as_str()), + match_indices: None, + is_current: false, + display_shortcut: None, + description: Some(description), + } + }) + .collect() + } +} + +impl WidgetRef for &DelegatePopup { + fn render_ref(&self, area: Rect, buf: &mut Buffer) { + let rows = self.rows(); + let empty_message = if self.waiting { + "loading..." + } else { + "no delegates" + }; + + render_rows( + area.inset(Insets::tlbr(0, 2, 0, 0)), + buf, + &rows, + &self.state, + MAX_POPUP_ROWS, + empty_message, + ); + } +} + +fn format_timestamp(time: std::time::SystemTime) -> String { + let datetime: DateTime = time.into(); + datetime + .with_timezone(&Local) + .format("%Y-%m-%d %H:%M") + .to_string() +} diff --git a/codex-rs/tui/src/bottom_pane/footer.rs b/codex-rs/tui/src/bottom_pane/footer.rs index 6e92a0ce6852..d7166a8f6e44 100644 --- a/codex-rs/tui/src/bottom_pane/footer.rs +++ b/codex-rs/tui/src/bottom_pane/footer.rs @@ -11,13 +11,14 @@ use ratatui::text::Span; use ratatui::widgets::Paragraph; use ratatui::widgets::Widget; -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Debug)] pub(crate) struct FooterProps { pub(crate) mode: FooterMode, pub(crate) esc_backtrack_hint: bool, pub(crate) use_shift_enter_hint: bool, pub(crate) is_task_running: bool, pub(crate) context_window_percent: Option, + pub(crate) delegate_label: Option, } #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -58,11 +59,11 @@ pub(crate) fn reset_mode_after_activity(current: FooterMode) -> FooterMode { } } -pub(crate) fn footer_height(props: FooterProps) -> u16 { +pub(crate) fn footer_height(props: &FooterProps) -> u16 { 
footer_lines(props).len() as u16 } -pub(crate) fn render_footer(area: Rect, buf: &mut Buffer, props: FooterProps) { +pub(crate) fn render_footer(area: Rect, buf: &mut Buffer, props: &FooterProps) { Paragraph::new(prefix_lines( footer_lines(props), " ".repeat(FOOTER_INDENT_COLS).into(), @@ -71,7 +72,7 @@ pub(crate) fn render_footer(area: Rect, buf: &mut Buffer, props: FooterProps) { .render(area, buf); } -fn footer_lines(props: FooterProps) -> Vec> { +fn footer_lines(props: &FooterProps) -> Vec> { // Show the context indicator on the left, appended after the primary hint // (e.g., "? for shortcuts"). Keep it visible even when typing (i.e., when // the shortcut hint is hidden). Hide it only for the multi-line @@ -81,20 +82,39 @@ fn footer_lines(props: FooterProps) -> Vec> { is_task_running: props.is_task_running, })], FooterMode::ShortcutSummary => { - let mut line = context_window_line(props.context_window_percent); - line.push_span(" · ".dim()); - line.extend(vec![ + let mut spans: Vec> = Vec::new(); + if let Some(label) = props.delegate_label.as_ref() { + spans.push(format!("In {}", label).cyan()); + spans.push(" · ".dim()); + } + let context_line = context_window_line(props.context_window_percent); + for span in context_line { + spans.push(span); + } + spans.push(" · ".dim()); + spans.extend(vec![ key_hint::plain(KeyCode::Char('?')).into(), " for shortcuts".dim(), ]); - vec![line] + vec![Line::from(spans)] } FooterMode::ShortcutOverlay => shortcut_overlay_lines(ShortcutsState { use_shift_enter_hint: props.use_shift_enter_hint, esc_backtrack_hint: props.esc_backtrack_hint, }), FooterMode::EscHint => vec![esc_hint_line(props.esc_backtrack_hint)], - FooterMode::ContextOnly => vec![context_window_line(props.context_window_percent)], + FooterMode::ContextOnly => { + let mut spans: Vec> = Vec::new(); + if let Some(label) = props.delegate_label.as_ref() { + spans.push(format!("In {}", label).cyan()); + spans.push(" · ".dim()); + } + let context_line = 
context_window_line(props.context_window_percent); + for span in context_line { + spans.push(span); + } + vec![Line::from(spans)] + } } } @@ -379,12 +399,12 @@ mod tests { use ratatui::backend::TestBackend; fn snapshot_footer(name: &str, props: FooterProps) { - let height = footer_height(props).max(1); + let height = footer_height(&props).max(1); let mut terminal = Terminal::new(TestBackend::new(80, height)).unwrap(); terminal .draw(|f| { let area = Rect::new(0, 0, f.area().width, height); - render_footer(area, f.buffer_mut(), props); + render_footer(area, f.buffer_mut(), &props); }) .unwrap(); assert_snapshot!(name, terminal.backend()); @@ -400,6 +420,7 @@ mod tests { use_shift_enter_hint: false, is_task_running: false, context_window_percent: None, + delegate_label: None, }, ); @@ -411,6 +432,7 @@ mod tests { use_shift_enter_hint: true, is_task_running: false, context_window_percent: None, + delegate_label: None, }, ); @@ -422,6 +444,7 @@ mod tests { use_shift_enter_hint: false, is_task_running: false, context_window_percent: None, + delegate_label: None, }, ); @@ -433,6 +456,7 @@ mod tests { use_shift_enter_hint: false, is_task_running: true, context_window_percent: None, + delegate_label: None, }, ); @@ -444,6 +468,7 @@ mod tests { use_shift_enter_hint: false, is_task_running: false, context_window_percent: None, + delegate_label: None, }, ); @@ -455,6 +480,7 @@ mod tests { use_shift_enter_hint: false, is_task_running: false, context_window_percent: None, + delegate_label: None, }, ); @@ -466,6 +492,19 @@ mod tests { use_shift_enter_hint: false, is_task_running: true, context_window_percent: Some(72), + delegate_label: None, + }, + ); + + snapshot_footer( + "footer_shortcuts_delegate", + FooterProps { + mode: FooterMode::ShortcutSummary, + esc_backtrack_hint: false, + use_shift_enter_hint: false, + is_task_running: false, + context_window_percent: Some(85), + delegate_label: Some("#critic".to_string()), }, ); } diff --git a/codex-rs/tui/src/bottom_pane/mod.rs 
b/codex-rs/tui/src/bottom_pane/mod.rs index db13a041cd35..209dd9d2d019 100644 --- a/codex-rs/tui/src/bottom_pane/mod.rs +++ b/codex-rs/tui/src/bottom_pane/mod.rs @@ -5,6 +5,7 @@ use crate::app_event_sender::AppEventSender; use crate::tui::FrameRequester; use bottom_pane_view::BottomPaneView; use codex_file_search::FileMatch; +use codex_multi_agent::DelegateSessionSummary; use crossterm::event::KeyCode; use crossterm::event::KeyEvent; use ratatui::buffer::Buffer; @@ -22,6 +23,7 @@ mod chat_composer; mod chat_composer_history; mod command_popup; pub mod custom_prompt_view; +mod delegate_popup; mod file_search_popup; mod footer; mod list_selection_view; @@ -69,6 +71,7 @@ pub(crate) struct BottomPane { /// Queued user messages to show under the status indicator. queued_user_messages: Vec, context_window_percent: Option, + delegate_label: Option, } pub(crate) struct BottomPaneParams { @@ -102,6 +105,7 @@ impl BottomPane { queued_user_messages: Vec::new(), esc_backtrack_hint: false, context_window_percent: None, + delegate_label: None, } } @@ -360,6 +364,16 @@ impl BottomPane { self.request_redraw(); } + pub(crate) fn set_delegate_label(&mut self, label: Option) { + if self.delegate_label == label { + return; + } + self.delegate_label = label.clone(); + if self.composer.set_delegate_label(label) { + self.request_redraw(); + } + } + /// Show a generic list selection view with the provided items. 
pub(crate) fn show_selection_view(&mut self, params: list_selection_view::SelectionViewParams) { let view = list_selection_view::ListSelectionView::new(params, self.app_event_tx.clone()); @@ -479,6 +493,15 @@ impl BottomPane { self.request_redraw(); } + pub(crate) fn on_delegate_search_result( + &mut self, + query: String, + matches: Vec, + ) { + self.composer.on_delegate_search_result(query, matches); + self.request_redraw(); + } + pub(crate) fn attach_image( &mut self, path: PathBuf, diff --git a/codex-rs/tui/src/bottom_pane/snapshots/codex_tui__bottom_pane__footer__tests__footer_shortcuts_delegate.snap b/codex-rs/tui/src/bottom_pane/snapshots/codex_tui__bottom_pane__footer__tests__footer_shortcuts_delegate.snap new file mode 100644 index 000000000000..38c2119a37e0 --- /dev/null +++ b/codex-rs/tui/src/bottom_pane/snapshots/codex_tui__bottom_pane__footer__tests__footer_shortcuts_delegate.snap @@ -0,0 +1,5 @@ +--- +source: tui/src/bottom_pane/footer.rs +expression: terminal.backend() +--- +" In #critic · 85% context left · ? 
for shortcuts " diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 07e6679c0936..c01e89755e73 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -93,9 +93,13 @@ use self::agent::spawn_agent_from_existing; mod session_header; use self::session_header::SessionHeader; use crate::streaming::controller::StreamController; +use std::fmt::Write; use std::path::Path; +use std::time::SystemTime; +use chrono::DateTime; use chrono::Local; +use chrono::Utc; use codex_common::approval_presets::ApprovalPreset; use codex_common::approval_presets::builtin_approval_presets; use codex_common::model_presets::ModelPreset; @@ -112,6 +116,7 @@ use codex_git_tooling::GitToolingError; use codex_git_tooling::create_ghost_commit; use codex_git_tooling::restore_ghost_commit; use codex_multi_agent::AgentId; +use codex_multi_agent::DelegateSessionSummary; use codex_protocol::plan_tool::UpdatePlanArgs; use strum::IntoEnumIterator; @@ -268,6 +273,10 @@ pub(crate) struct ChatWidget { delegate_had_stream: bool, delegate_status_claimed: bool, delegate_previous_status_header: Option, + delegate_context: Option, + delegate_user_frames: Vec, + delegate_agent_frames: Vec, + pending_delegate_context: Vec, last_rendered_width: std::cell::Cell>, } @@ -277,6 +286,18 @@ struct UserMessage { image_paths: Vec, } +#[derive(Default)] +pub(crate) struct DelegateCapture { + pub user_inputs: Vec, + pub agent_outputs: Vec, +} + +impl DelegateCapture { + fn is_empty(&self) -> bool { + self.user_inputs.is_empty() && self.agent_outputs.is_empty() + } +} + impl From for UserMessage { fn from(text: String) -> Self { Self { @@ -347,6 +368,79 @@ impl ChatWidget { } } + pub(crate) fn set_delegate_context(&mut self, summary: Option) { + let label = summary + .as_ref() + .map(|s| format!("#{}", s.agent_id.as_str())); + self.bottom_pane.set_delegate_label(label); + self.delegate_context = summary; + self.delegate_user_frames.clear(); + 
self.delegate_agent_frames.clear(); + } + + pub(crate) fn take_delegate_capture(&mut self) -> Option { + if self.delegate_user_frames.is_empty() && self.delegate_agent_frames.is_empty() { + return None; + } + Some(DelegateCapture { + user_inputs: std::mem::take(&mut self.delegate_user_frames), + agent_outputs: std::mem::take(&mut self.delegate_agent_frames), + }) + } + + pub(crate) fn apply_delegate_summary( + &mut self, + summary: &DelegateSessionSummary, + capture: DelegateCapture, + ) { + if capture.is_empty() { + self.add_info_message( + format!( + "Returned from #{} (no new messages)", + summary.agent_id.as_str() + ), + None, + ); + return; + } + + let mut context = String::new(); + let _ = writeln!( + context, + "Context from #{} (cwd: {})", + summary.agent_id.as_str(), + summary.cwd.display() + ); + + for item in capture.user_inputs { + if let InputItem::Text { text } = item { + let trimmed = text.trim(); + if !trimmed.is_empty() { + let _ = writeln!(context, "You → {trimmed}"); + } + } + } + + for message in capture.agent_outputs { + let trimmed = message.trim(); + if !trimmed.is_empty() { + let _ = writeln!(context, "{} → {trimmed}", summary.agent_id.as_str()); + } + } + + let context = context.trim().to_string(); + if context.is_empty() { + return; + } + + self.pending_delegate_context.push(context.clone()); + self.add_to_history(history_cell::new_info_event( + format!("Returned from #{}", summary.agent_id.as_str()), + Some("Queued delegate context for next prompt.".to_string()), + )); + self.add_to_history(history_cell::new_info_event(context.clone(), None)); + } + fn on_agent_message(&mut self, message: String) { // If we have a stream_controller, then the final agent message is redundant and will be a // duplicate of what has already been streamed. 
@@ -419,11 +513,20 @@ impl ChatWidget { self.running_commands.clear(); self.request_redraw(); + if self.delegate_context.is_some() + && let Some(message) = last_agent_message.as_ref() + { + if !message.trim().is_empty() { + self.delegate_agent_frames.push(message.clone()); + } + } + + let notification_response = last_agent_message.clone().unwrap_or_default(); // If there is a queued user message, send exactly one now to begin the next turn. self.maybe_send_next_queued_input(); // Emit a notification when the turn completes (suppressed if focused). self.notify(Notification::AgentTurnComplete { - response: last_agent_message.unwrap_or_default(), + response: notification_response, }); } @@ -964,6 +1067,10 @@ impl ChatWidget { delegate_had_stream: false, delegate_status_claimed: false, delegate_previous_status_header: None, + delegate_context: None, + delegate_user_frames: Vec::new(), + delegate_agent_frames: Vec::new(), + pending_delegate_context: Vec::new(), last_rendered_width: std::cell::Cell::new(None), } } @@ -1033,6 +1140,10 @@ impl ChatWidget { delegate_had_stream: false, delegate_status_claimed: false, delegate_previous_status_header: None, + delegate_context: None, + delegate_user_frames: Vec::new(), + delegate_agent_frames: Vec::new(), + pending_delegate_context: Vec::new(), last_rendered_width: std::cell::Cell::new(None), } } @@ -1189,6 +1300,9 @@ impl ChatWidget { SlashCommand::Mention => { self.insert_str("@"); } + SlashCommand::Agent => { + self.app_event_tx.send(AppEvent::OpenDelegatePicker); + } SlashCommand::Status => { self.add_status_output(); } @@ -1279,11 +1393,25 @@ impl ChatWidget { } fn submit_user_message(&mut self, user_message: UserMessage) { - let UserMessage { text, image_paths } = user_message; + let UserMessage { + mut text, + image_paths, + } = user_message; if text.is_empty() && image_paths.is_empty() { return; } + let display_text = text.clone(); + + if self.delegate_context.is_some() + && !display_text.trim().is_empty() + && 
image_paths.is_empty() + { + self.delegate_user_frames.push(InputItem::Text { + text: display_text.clone(), + }); + } + // Intercept explicit delegation commands (only support text-only submissions). if image_paths.is_empty() && !text.is_empty() && self.try_delegate_shortcut(&text) { return; @@ -1291,6 +1419,22 @@ impl ChatWidget { self.capture_ghost_snapshot(); + if self.delegate_context.is_none() + && !self.pending_delegate_context.is_empty() + && !text.trim().is_empty() + { + let mut prefix = self.pending_delegate_context.join("\n\n"); + self.pending_delegate_context.clear(); + if !prefix.is_empty() { + if !prefix.ends_with('\n') { + prefix.push('\n'); + } + prefix.push('\n'); + } + prefix.push_str(&text); + text = prefix; + } + let mut items: Vec = Vec::new(); if !text.is_empty() { @@ -1301,24 +1445,21 @@ impl ChatWidget { items.push(InputItem::LocalImage { path }); } - self.codex_op_tx - .send(Op::UserInput { items }) - .unwrap_or_else(|e| { - tracing::error!("failed to send message: {e}"); - }); + if let Err(e) = self.codex_op_tx.send(Op::UserInput { items }) { + tracing::error!("failed to send message: {e}"); + } - // Persist the text to cross-session message history. if !text.is_empty() { - self.codex_op_tx + if let Err(e) = self + .codex_op_tx .send(Op::AddToHistory { text: text.clone() }) - .unwrap_or_else(|e| { - tracing::error!("failed to send AddHistory op: {e}"); - }); + { + tracing::error!("failed to send AddHistory op: {e}"); + } } - // Only show the text portion in conversation history. 
- if !text.is_empty() { - self.add_to_history(history_cell::new_user_prompt(text)); + if !display_text.is_empty() { + self.add_to_history(history_cell::new_user_prompt(display_text)); } self.needs_final_message_separator = false; } @@ -1805,6 +1946,76 @@ impl ChatWidget { }); } + pub(crate) fn open_delegate_picker( + &mut self, + mut sessions: Vec, + active_delegate: Option<&str>, + ) { + if sessions.is_empty() { + self.add_info_message( + "No delegate sessions available.".to_string(), + Some("Ask the main agent to delegate a task first.".to_string()), + ); + return; + } + + sessions.sort_by(|a, b| b.last_interacted_at.cmp(&a.last_interacted_at)); + + let mut items: Vec = Vec::new(); + + if active_delegate.is_some() { + let actions: Vec = + vec![Box::new(|tx| tx.send(AppEvent::ExitDelegateSession))]; + items.push(SelectionItem { + name: "Return to main agent".to_string(), + description: None, + is_current: false, + actions, + dismiss_on_select: true, + ..Default::default() + }); + } + + for summary in sessions { + let conversation_id = summary.conversation_id.clone(); + let label = format!( + "#{} · {}", + summary.agent_id.as_str(), + Self::format_delegate_timestamp(summary.last_interacted_at) + ); + let description = Some(summary.cwd.display().to_string()); + let is_current = active_delegate == Some(conversation_id.as_str()); + let conversation_id_for_action = conversation_id.clone(); + let actions: Vec = vec![Box::new(move |tx| { + tx.send(AppEvent::EnterDelegateSession( + conversation_id_for_action.clone(), + )); + })]; + items.push(SelectionItem { + name: label, + description, + is_current, + actions, + dismiss_on_select: true, + ..Default::default() + }); + } + + self.bottom_pane.show_selection_view(SelectionViewParams { + title: Some("Switch agent".to_string()), + footer_hint: Some(standard_popup_hint_line()), + items, + ..Default::default() + }); + } + + fn format_delegate_timestamp(time: SystemTime) -> String { + let utc: DateTime = time.into(); + 
utc.with_timezone(&Local) + .format("%Y-%m-%d %H:%M") + .to_string() + } + /// Open a popup to choose the approvals mode (ask for approval policy + sandbox policy). pub(crate) fn open_approvals_popup(&mut self) { let current_approval = self.config.approval_policy; @@ -1892,6 +2103,14 @@ impl ChatWidget { self.bottom_pane.on_file_search_result(query, matches); } + pub(crate) fn apply_delegate_search_result( + &mut self, + query: String, + matches: Vec, + ) { + self.bottom_pane.on_delegate_search_result(query, matches); + } + /// Handle Ctrl-C key press. fn on_ctrl_c(&mut self) { if self.bottom_pane.on_ctrl_c() == CancellationEvent::Handled { @@ -2169,6 +2388,8 @@ impl ChatWidget { pub(crate) fn on_delegate_started(&mut self, run_id: &str, agent_id: &AgentId, prompt: &str) { self.delegate_run = Some(run_id.to_string()); self.delegate_had_stream = false; + self.delegate_user_frames.clear(); + self.delegate_agent_frames.clear(); self.delegate_previous_status_header = Some(self.current_status_header.clone()); if self.bottom_pane.status_widget().is_none() { self.bottom_pane.set_task_running(true); diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index b1fa500159ec..a1e18f2eb12b 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -293,6 +293,10 @@ fn make_chatwidget_manual() -> ( delegate_had_stream: false, delegate_status_claimed: false, delegate_previous_status_header: None, + delegate_context: None, + delegate_user_frames: Vec::new(), + delegate_agent_frames: Vec::new(), + pending_delegate_context: Vec::new(), last_rendered_width: std::cell::Cell::new(None), }; (widget, rx, op_rx) diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index bfa6c0cf39ba..b2edcfc19e6c 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -24,6 +24,7 @@ use codex_ollama::DEFAULT_OSS_MODEL; use codex_protocol::config_types::SandboxMode; use 
opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; use std::fs::OpenOptions; +use std::path::Path; use std::path::PathBuf; use std::sync::Arc; use tracing::error; @@ -78,6 +79,60 @@ mod wrapping; #[cfg(test)] pub mod test_backend; +#[cfg(test)] +mod auth_tests { + use super::*; + use codex_core::auth::AuthDotJson; + use codex_core::auth::write_auth_json; + use tempfile::tempdir; + + #[test] + fn login_status_uses_parent_codex_home_for_auth() { + let global_home = tempdir().expect("tempdir"); + let auth_path = global_home.path().join("auth.json"); + write_auth_json( + &auth_path, + &AuthDotJson { + openai_api_key: Some("sk-test".to_string()), + tokens: None, + last_refresh: None, + }, + ) + .expect("write auth.json"); + + let agent_home = tempdir().expect("tempdir"); + let mut config = Config::load_from_base_config_with_overrides( + ConfigToml::default(), + ConfigOverrides::default(), + agent_home.path().to_path_buf(), + ) + .expect("config"); + config.model_provider.requires_openai_auth = true; + + let status = get_login_status(&config, global_home.path()); + assert!(matches!(status, LoginStatus::AuthMode(AuthMode::ApiKey))); + } + + #[test] + fn shared_auth_manager_reads_parent_auth() { + let global_home = tempdir().expect("tempdir"); + let auth_path = global_home.path().join("auth.json"); + write_auth_json( + &auth_path, + &AuthDotJson { + openai_api_key: Some("sk-test".to_string()), + tokens: None, + last_refresh: None, + }, + ) + .expect("write auth.json"); + + let manager = AuthManager::shared(global_home.path().to_path_buf(), false); + let auth = manager.auth().expect("auth loaded"); + assert!(matches!(auth.mode, AuthMode::ApiKey)); + } +} + #[cfg(not(debug_assertions))] mod updates; @@ -363,15 +418,15 @@ async fn run_ratatui_app( // Initialize high-fidelity session event logging if enabled. 
session_log::maybe_init(&config); - let auth_manager = AuthManager::shared(config.codex_home.clone(), false); + let auth_manager = AuthManager::shared(global_codex_home.clone(), false); let delegate_orchestrator = Arc::new(AgentOrchestrator::new( - global_codex_home, + global_codex_home.clone(), auth_manager.clone(), SessionSource::Cli, delegate_cli_overrides, delegate_config_overrides, )); - let login_status = get_login_status(&config); + let login_status = get_login_status(&config, &global_codex_home); let should_show_windows_wsl_screen = cfg!(target_os = "windows") && !config.windows_wsl_setup_acknowledged; let should_show_onboarding = should_show_onboarding( @@ -494,12 +549,11 @@ pub enum LoginStatus { NotAuthenticated, } -fn get_login_status(config: &Config) -> LoginStatus { +fn get_login_status(config: &Config, auth_codex_home: &Path) -> LoginStatus { if config.model_provider.requires_openai_auth { // Reading the OpenAI API key is an async operation because it may need // to refresh the token. Block on it. 
- let codex_home = config.codex_home.clone(); - match CodexAuth::from_codex_home(&codex_home) { + match CodexAuth::from_codex_home(auth_codex_home) { Ok(Some(auth)) => LoginStatus::AuthMode(auth.mode), Ok(None) => LoginStatus::NotAuthenticated, Err(err) => { diff --git a/codex-rs/tui/src/slash_command.rs b/codex-rs/tui/src/slash_command.rs index 14604a736d7f..2bcfe33eceda 100644 --- a/codex-rs/tui/src/slash_command.rs +++ b/codex-rs/tui/src/slash_command.rs @@ -21,6 +21,7 @@ pub enum SlashCommand { Undo, Diff, Mention, + Agent, Status, Mcp, Logout, @@ -41,6 +42,7 @@ impl SlashCommand { SlashCommand::Quit => "exit Codex", SlashCommand::Diff => "show git diff (including untracked files)", SlashCommand::Mention => "mention a file", + SlashCommand::Agent => "switch into a delegated agent session", SlashCommand::Status => "show current session configuration and token usage", SlashCommand::Model => "choose what model and reasoning effort to use", SlashCommand::Approvals => "choose what Codex can do without approval", @@ -70,6 +72,7 @@ impl SlashCommand { | SlashCommand::Logout => false, SlashCommand::Diff | SlashCommand::Mention + | SlashCommand::Agent | SlashCommand::Status | SlashCommand::Mcp | SlashCommand::Quit => true, From 46a2eeafc3d193d91587e6b9f41bda20b11726ac Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Wed, 15 Oct 2025 23:10:32 +0000 Subject: [PATCH 10/17] Remove inline #agent autocomplete from TUI --- ai-temp/CHANGELOG.md | 5 +- ai-temp/agent-switching.md | 3 - ai-temp/example-codex-home/AGENTS.md | 4 +- ai-temp/orchestration-integration.md | 3 - ai-temp/tool-implementation-patterns.md | 6 +- codex-rs/multi-agent/src/orchestrator.rs | 1 + codex-rs/tui/src/app.rs | 18 -- codex-rs/tui/src/app_event.rs | 3 - codex-rs/tui/src/bottom_pane/chat_composer.rs | 290 +----------------- .../tui/src/bottom_pane/delegate_popup.rs | 153 --------- codex-rs/tui/src/bottom_pane/mod.rs | 11 - codex-rs/tui/src/chatwidget.rs | 10 +- 12 files changed, 11 insertions(+), 
496 deletions(-) delete mode 100644 codex-rs/tui/src/bottom_pane/delegate_popup.rs diff --git a/ai-temp/CHANGELOG.md b/ai-temp/CHANGELOG.md index 1d326f99abd8..8e9f88c15b77 100644 --- a/ai-temp/CHANGELOG.md +++ b/ai-temp/CHANGELOG.md @@ -2,6 +2,9 @@ > Keep this file current; it documents the evolution of the multi-agent design work. An outdated changelog breaks the orchestrator timeline. +## 2025-10-16 +- Removed the inline `#agent` autocomplete experiment and reverted documentation to focus on the delegate picker and slash command flow. + ## 2025-10-14 - Captured the baseline design artifacts (`AGENTS.md`, `config-design.md`, `instruction-design.md`, `persistence-design.md`, `error-handling.md`) compiled during the planning phase. - Reiterated the requirement that this changelog must stay up to date as the multi-agent feature evolves. @@ -10,9 +13,7 @@ - Added `ai-temp/example-codex-home/` with ready-to-run config, instructions, and multiple agent directories for hands-on testing via `CODEX_HOME=...` and `--agent`. - Authored `ai-temp/orchestration-integration.md`, outlining logic, UI/UX, and minimal-coupling hooks to let the primary agent delegate work to sub-agents in the existing codebase. - Captured delegation decisions (single-flight execution, shared auth, primary-agent-composed prompts) inside `ai-temp/orchestration-integration.md`. -- Implemented the orchestrator (`codex-rs/multi-agent/src/orchestrator.rs`) and wired the TUI to support inline delegation via `#agent_id ...` prompts, with progress and completion surfaced through the main transcript. - Simplified the example Codex home to `ideas_provider` (gpt-5) and `critic` (gpt-5-nano) agents for easier manual testing. -- Fixed the TUI delegation hook so user-entered `#agent_id …` messages trigger the orchestrator, and added a regression test to guard the behaviour. 
- Delegated runs now stream live output (`DelegateEvent::Delta`) through the TUI, and remaining UX follow-ups are tracked in `ai-temp/ui-ux-delegation.md`. - Added a dedicated status indicator while a delegate runs, restored the idle header on completion, and regression-tested streaming to prevent animation regressions. - Updated the sample Codex home instructions/README, ensured the critic agent uses `gpt-5-nano`, and documented the new delegation UX in `ai-temp/ui-ux-delegation.md`. diff --git a/ai-temp/agent-switching.md b/ai-temp/agent-switching.md index cc10158439c5..9e58f30f11cb 100644 --- a/ai-temp/agent-switching.md +++ b/ai-temp/agent-switching.md @@ -36,8 +36,6 @@ - A status banner showing `In #` while switched, with a shortcut to return to the primary agent. - History cells that log switch events (`Entered #critic`, `Returned from #critic – applied adjustments`). - Optional shortcut `/agent return` to exit quickly. -- Provide an autocomplete (`#agent_id · YYYY-MM-DD HH:MM`) that lists only delegates invoked during the current main-agent session so users can target the right sub-agent before switching. -- The composer now surfaces this autocomplete automatically when typing `#`; selecting an entry both inserts `#agent_id` into the prompt and switches context to that agent. - While switched, show an inline footer indicator (`In #agent`) next to the context meter so the active delegate is always visible. - Hide picker entries whose conversations are no longer resumable (e.g., cleanup, failure). If the user attempts to switch into a stale handle, surface an error toast and keep them in the current context while logging the failure. - While switched, the prompt input should clearly identify the active agent (e.g., placeholder text, accent color) to avoid accidental edits. @@ -71,7 +69,6 @@ - `codex-rs/tui/src/status/helpers.rs` & `/status` widgets – surface the active agent stack and recent switch history. 
- `codex-rs/tui/src/slash_command.rs` – wire `/agent enter ` / `/agent return` (or similar) commands if we expose keyboard-driven switching. - `codex-rs/tui/src/tests/` – update snapshot/unit tests to cover the new event stream and UI affordances. -- Autocomplete picker (similar to the file search UI) that lists delegates invoked during the current main-agent session as `#agent_id · YYYY-MM-DD HH:MM`, making it easy to choose the desired session before switching. - We explicitly skip building a “replay” browser for older delegate sessions in this iteration. ### CLI & Configuration diff --git a/ai-temp/example-codex-home/AGENTS.md b/ai-temp/example-codex-home/AGENTS.md index bb1889ca5fb7..87fbb75ec1b2 100644 --- a/ai-temp/example-codex-home/AGENTS.md +++ b/ai-temp/example-codex-home/AGENTS.md @@ -11,6 +11,6 @@ General rules: - Keep replies short unless the user explicitly requests depth; cite `ai-temp/` docs when needed for background. - If delegation adds no value, explain why you are handling the request directly. -- The `delegate_agent` tool is AI-only. Treat `#ideas_provider` / `#critic` tags as hints for the model; the user cannot invoke sub-agents directly. +- The `delegate_agent` tool is AI-only. Describe which delegate you want in plain language—the user cannot invoke sub-agents directly. - Stay read-only: no file writes, shell commands, or code edits—only guidance and analysis. -- When manually testing, describe the problem clearly so the coordinator chooses the right delegate; bare `#agent` commands alone are ignored. +- When manually testing, describe the problem clearly so the coordinator chooses the right delegate. 
diff --git a/ai-temp/orchestration-integration.md b/ai-temp/orchestration-integration.md index be75419e6549..c151287991f7 100644 --- a/ai-temp/orchestration-integration.md +++ b/ai-temp/orchestration-integration.md @@ -63,7 +63,6 @@ This document describes how to wire true sub-agent orchestration into the Codex | --- | --- | --- | | Orchestrator instantiation | `codex-rs/tui/src/app.rs:82` | Inject an `AgentOrchestrator` alongside the existing `ConversationManager`. | | Slash-command parsing | `codex-rs/tui/src/slash_command.rs` & `codex-rs/tui/src/chatwidget.rs:1126` | Add `/delegate` (or `/agent`) command to open a delegate picker or dispatch a delegate request. | -| UI → orchestrator request | `codex-rs/tui/src/chatwidget.rs:773` (user submissions) | When the message starts with `#agent:` or `/delegate`, emit `AppEvent::DelegateRequest`. | | App event handling | `codex-rs/tui/src/app.rs:247` (`while let Some(event)`) | Route new `AppEvent::DelegateRequest` to `AgentOrchestrator::handle_request`. | | Event fan-in | `codex-rs/tui/src/app.rs:330` | Handle `AppEvent::DelegateUpdate` to mutate transcript/history cells. | | Status card | `codex-rs/tui/src/status/card.rs:68` | Pull orchestrator metrics (active agents, last run) to display in `/status`. | @@ -88,8 +87,6 @@ This document describes how to wire true sub-agent orchestration into the Codex - Add `SlashCommand::Delegate` in `codex-rs/tui/src/slash_command.rs`. - In `ChatWidget::dispatch_command` (`codex-rs/tui/src/chatwidget.rs:1126`), call a new method `open_delegate_dialog()` that lists available agents via `AgentRegistry::list_agent_ids`. -- **Inline mention**: allow prompts to start with `#` to route directly. Processing lives in `ChatWidget::submit_text_message` where we already prepare `InputItem` vectors. 
- - **Implemented**: the current TUI recognizes prompts beginning with `#agent_id` and hands them to the orchestrator, so progress and completion surface directly in the transcript without additional UI steps. ### 3.2 Transcript Presentation diff --git a/ai-temp/tool-implementation-patterns.md b/ai-temp/tool-implementation-patterns.md index 963ce943fd05..bcaafecaf73c 100644 --- a/ai-temp/tool-implementation-patterns.md +++ b/ai-temp/tool-implementation-patterns.md @@ -27,13 +27,13 @@ This note captures the patterns we observed while digging into the built-in plan - Reuse the same pattern: declare a schema-rich `ToolSpec`, keep the handler stateless, and emit structured events for the UI. - Guard inclusion with config or profile flags so we can stage features safely. - Keep UX logic (streaming, history cells) in the client; server code just transports structured data. -- Treat delegation as an AI-triggered capability: the user cannot directly execute sub-agents; instead, the main model decides when to call the delegation tool, with `#agent_id` serving only as a hint in prompts and autocomplete. +- Treat delegation as an AI-triggered capability: the user cannot directly execute sub-agents; instead, the main model decides when to call the delegation tool based on conversational context. ## 7. Multi-Agent Delegate Tool Blueprint ### 7.1 Invocation Model - The primary assistant issues a tool call (working name: `delegate_agent`) whenever it wants help from a sub-agent. Users supply plain language requests; the model chooses whether delegation is appropriate. -- The frontend passes user text verbatim. It never intercepts hash-prefixed tokens—`#critic`, `#ideas_provider`, etc.—beyond offering them in autocomplete. Instructions explain that these tags hint which agent to choose. +- The frontend passes user text verbatim. Guidance about which agent to choose lives in instructions rather than inline tokens. 
### 7.2 Tool Spec Shape ```json @@ -67,5 +67,5 @@ This note captures the patterns we observed while digging into the built-in plan - Because users cannot trigger the tool directly, slash commands and message preprocessing stay untouched; guidance lives in instructions and autocomplete metadata. ### 7.5 Instruction Updates -- Primary instructions clarify that `#agent_id` tags are hints encouraging the assistant to invoke the delegation tool, not commands. +- Primary instructions clarify how to phrase requests when the assistant should consider delegation; there are no special inline tokens required. - Sub-agent instructions remain focused on their specialised roles; the orchestrator constructs the prompt passed through the tool payload. diff --git a/codex-rs/multi-agent/src/orchestrator.rs b/codex-rs/multi-agent/src/orchestrator.rs index ebef7afe8253..0b6ef69e630c 100644 --- a/codex-rs/multi-agent/src/orchestrator.rs +++ b/codex-rs/multi-agent/src/orchestrator.rs @@ -3,6 +3,7 @@ use std::path::PathBuf; use std::sync::Arc; use std::time::SystemTime; +use anyhow::Result; use async_trait::async_trait; use codex_common::CliConfigOverrides; use codex_core::AuthManager; diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index 5f4d4b96d7d5..c17eaac18e29 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -345,24 +345,6 @@ impl App { self.file_search.on_user_query(query); } } - AppEvent::StartDelegateSearch(query) => { - let all_sessions = self.delegate_orchestrator.active_sessions().await; - let query_lower = query.to_lowercase(); - let mut matches: Vec<_> = all_sessions - .into_iter() - .filter(|summary| { - query_lower.is_empty() - || summary - .agent_id - .as_str() - .to_lowercase() - .contains(&query_lower) - }) - .collect(); - matches.sort_by(|a, b| b.last_interacted_at.cmp(&a.last_interacted_at)); - self.chat_widget - .apply_delegate_search_result(query, matches); - } AppEvent::FileSearchResult { query, matches } => { 
self.chat_widget.apply_file_search_result(query, matches); } diff --git a/codex-rs/tui/src/app_event.rs b/codex-rs/tui/src/app_event.rs index 5e97609e2bba..b6f5a371b073 100644 --- a/codex-rs/tui/src/app_event.rs +++ b/codex-rs/tui/src/app_event.rs @@ -35,9 +35,6 @@ pub(crate) enum AppEvent { /// the `@`). Previous searches may be cancelled by the app layer so there /// is at most one in-flight search. StartFileSearch(String), - /// Kick off delegate suggestions for the current `#` token. - StartDelegateSearch(String), - /// Result of a completed asynchronous file search. The `query` echoes the /// original search term so the UI can decide whether the results are /// still relevant. diff --git a/codex-rs/tui/src/bottom_pane/chat_composer.rs b/codex-rs/tui/src/bottom_pane/chat_composer.rs index 2769104da0cc..0f76ab53e5a8 100644 --- a/codex-rs/tui/src/bottom_pane/chat_composer.rs +++ b/codex-rs/tui/src/bottom_pane/chat_composer.rs @@ -18,7 +18,6 @@ use ratatui::widgets::WidgetRef; use super::chat_composer_history::ChatComposerHistory; use super::command_popup::CommandItem; use super::command_popup::CommandPopup; -use super::delegate_popup::DelegatePopup; use super::file_search_popup::FileSearchPopup; use super::footer::FooterMode; use super::footer::FooterProps; @@ -51,7 +50,6 @@ use crate::clipboard_paste::pasted_image_format; use crate::history_cell; use crate::ui_consts::LIVE_PREFIX_COLS; use codex_file_search::FileMatch; -use codex_multi_agent::DelegateSessionSummary; use std::cell::RefCell; use std::collections::HashMap; use std::path::Path; @@ -98,8 +96,6 @@ pub(crate) struct ChatComposer { use_shift_enter_hint: bool, dismissed_file_popup_token: Option, current_file_query: Option, - dismissed_delegate_popup_token: Option, - current_delegate_query: Option, pending_pastes: Vec<(String, String)>, has_focus: bool, attached_images: Vec, @@ -121,7 +117,6 @@ enum ActivePopup { None, Command(CommandPopup), File(FileSearchPopup), - Delegate(DelegatePopup), } const 
FOOTER_SPACING_HEIGHT: u16 = 0; @@ -147,8 +142,6 @@ impl ChatComposer { use_shift_enter_hint, dismissed_file_popup_token: None, current_file_query: None, - dismissed_delegate_popup_token: None, - current_delegate_query: None, pending_pastes: Vec::new(), has_focus: has_input_focus, attached_images: Vec::new(), @@ -182,7 +175,6 @@ impl ChatComposer { ActivePopup::None => footer_total_height, ActivePopup::Command(c) => c.calculate_required_height(width), ActivePopup::File(c) => c.calculate_required_height(), - ActivePopup::Delegate(c) => c.calculate_required_height(), } } @@ -198,7 +190,6 @@ impl ChatComposer { Constraint::Max(popup.calculate_required_height(area.width)) } ActivePopup::File(popup) => Constraint::Max(popup.calculate_required_height()), - ActivePopup::Delegate(popup) => Constraint::Max(popup.calculate_required_height()), ActivePopup::None => Constraint::Max(footer_total_height), }; let mut area = area; @@ -276,10 +267,8 @@ impl ChatComposer { self.sync_command_popup(); if matches!(self.active_popup, ActivePopup::Command(_)) { self.dismissed_file_popup_token = None; - self.dismissed_delegate_popup_token = None; } else { self.sync_file_search_popup(); - self.sync_delegate_popup(); } true } @@ -291,15 +280,11 @@ impl ChatComposer { match image::image_dimensions(&path_buf) { Ok((w, h)) => { - tracing::info!("OK: {pasted}"); let format_label = pasted_image_format(&path_buf).label(); self.attach_image(path_buf, w, h, format_label); true } - Err(err) => { - tracing::info!("ERR: {err}"); - false - } + Err(_) => false, } } @@ -327,7 +312,6 @@ impl ChatComposer { self.textarea.set_cursor(0); self.sync_command_popup(); self.sync_file_search_popup(); - self.sync_delegate_popup(); } /// Get the current composer text. 
@@ -383,25 +367,6 @@ impl ChatComposer { } } - pub(crate) fn on_delegate_search_result( - &mut self, - query: String, - matches: Vec, - ) { - let current_opt = Self::current_hash_token(&self.textarea); - let Some(current_token) = current_opt else { - return; - }; - - if !current_token.starts_with(&query) { - return; - } - - if let ActivePopup::Delegate(popup) = &mut self.active_popup { - popup.set_matches(&query, matches); - } - } - pub fn set_ctrl_c_quit_hint(&mut self, show: bool, has_focus: bool) { self.ctrl_c_quit_hint = show; if show { @@ -416,7 +381,6 @@ impl ChatComposer { self.textarea.insert_str(text); self.sync_command_popup(); self.sync_file_search_popup(); - self.sync_delegate_popup(); } /// Handle a key event coming from the main UI. @@ -424,7 +388,6 @@ impl ChatComposer { let result = match &mut self.active_popup { ActivePopup::Command(_) => self.handle_key_event_with_slash_popup(key_event), ActivePopup::File(_) => self.handle_key_event_with_file_popup(key_event), - ActivePopup::Delegate(_) => self.handle_key_event_with_delegate_popup(key_event), ActivePopup::None => self.handle_key_event_without_popup(key_event), }; @@ -433,19 +396,12 @@ impl ChatComposer { match self.active_popup { ActivePopup::Command(_) => { self.dismissed_file_popup_token = None; - self.dismissed_delegate_popup_token = None; } ActivePopup::File(_) => { self.sync_file_search_popup(); - self.sync_delegate_popup(); - } - ActivePopup::Delegate(_) => { - self.sync_delegate_popup(); - self.sync_file_search_popup(); } ActivePopup::None => { self.sync_file_search_popup(); - self.sync_delegate_popup(); } } @@ -736,70 +692,6 @@ impl ChatComposer { } } - fn handle_key_event_with_delegate_popup(&mut self, key_event: KeyEvent) -> (InputResult, bool) { - if self.handle_shortcut_overlay_key(&key_event) { - return (InputResult::None, true); - } - if key_event.code == KeyCode::Esc { - let next_mode = esc_hint_mode(self.footer_mode, self.is_task_running); - if next_mode != self.footer_mode { - 
self.footer_mode = next_mode; - return (InputResult::None, true); - } - } else { - self.footer_mode = reset_mode_after_activity(self.footer_mode); - } - let ActivePopup::Delegate(popup) = &mut self.active_popup else { - unreachable!(); - }; - - match key_event { - KeyEvent { - code: KeyCode::Up, .. - } => { - popup.move_up(); - (InputResult::None, true) - } - KeyEvent { - code: KeyCode::Down, - .. - } => { - popup.move_down(); - (InputResult::None, true) - } - KeyEvent { - code: KeyCode::Esc, .. - } => { - if let Some(tok) = Self::current_hash_token(&self.textarea) { - self.dismissed_delegate_popup_token = Some(tok); - } - self.active_popup = ActivePopup::None; - (InputResult::None, true) - } - KeyEvent { - code: KeyCode::Tab, .. - } - | KeyEvent { - code: KeyCode::Enter, - modifiers: KeyModifiers::NONE, - .. - } => { - let Some(session) = popup.selected_session().cloned() else { - self.active_popup = ActivePopup::None; - return (InputResult::None, true); - }; - let agent_token = session.agent_id.as_str().to_owned(); - let conversation_id = session.conversation_id.clone(); - self.insert_selected_delegate(agent_token.as_str()); - self.app_event_tx - .send(AppEvent::EnterDelegateSession(conversation_id)); - self.active_popup = ActivePopup::None; - (InputResult::None, true) - } - input => self.handle_input_basic(input), - } - } - fn is_image_path(path: &str) -> bool { let lower = path.to_ascii_lowercase(); lower.ends_with(".png") || lower.ends_with(".jpg") || lower.ends_with(".jpeg") @@ -907,90 +799,6 @@ impl ChatComposer { left_at.or(right_at) } - fn current_hash_token(textarea: &TextArea) -> Option { - let cursor_offset = textarea.cursor(); - let text = textarea.text(); - - let mut safe_cursor = cursor_offset.min(text.len()); - if safe_cursor < text.len() && !text.is_char_boundary(safe_cursor) { - safe_cursor = text - .char_indices() - .map(|(i, _)| i) - .take_while(|&i| i <= cursor_offset) - .last() - .unwrap_or(0); - } - - let before_cursor = &text[..safe_cursor]; 
- let after_cursor = &text[safe_cursor..]; - - let at_whitespace = if safe_cursor < text.len() { - text[safe_cursor..] - .chars() - .next() - .map(char::is_whitespace) - .unwrap_or(false) - } else { - false - }; - - let start_left = before_cursor - .char_indices() - .rfind(|(_, c)| c.is_whitespace()) - .map(|(idx, c)| idx + c.len_utf8()) - .unwrap_or(0); - let end_left_rel = after_cursor - .char_indices() - .find(|(_, c)| c.is_whitespace()) - .map(|(idx, _)| idx) - .unwrap_or(after_cursor.len()); - let end_left = safe_cursor + end_left_rel; - let token_left = if start_left < end_left { - Some(&text[start_left..end_left]) - } else { - None - }; - - let ws_len_right: usize = after_cursor - .chars() - .take_while(|c| c.is_whitespace()) - .map(char::len_utf8) - .sum(); - let start_right = safe_cursor + ws_len_right; - let end_right_rel = text[start_right..] - .char_indices() - .find(|(_, c)| c.is_whitespace()) - .map(|(idx, _)| idx) - .unwrap_or(text.len() - start_right); - let end_right = start_right + end_right_rel; - let token_right = if start_right < end_right { - Some(&text[start_right..end_right]) - } else { - None - }; - - let left_hash = token_left - .filter(|t| t.starts_with('#')) - .map(|t| t[1..].to_string()); - let right_hash = token_right - .filter(|t| t.starts_with('#')) - .map(|t| t[1..].to_string()); - - if at_whitespace { - if right_hash.is_some() { - return right_hash; - } - if token_left.is_some_and(|t| t == "#") { - return None; - } - return left_hash; - } - if after_cursor.starts_with('#') { - return right_hash.or(left_hash); - } - left_hash.or(right_hash) - } - /// Replace the active `@token` (the one under the cursor) with `path`. 
/// /// The algorithm mirrors `current_at_token` so replacement works no matter @@ -1042,42 +850,6 @@ impl ChatComposer { self.textarea.set_cursor(new_cursor); } - fn insert_selected_delegate(&mut self, agent_id: &str) { - let cursor_offset = self.textarea.cursor(); - let text = self.textarea.text(); - let safe_cursor = Self::clamp_to_char_boundary(text, cursor_offset); - - let before_cursor = &text[..safe_cursor]; - let after_cursor = &text[safe_cursor..]; - - let start_idx = before_cursor - .char_indices() - .rfind(|(_, c)| c.is_whitespace()) - .map(|(idx, c)| idx + c.len_utf8()) - .unwrap_or(0); - let end_rel_idx = after_cursor - .char_indices() - .find(|(_, c)| c.is_whitespace()) - .map(|(idx, _)| idx) - .unwrap_or(after_cursor.len()); - let end_idx = safe_cursor + end_rel_idx; - - let replacement = format!("#{agent_id}"); - - let mut new_text = - String::with_capacity(text.len() - (end_idx - start_idx) + replacement.len() + 1); - new_text.push_str(&text[..start_idx]); - new_text.push_str(&replacement); - new_text.push(' '); - new_text.push_str(&text[end_idx..]); - - self.textarea.set_text(&new_text); - let new_cursor = start_idx - .saturating_add(replacement.len()) - .saturating_add(1); - self.textarea.set_cursor(new_cursor); - } - /// Handle key event when no popup is visible. 
fn handle_key_event_without_popup(&mut self, key_event: KeyEvent) -> (InputResult, bool) { if self.handle_shortcut_overlay_key(&key_event) { @@ -1729,63 +1501,6 @@ impl ChatComposer { self.dismissed_file_popup_token = None; } - fn sync_delegate_popup(&mut self) { - let query = match Self::current_hash_token(&self.textarea) { - Some(token) => token, - None => { - if matches!(self.active_popup, ActivePopup::Delegate(_)) { - self.active_popup = ActivePopup::None; - } - self.current_delegate_query = None; - self.dismissed_delegate_popup_token = None; - return; - } - }; - - if self.dismissed_delegate_popup_token.as_ref() == Some(&query) { - return; - } - - if !matches!( - self.active_popup, - ActivePopup::Delegate(_) | ActivePopup::None - ) { - self.current_delegate_query = Some(query); - return; - } - - if !query.is_empty() { - self.app_event_tx - .send(AppEvent::StartDelegateSearch(query.clone())); - } - - match &mut self.active_popup { - ActivePopup::Delegate(popup) => { - if query.is_empty() { - popup.set_empty_prompt(); - } else { - popup.set_query(&query); - } - } - ActivePopup::None => { - let mut popup = DelegatePopup::new(); - if query.is_empty() { - popup.set_empty_prompt(); - } else { - popup.set_query(&query); - } - self.active_popup = ActivePopup::Delegate(popup); - } - ActivePopup::Command(_) | ActivePopup::File(_) => { - // handled by outer guard - return; - } - } - - self.current_delegate_query = Some(query); - self.dismissed_delegate_popup_token = None; - } - fn set_has_focus(&mut self, has_focus: bool) { self.has_focus = has_focus; } @@ -1828,9 +1543,6 @@ impl WidgetRef for ChatComposer { ActivePopup::File(popup) => { popup.render_ref(popup_rect, buf); } - ActivePopup::Delegate(popup) => { - popup.render_ref(popup_rect, buf); - } ActivePopup::None => { let footer_props = self.footer_props(); let custom_height = self.custom_footer_height(); diff --git a/codex-rs/tui/src/bottom_pane/delegate_popup.rs b/codex-rs/tui/src/bottom_pane/delegate_popup.rs 
deleted file mode 100644 index cbed58b7f305..000000000000 --- a/codex-rs/tui/src/bottom_pane/delegate_popup.rs +++ /dev/null @@ -1,153 +0,0 @@ -use std::fmt::Write; - -use chrono::DateTime; -use chrono::Local; -use chrono::Utc; -use codex_multi_agent::DelegateSessionSummary; -use ratatui::buffer::Buffer; -use ratatui::layout::Rect; -use ratatui::widgets::WidgetRef; - -use crate::render::Insets; -use crate::render::RectExt; - -use super::popup_consts::MAX_POPUP_ROWS; -use super::scroll_state::ScrollState; -use super::selection_popup_common::GenericDisplayRow; -use super::selection_popup_common::render_rows; - -/// Visual state for the delegate selection popup (triggered via `#` tokens). -pub(crate) struct DelegatePopup { - display_query: String, - pending_query: String, - waiting: bool, - matches: Vec, - state: ScrollState, -} - -impl DelegatePopup { - pub(crate) fn new() -> Self { - Self { - display_query: String::new(), - pending_query: String::new(), - waiting: true, - matches: Vec::new(), - state: ScrollState::new(), - } - } - - pub(crate) fn set_query(&mut self, query: &str) { - if query == self.pending_query { - return; - } - - let keep_existing = query.starts_with(&self.display_query); - - self.pending_query.clear(); - self.pending_query.push_str(query); - self.waiting = true; - - if !keep_existing { - self.matches.clear(); - self.state.reset(); - } - } - - pub(crate) fn set_empty_prompt(&mut self) { - self.display_query.clear(); - self.pending_query.clear(); - self.waiting = false; - self.matches.clear(); - self.state.reset(); - } - - pub(crate) fn set_matches(&mut self, query: &str, matches: Vec) { - if query != self.pending_query { - return; - } - - self.display_query = query.to_string(); - self.matches = matches; - self.waiting = false; - let len = self.matches.len(); - self.state.clamp_selection(len); - self.state.ensure_visible(len, len.min(MAX_POPUP_ROWS)); - } - - pub(crate) fn move_up(&mut self) { - let len = self.matches.len(); - 
self.state.move_up_wrap(len); - self.state.ensure_visible(len, len.min(MAX_POPUP_ROWS)); - } - - pub(crate) fn move_down(&mut self) { - let len = self.matches.len(); - self.state.move_down_wrap(len); - self.state.ensure_visible(len, len.min(MAX_POPUP_ROWS)); - } - - pub(crate) fn selected_session(&self) -> Option<&DelegateSessionSummary> { - self.state - .selected_idx - .and_then(|idx| self.matches.get(idx)) - } - - pub(crate) fn calculate_required_height(&self) -> u16 { - self.matches.len().clamp(1, MAX_POPUP_ROWS) as u16 - } - - fn rows(&self) -> Vec { - if self.matches.is_empty() { - return Vec::new(); - } - - self.matches - .iter() - .map(|summary| { - let mut description = String::new(); - let _ = write!( - description, - "{} · {}", - format_timestamp(summary.last_interacted_at), - summary.cwd.display() - ); - - GenericDisplayRow { - name: format!("#{}", summary.agent_id.as_str()), - match_indices: None, - is_current: false, - display_shortcut: None, - description: Some(description), - } - }) - .collect() - } -} - -impl WidgetRef for &DelegatePopup { - fn render_ref(&self, area: Rect, buf: &mut Buffer) { - let rows = self.rows(); - let empty_message = if self.waiting { - "loading..." 
- } else { - "no delegates" - }; - - render_rows( - area.inset(Insets::tlbr(0, 2, 0, 0)), - buf, - &rows, - &self.state, - MAX_POPUP_ROWS, - empty_message, - ); - } -} - -fn format_timestamp(time: std::time::SystemTime) -> String { - let datetime: DateTime = time.into(); - datetime - .with_timezone(&Local) - .format("%Y-%m-%d %H:%M") - .to_string() -} diff --git a/codex-rs/tui/src/bottom_pane/mod.rs b/codex-rs/tui/src/bottom_pane/mod.rs index 209dd9d2d019..b49a8e820387 100644 --- a/codex-rs/tui/src/bottom_pane/mod.rs +++ b/codex-rs/tui/src/bottom_pane/mod.rs @@ -5,7 +5,6 @@ use crate::app_event_sender::AppEventSender; use crate::tui::FrameRequester; use bottom_pane_view::BottomPaneView; use codex_file_search::FileMatch; -use codex_multi_agent::DelegateSessionSummary; use crossterm::event::KeyCode; use crossterm::event::KeyEvent; use ratatui::buffer::Buffer; @@ -23,7 +22,6 @@ mod chat_composer; mod chat_composer_history; mod command_popup; pub mod custom_prompt_view; -mod delegate_popup; mod file_search_popup; mod footer; mod list_selection_view; @@ -493,15 +491,6 @@ impl BottomPane { self.request_redraw(); } - pub(crate) fn on_delegate_search_result( - &mut self, - query: String, - matches: Vec, - ) { - self.composer.on_delegate_search_result(query, matches); - self.request_redraw(); - } - pub(crate) fn attach_image( &mut self, path: PathBuf, diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index c01e89755e73..96a35c00996a 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -1693,7 +1693,7 @@ impl ChatWidget { } } - fn request_redraw(&mut self) { + pub(crate) fn request_redraw(&mut self) { self.frame_requester.schedule_frame(); } @@ -2103,14 +2103,6 @@ impl ChatWidget { self.bottom_pane.on_file_search_result(query, matches); } - pub(crate) fn apply_delegate_search_result( - &mut self, - query: String, - matches: Vec, - ) { - self.bottom_pane.on_delegate_search_result(query, matches); - } - /// Handle 
Ctrl-C key press. fn on_ctrl_c(&mut self) { if self.bottom_pane.on_ctrl_c() == CancellationEvent::Handled { From f59baab450eed028af088c776c4e2893850642ed Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Thu, 16 Oct 2025 11:37:01 +0000 Subject: [PATCH 11/17] Apply clippy cleanups after merge --- codex-rs/tui/src/bottom_pane/footer.rs | 4 ++-- codex-rs/tui/src/chatwidget.rs | 13 +++++-------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/codex-rs/tui/src/bottom_pane/footer.rs b/codex-rs/tui/src/bottom_pane/footer.rs index d7166a8f6e44..bd499f039b88 100644 --- a/codex-rs/tui/src/bottom_pane/footer.rs +++ b/codex-rs/tui/src/bottom_pane/footer.rs @@ -84,7 +84,7 @@ fn footer_lines(props: &FooterProps) -> Vec> { FooterMode::ShortcutSummary => { let mut spans: Vec> = Vec::new(); if let Some(label) = props.delegate_label.as_ref() { - spans.push(format!("In {}", label).cyan()); + spans.push(format!("In {label}").cyan()); spans.push(" · ".dim()); } let context_line = context_window_line(props.context_window_percent); @@ -106,7 +106,7 @@ fn footer_lines(props: &FooterProps) -> Vec> { FooterMode::ContextOnly => { let mut spans: Vec> = Vec::new(); if let Some(label) = props.delegate_label.as_ref() { - spans.push(format!("In {}", label).cyan()); + spans.push(format!("In {label}").cyan()); spans.push(" · ".dim()); } let context_line = context_window_line(props.context_window_percent); diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 0b02ed2595e6..f25aa080bfcb 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -442,7 +442,7 @@ impl ChatWidget { format!("Returned from #{}", summary.agent_id.as_str()), Some("Queued delegate context for next prompt.".to_string()), )); - self.add_to_history(history_cell::new_info_event(context.clone(), None)); + self.add_to_history(history_cell::new_info_event(context, None)); } fn on_agent_message(&mut self, message: String) { @@ -519,13 +519,11 @@ impl 
ChatWidget { if self.delegate_context.is_some() && let Some(message) = last_agent_message.as_ref() - { - if !message.trim().is_empty() { + && !message.trim().is_empty() { self.delegate_agent_frames.push(message.clone()); } - } - let notification_response = last_agent_message.clone().unwrap_or_default(); + let notification_response = last_agent_message.unwrap_or_default(); // If there is a queued user message, send exactly one now to begin the next turn. self.maybe_send_next_queued_input(); // Emit a notification when the turn completes (suppressed if focused). @@ -1461,14 +1459,13 @@ impl ChatWidget { tracing::error!("failed to send message: {e}"); } - if !text.is_empty() { - if let Err(e) = self + if !text.is_empty() + && let Err(e) = self .codex_op_tx .send(Op::AddToHistory { text: text.clone() }) { tracing::error!("failed to send AddHistory op: {e}"); } - } if !display_text.is_empty() { self.add_to_history(history_cell::new_user_prompt(display_text)); From 521bed68c05c082ac84758c3f28d22ece5cfdb0e Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Thu, 16 Oct 2025 13:33:18 +0000 Subject: [PATCH 12/17] Enable nested delegation and update docs --- ai-temp/AGENTS.md | 2 +- ai-temp/CHANGELOG.md | 5 ++ ai-temp/agent-switching.md | 2 +- ai-temp/config-design.md | 2 +- ai-temp/example-codex-home/AGENTS.md | 10 +-- ai-temp/example-codex-home/README.md | 14 +++- .../agents/conservative_ideas/AGENTS.md | 14 ++++ .../agents/conservative_ideas/config.toml | 8 ++ .../agents/creative_ideas/AGENTS.md | 14 ++++ .../agents/creative_ideas/config.toml | 8 ++ .../agents/critic/AGENTS.md | 5 +- .../agents/critic/config.toml | 3 + .../agents/ideas_provider/AGENTS.md | 18 +++- .../agents/ideas_provider/config.toml | 3 + ai-temp/example-codex-home/config.toml | 3 + ai-temp/orchestration-integration.md | 11 +-- ai-temp/scripts/clean-example-home.sh | 50 +++++++++++ ai-temp/tool-implementation-patterns.md | 3 +- ai-temp/ui-ux-delegation.md | 3 +- codex-rs/core/src/config.rs | 60 
+++++++++++++- codex-rs/multi-agent/src/lib.rs | 63 ++++++++++++++ codex-rs/multi-agent/src/orchestrator.rs | 30 ++++--- codex-rs/tui/src/app.rs | 58 ++++++++++--- codex-rs/tui/src/chatwidget.rs | 83 ++++++++++++------- codex-rs/tui/src/chatwidget/tests.rs | 34 +++++++- codex-rs/tui/src/lib.rs | 7 +- 26 files changed, 434 insertions(+), 79 deletions(-) create mode 100644 ai-temp/example-codex-home/agents/conservative_ideas/AGENTS.md create mode 100644 ai-temp/example-codex-home/agents/conservative_ideas/config.toml create mode 100644 ai-temp/example-codex-home/agents/creative_ideas/AGENTS.md create mode 100644 ai-temp/example-codex-home/agents/creative_ideas/config.toml create mode 100755 ai-temp/scripts/clean-example-home.sh diff --git a/ai-temp/AGENTS.md b/ai-temp/AGENTS.md index e5601049de2e..2fa2dda7c041 100644 --- a/ai-temp/AGENTS.md +++ b/ai-temp/AGENTS.md @@ -7,10 +7,10 @@ ## Existing Implementation Survey -### Config and `CODEX_HOME` - `codex-rs/core/src/config.rs` owns the `Config` struct, the `find_codex_home` helper, and `Config::load_from_base_config_with_overrides`, which lets us inject a custom `codex_home` path when constructing a configuration. `Config::log_dir` and related helpers derive paths by appending to `codex_home`, so moving to a per-agent directory is automatically supported. - `codex-rs/core/src/config_loader/mod.rs` implements layered config loading (`config.toml`, managed overrides, CLI overrides). It already accepts an arbitrary base directory, so we can reuse it for sub-agent trees by pointing it at `~/.codex/agents/`. - `codex-rs/common/src/config_override.rs` parses `-c key=value` overrides. Those overrides can continue to target agent-specific settings as long as we resolve them against the sub-agent config before the run starts. +- The `multi_agent.agents = ["…"]` list in each `config.toml` now controls delegate availability. 
When the list is non-empty the delegate tool auto-enables; when empty it stays hidden, eliminating the need for manual `include_delegate_tool` flags. ### Project instructions (`AGENTS.md`) - `Config::load_instructions` in `codex-rs/core/src/config.rs` reads `AGENTS.md` at the root of `codex_home`. That gives us a place to put per-agent doctrine without touching repo-level instructions. diff --git a/ai-temp/CHANGELOG.md b/ai-temp/CHANGELOG.md index 8e9f88c15b77..a6bbb4516252 100644 --- a/ai-temp/CHANGELOG.md +++ b/ai-temp/CHANGELOG.md @@ -4,6 +4,11 @@ ## 2025-10-16 - Removed the inline `#agent` autocomplete experiment and reverted documentation to focus on the delegate picker and slash command flow. +- Added child delegate directories (`creative_ideas`, `conservative_ideas`) to the example Codex home and updated instructions/README to describe the fixed delegation chain (main → ideas_provider → critic). +- Auto-enabled the delegate tool whenever `[multi_agent].agents` is non-empty so sub-agents inherit delegation without toggling `include_delegate_tool`. +- Updated `AgentOrchestrator` to spawn sub-agent conversations via `ConversationManager::with_delegate`, enabling delegates to invoke their own delegates. +- Switched delegate execution tracking to a stack; the TUI now shows nested runs with indented history lines. +- Added focused unit tests covering the new config flag behaviour and UI indentation to prevent regressions. ## 2025-10-14 - Captured the baseline design artifacts (`AGENTS.md`, `config-design.md`, `instruction-design.md`, `persistence-design.md`, `error-handling.md`) compiled during the planning phase. 
diff --git a/ai-temp/agent-switching.md b/ai-temp/agent-switching.md index 9e58f30f11cb..20aa1fb449d9 100644 --- a/ai-temp/agent-switching.md +++ b/ai-temp/agent-switching.md @@ -44,7 +44,7 @@ ## Edge Cases & Safeguards - **Aborted delegate sessions** – If the orchestrator or sub-agent errors while you are switched in, emit a `DelegateEvent::Failed`, append an error history cell in the main transcript, and automatically return the user to the main agent. Also write the detailed failure to `codex-tui.log`. -- **Active-run guard** – Maintain the existing single-run invariant: attempts to start a fresh delegate while another is active (including while switched) should yield `DelegateInProgress` and a user-facing notice. +- **Active-run visibility** – The orchestrator now tracks a stack of in-flight delegates. Surface the full stack in the UI so users know which nested agents are working; only the top-most run streams output. - **Multi-agent hopping** – Switching among multiple delegates is hub-and-spoke: you can move main ↔ #ideas ↔ main ↔ #critic freely. Future “delegate chains” (sub-agents invoking their own sub-agents) remain out of scope; note this in breadcrumbs/help text so expectations stay clear. - **Undo/redo** – Codex does not provide an orchestrator-level undo stack. Any manual file edits a user performs while switched should be managed through their VCS tooling. - **Tool overlap** – Each sub-agent carries its own tool registry (e.g., plan tool). Streaming results during the switch stay in the sub-agent transcript; summaries injected on return should mention any plan updates so the main agent context is accurate. diff --git a/ai-temp/config-design.md b/ai-temp/config-design.md index c8ec42d6184b..dfb5c784af7a 100644 --- a/ai-temp/config-design.md +++ b/ai-temp/config-design.md @@ -15,7 +15,7 @@ 2. Load the user’s global `ConfigToml` (including managed layers) from that directory. 3. 
If an `agent_id` is provided, resolve `~/.codex/agents/` and load its `config.toml`. Merge this table on top of the global config. 4. Apply CLI overrides (`CliConfigOverrides::parse_overrides`) so one-off adjustments still work per session. -5. Instantiate the final `Config` via `Config::load_from_base_config_with_overrides`, passing the resolved agent `codex_home` when present; otherwise fall back to the global Codex home. +5. Instantiate the final `Config` via `Config::load_from_base_config_with_overrides`, passing the resolved agent `codex_home` when present; otherwise fall back to the global Codex home. During this step we automatically enable the delegate tool when the merged `[multi_agent].agents` list is non-empty, so sub-agents inherit delegation capabilities without extra flags. This yields inherited behaviour by default while letting each agent override keys explicitly. diff --git a/ai-temp/example-codex-home/AGENTS.md b/ai-temp/example-codex-home/AGENTS.md index 87fbb75ec1b2..16b98623a10c 100644 --- a/ai-temp/example-codex-home/AGENTS.md +++ b/ai-temp/example-codex-home/AGENTS.md @@ -1,16 +1,16 @@ # Sample Primary Agent Instructions -This directory demonstrates a multi-agent workflow. You coordinate any kind of request—software, product, research, storytelling, etc.—by delegating analysis to specialists: +This directory demonstrates a multi-agent workflow. You are the coordinator that routes work through a fixed sequence: 1. **Understand the request.** Restate the goal, gather missing context, and note constraints. -2. **Delegate ideation first.** When exploration is useful, brief the ideas provider (text-only) to generate concise alternative directions. -3. **Pass the leading option to the critic.** Summarize the approach you favor (include assumptions) and ask the critic to surface risks or validation gaps. -4. 
**Synthesize the dialogue.** After both delegates reply, produce **exactly one paragraph** (≤75 words) tying together the insights and recommending next steps—no headings, bullets, or follow-up notes. +2. **Invoke `ideas_provider`.** Share the brief and **explicitly require** it to run `creative_ideas` *and* `conservative_ideas` before responding. +3. **Forward the winning approach to `critic`.** Summarize the provider’s outcome (including key trade-offs) and ask the critic to highlight risks, validation gaps, or missing safeguards. +4. **Synthesize the dialogue.** After the critic replies, deliver **exactly one paragraph** (≤75 words) tying together the best idea, mitigations, and next actions—no headings or bullets. General rules: - Keep replies short unless the user explicitly requests depth; cite `ai-temp/` docs when needed for background. -- If delegation adds no value, explain why you are handling the request directly. +- Follow the chain even if you already see the answer; only skip when the user explicitly opts out of delegation. - The `delegate_agent` tool is AI-only. Describe which delegate you want in plain language—the user cannot invoke sub-agents directly. - Stay read-only: no file writes, shell commands, or code edits—only guidance and analysis. - When manually testing, describe the problem clearly so the coordinator chooses the right delegate. diff --git a/ai-temp/example-codex-home/README.md b/ai-temp/example-codex-home/README.md index 14530a1e9257..411fd274c0f8 100644 --- a/ai-temp/example-codex-home/README.md +++ b/ai-temp/example-codex-home/README.md @@ -1,13 +1,19 @@ # Sample Codex Home Setup -This directory mirrors a minimal `~/.codex` layout so you can try the multi-agent loader without touching your real config. +This directory mirrors a minimal `~/.codex` layout so you can try the multi-agent loader without touching your real config. The flow illustrates a chained delegation sequence: + +1. Main agent briefs `ideas_provider`. +2. 
`ideas_provider` consults both `creative_ideas` and `conservative_ideas`, then recommends a blended approach. +3. Main agent forwards that plan to `critic` for risk review before replying to the user. ## Directory Structure - `config.toml`: baseline settings used when no sub-agent is selected. - `AGENTS.md`: default instruction set for the primary agent (orchestrator). -- `agents/ideas_provider/`: GPT-5 sub-agent that proposes multiple approaches. +- `agents/ideas_provider/`: synthesizes outputs from creative and conservative delegates. - `agents/critic/`: GPT-5-nano sub-agent that reviews the leading option. +- `agents/creative_ideas/`: generates bold, unconventional concepts. +- `agents/conservative_ideas/`: produces safe, low-risk alternatives. - `log/`, `sessions/`: empty placeholders so Codex can write logs and rollouts. ## Quick Start @@ -22,6 +28,8 @@ CODEX_HOME="$(pwd)/ai-temp/example-codex-home" target/debug/codex # Launch a specific sub-agent directly (skips the orchestrator) CODEX_HOME="$(pwd)/ai-temp/example-codex-home" target/debug/codex --agent ideas_provider CODEX_HOME="$(pwd)/ai-temp/example-codex-home" target/debug/codex --agent critic +CODEX_HOME="$(pwd)/ai-temp/example-codex-home" target/debug/codex --agent creative_ideas +CODEX_HOME="$(pwd)/ai-temp/example-codex-home" target/debug/codex --agent conservative_ideas # Inside the primary session, describe the task as usual. # The main assistant decides when to call the `delegate_agent` tool. @@ -32,7 +40,7 @@ CODEX_HOME="$(pwd)/ai-temp/example-codex-home" target/debug/codex --agent critic ### Suggested Prompt for the Read-Only Flow ``` -We’re scoping a read-only refactor of the parser—no code yet. Please gather three approaches from the ideas provider, pick the leading option, run it by the critic for risks, and then give me a combined recommendation. +We’re assessing a read-only refactor of the parser—no code yet. 
Ask the ideas provider to explore options, let it consult both the creative and conservative delegates, pick the winning approach, run it by the critic for risks, and then give me the final summary. ``` Unset `CODEX_HOME` (or point it back to your real path) once you're done experimenting. diff --git a/ai-temp/example-codex-home/agents/conservative_ideas/AGENTS.md b/ai-temp/example-codex-home/agents/conservative_ideas/AGENTS.md new file mode 100644 index 000000000000..860b6df446af --- /dev/null +++ b/ai-temp/example-codex-home/agents/conservative_ideas/AGENTS.md @@ -0,0 +1,14 @@ +# Agent Purpose + +You are the `conservative_ideas` delegate. `ideas_provider` will always call you after it hears from `creative_ideas`. Propose safe, low-risk approaches that emphasize feasibility, maintainability, and alignment with existing constraints. + +# Operating Instructions + +- Prefer incremental improvements and proven patterns over novel experiments. +- Highlight potential risks or blockers for each recommendation. +- When rejecting an idea, suggest a safer alternative. + +# Collaboration + +- Present your suggestions as bullet points grouped by theme (e.g., "Implementation", "Process", "Validation"). +- Maintain a professional, concise tone so `ideas_provider` can merge your guidance with creative input. 
diff --git a/ai-temp/example-codex-home/agents/conservative_ideas/config.toml b/ai-temp/example-codex-home/agents/conservative_ideas/config.toml new file mode 100644 index 000000000000..74873f74fc8f --- /dev/null +++ b/ai-temp/example-codex-home/agents/conservative_ideas/config.toml @@ -0,0 +1,8 @@ +model = "gpt-5" +sandbox_mode = "read-only" + +[history] +persistence = "none" + +[multi_agent] +agents = [] diff --git a/ai-temp/example-codex-home/agents/creative_ideas/AGENTS.md b/ai-temp/example-codex-home/agents/creative_ideas/AGENTS.md new file mode 100644 index 000000000000..e6f247132048 --- /dev/null +++ b/ai-temp/example-codex-home/agents/creative_ideas/AGENTS.md @@ -0,0 +1,14 @@ +# Agent Purpose + +You are the `creative_ideas` delegate. `ideas_provider` will always call you first. Generate bold, imaginative concepts that push boundaries while still respecting the user's stated constraints. Favor variety over safety; your proposals should inspire lateral thinking the primary team can refine. + +# Operating Instructions + +- Always return at least three distinct ideas unless the prompt explicitly requests a single option. +- For each idea, include a short rationale highlighting the creative twist. +- Note any assumptions you make so downstream agents can sanity-check them. + +# Collaboration + +- Produce structured output using numbered lists so `ideas_provider` can compare options easily. +- Do not run tools or make filesystem changes; respond with analysis only. 
diff --git a/ai-temp/example-codex-home/agents/creative_ideas/config.toml b/ai-temp/example-codex-home/agents/creative_ideas/config.toml new file mode 100644 index 000000000000..74873f74fc8f --- /dev/null +++ b/ai-temp/example-codex-home/agents/creative_ideas/config.toml @@ -0,0 +1,8 @@ +model = "gpt-5" +sandbox_mode = "read-only" + +[history] +persistence = "none" + +[multi_agent] +agents = [] diff --git a/ai-temp/example-codex-home/agents/critic/AGENTS.md b/ai-temp/example-codex-home/agents/critic/AGENTS.md index 1f4b04d11890..ced1cb4ebe00 100644 --- a/ai-temp/example-codex-home/agents/critic/AGENTS.md +++ b/ai-temp/example-codex-home/agents/critic/AGENTS.md @@ -1,6 +1,9 @@ # Critic Agent +You review the recommendation produced by `ideas_provider`. + - Stay strictly read-only: analysis comments only—no commands, code, or approvals. - Return **exactly five** bullet points. -- Each bullet must be a **single sentence** (≤20 words) calling out a risk, blind spot, or missing validation. +- Each bullet must be a **single sentence** (≤20 words) identifying a risk, unknown, or validation step tied to the proposal. +- End the final bullet with a suggested mitigation or follow-up owner. - Do not add introductions, conclusions, or extra narrative—only the five bullets. 
diff --git a/ai-temp/example-codex-home/agents/critic/config.toml b/ai-temp/example-codex-home/agents/critic/config.toml index 3569c45a14b4..74873f74fc8f 100644 --- a/ai-temp/example-codex-home/agents/critic/config.toml +++ b/ai-temp/example-codex-home/agents/critic/config.toml @@ -3,3 +3,6 @@ sandbox_mode = "read-only" [history] persistence = "none" + +[multi_agent] +agents = [] diff --git a/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md b/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md index cc2714a1dd6d..8b7d43b96d19 100644 --- a/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md +++ b/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md @@ -1,6 +1,16 @@ # Ideas Provider Agent -- Respond in read-only mode: no commands, code, or file edits. -- Return **exactly five** bullet points. -- Each bullet must be a **single sentence** (≤20 words) describing a distinct approach and its key testing focus. -- Do not add intros, outros, or extra commentary—just the five bullets. +You orchestrate ideation for the primary agent. Always follow this sequence: + +1. **Delegate to `creative_ideas`.** Request at least three bold concepts tailored to the brief. This call is mandatory—do not continue until it completes. +2. **Delegate to `conservative_ideas`.** Request at least three safe, low-risk options. This call is also mandatory. +3. Compare the two streams, identify the strongest overall direction, and note how each supporting idea contributes. + +When replying to the caller: + +- Start with a **one-sentence recommendation** that blends the best creative spark with the safest guardrails. +- Follow with exactly two sections: + - `Highlights:` bullet list (max three bullets) capturing the standout elements that made the top idea win. + - `Watchouts:` bullet list (max three bullets) summarizing risks or validation steps drawn from the conservative critique. +- Do not quote the sub-agents verbatim; synthesize in your own words. 
+- Remain read-only: no commands, code, or file edits. diff --git a/ai-temp/example-codex-home/agents/ideas_provider/config.toml b/ai-temp/example-codex-home/agents/ideas_provider/config.toml index 62f129767ecf..0ff5690a490d 100644 --- a/ai-temp/example-codex-home/agents/ideas_provider/config.toml +++ b/ai-temp/example-codex-home/agents/ideas_provider/config.toml @@ -3,3 +3,6 @@ sandbox_mode = "workspace-write" [history] persistence = "save-all" + +[multi_agent] +agents = ["creative_ideas", "conservative_ideas"] diff --git a/ai-temp/example-codex-home/config.toml b/ai-temp/example-codex-home/config.toml index 3e68b7fe19f3..b471ae735dd2 100644 --- a/ai-temp/example-codex-home/config.toml +++ b/ai-temp/example-codex-home/config.toml @@ -4,5 +4,8 @@ sandbox_mode = "workspace-write" [history] persistence = "save-all" +[multi_agent] +agents = ["ideas_provider", "critic"] + [tui] show_task_summary = true diff --git a/ai-temp/orchestration-integration.md b/ai-temp/orchestration-integration.md index c151287991f7..e128ab84a3e4 100644 --- a/ai-temp/orchestration-integration.md +++ b/ai-temp/orchestration-integration.md @@ -9,7 +9,7 @@ This document describes how to wire true sub-agent orchestration into the Codex ### 1.1 Components - **`codex-multi-agent` crate (`codex-rs/multi-agent/src/lib.rs`)** - Already exposes `AgentId`, `AgentRegistry`, and async loaders that return `AgentContext` values (merged `ConfigToml` + `Config`). We extend this crate with an orchestration module to keep agent resolution and config cloning isolated from the rest of the app. + Already exposes `AgentId`, `AgentRegistry`, and async loaders that return `AgentContext` values (merged `ConfigToml` + `Config`). We extend this crate with an orchestration module to keep agent resolution and config cloning isolated from the rest of the app. Each `AgentContext` now captures its own `multi_agent.agents` list so child delegates inherit the correct allowlist automatically. 
- **Orchestrator core (new)** Proposed module `codex-rs/multi-agent/src/orchestrator.rs` exporting: @@ -17,8 +17,9 @@ This document describes how to wire true sub-agent orchestration into the Codex - `DelegateRequest`: SPA-style struct describing who/what to run (`agent_id`, prompt payload, optional working directory override). - `AgentOrchestrator`: stateful controller that owns: - A primary `AgentHandle` (mirrors currently running conversation). - - A per-agent `ConversationManager` + `UnboundedSender` pair created via `ConversationManager::new_conversation` (`codex-rs/core/src/conversation_manager.rs:57`). + - A per-agent `ConversationManager` + `UnboundedSender` pair created via `ConversationManager::with_delegate` so child runs can spawn their own delegates. - Result channels to stream `Event` values back to the primary UI after post-processing. + - A stack of active run ids so nested delegates can execute concurrently. - **`ConversationManager` reuse** Sub-agent sessions use the same `ConversationManager` entry points. The orchestrator calls `ConversationManager::new_conversation` with the agent-specific `Config` so all persistence automatically lands in `~/.codex/agents//` (per §2.2). @@ -34,7 +35,7 @@ This document describes how to wire true sub-agent orchestration into the Codex 2. **Agent resolution** - `AgentOrchestrator::resolve_agent` calls `AgentConfigLoader::load` with the requested `AgentId`. - - On success, the orchestrator instantiates / reuses a `ConversationManager` scoped to that agent. Authentication stays shared (`AuthManager` from the primary session) per current design docs. + - On success, the orchestrator instantiates / reuses a `ConversationManager` scoped to that agent. Authentication stays shared (`AuthManager` from the primary session) per current design docs. The returned `AgentContext` also defines which downstream agents this delegate is allowed to call. 3. 
**Conversation bootstrap** - Call `ConversationManager::new_conversation` with the agent `Config`. @@ -44,7 +45,7 @@ This document describes how to wire true sub-agent orchestration into the Codex - The orchestrator forwards the translated prompt into the sub-agent conversation (`conversation.submit`). - Streamed `Event` values are intercepted before they reach the UI. For every event: - Persist to the sub-agent transcript as normal (handled by core). - - Convert to orchestrator messages (`DelegateProgress`, `DelegateOutput`), then forward to the primary session via a new `AppEvent::DelegateUpdate`. + - Convert to orchestrator messages (`DelegateProgress`, `DelegateOutput`), then forward to the primary session via a new `AppEvent::DelegateUpdate`. Nested runs simply push additional `Started` events with greater depth. 5. **Completion and summary** - When `EventMsg::TaskComplete` fires, the orchestrator synthesizes a summary cell (e.g., `history_cell::AgentMessageCell`) and injects it into the primary transcript via `AppEvent::InsertHistoryCell`. @@ -156,7 +157,7 @@ This document describes how to wire true sub-agent orchestration into the Codex ## 6. Decisions & Open Questions -- **Concurrent delegates**: Launch one delegate at a time. The orchestrator stores requests in a FIFO queue and short-circuits when the active run completes so parallelism can be enabled later without redesign (keep the queue abstraction in place). +- **Concurrent delegates**: The orchestrator now maintains a stack of active runs so delegates can invoke their own delegates; the UI surfaces the stack depth with indented history entries. - **Prompt hand-off semantics**: The primary agent composes the sub-agent prompt with all relevant context before invoking `delegate()`. The orchestrator forwards the prompt verbatim without trimming history. - **Return payload**: Still open. 
Default plan remains to summarize results in the primary transcript while exposing a “view details” action to open the sub-agent session. - **Auth isolation**: Shared. All agents continue to use the primary `AuthManager`; per-agent credentials are out of scope unless a future requirement emerges. diff --git a/ai-temp/scripts/clean-example-home.sh b/ai-temp/scripts/clean-example-home.sh new file mode 100755 index 000000000000..a1162e7bbd79 --- /dev/null +++ b/ai-temp/scripts/clean-example-home.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +# Reset generated data inside ai-temp/example-codex-home so the sample Codex +# home starts from a clean slate (no logs, sessions, or history files). +# +# Usage: +# ./clean-example-home.sh +# +# The script is intentionally conservative: it only touches the sample Codex +# home that ships in this repository. It leaves configuration files and +# instructions intact while deleting log files, session rollouts, and history +# transcripts for the main agent and every sub-agent under agents/. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +AI_TEMP_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" +EXAMPLE_HOME="${AI_TEMP_DIR}/example-codex-home" + +if [[ ! -d "${EXAMPLE_HOME}" ]]; then + echo "error: expected sample Codex home at ${EXAMPLE_HOME}" >&2 + exit 1 +fi + +clean_tree() { + local path="$1" + if [[ -e "${path}" ]]; then + rm -rf "${path}" + fi + mkdir -p "${path}" +} + +echo "🔄 Cleaning example Codex home at ${EXAMPLE_HOME}" + +# Primary agent artifacts. +rm -f "${EXAMPLE_HOME}/history.jsonl" +clean_tree "${EXAMPLE_HOME}/log" +clean_tree "${EXAMPLE_HOME}/sessions" + +# Sub-agent artifacts. +if [[ -d "${EXAMPLE_HOME}/agents" ]]; then + for agent_dir in "${EXAMPLE_HOME}/agents"/*; do + [[ -d "${agent_dir}" ]] || continue + rm -f "${agent_dir}/history.jsonl" + clean_tree "${agent_dir}/log" + clean_tree "${agent_dir}/sessions" + done +fi + +echo "✅ example-codex-home reset completed." 
diff --git a/ai-temp/tool-implementation-patterns.md b/ai-temp/tool-implementation-patterns.md index bcaafecaf73c..24341a6d8a2d 100644 --- a/ai-temp/tool-implementation-patterns.md +++ b/ai-temp/tool-implementation-patterns.md @@ -17,7 +17,8 @@ This note captures the patterns we observed while digging into the built-in plan ## 4. Config-Driven Inclusion - `Config.include_plan_tool` toggles availability. Front ends (CLI, TUI, app server) set this flag through `ConfigOverrides`. -- When disabled, the tool spec and handler never register, preventing accidental invocation. +- For delegation we piggyback on `[multi_agent].agents`: the flag is auto-enabled whenever that list is non-empty, so child agents gain the delegate tool without extra overrides. +- When the feature is disabled, the tool spec and handler never register, preventing accidental invocation. ## 5. Client-Side Presentation - The TUI listens for `EventMsg::PlanUpdate` to render a checklist-style history cell. diff --git a/ai-temp/ui-ux-delegation.md b/ai-temp/ui-ux-delegation.md index 720ba8aaf57d..412d89226161 100644 --- a/ai-temp/ui-ux-delegation.md +++ b/ai-temp/ui-ux-delegation.md @@ -3,12 +3,13 @@ ## Current Flow - Primary agent streaming uses `StreamController` to animate delta lines (`codex-rs/tui/src/chatwidget.rs:698`, `streaming/controller.rs:9`). The controller emits `AgentMessageCell` entries and drives the commit animation via `AppEvent::StartCommitAnimation`. - Exec and MCP tool calls rely on dedicated history cells (`ExecCell`, `new_active_mcp_tool_call`) with live updates for begin/end events (`chatwidget.rs:633`, `chatwidget.rs:909`). -- Delegation events from the orchestrator reach `App::handle_delegate_update` (`codex-rs/tui/src/app.rs:446`). `DelegateEvent::Delta` now streams sub-agent output through the same `StreamController` pipeline, while start/completion still use `add_info_message`/`add_delegate_completion` for context. 
+- Delegation events from the orchestrator reach `App::handle_delegate_update` (`codex-rs/tui/src/app.rs:446`). `DelegateEvent::Delta` now streams sub-agent output through the same `StreamController` pipeline, while start/completion still use `add_info_message`/`add_delegate_completion` for context. Incoming events carry run depth so the chat history can render indented entries for nested delegates. - `DelegateEvent::Started` activates the bottom-pane status indicator with a “Delegating to #` header and hides it once the run finishes (`codex-rs/tui/src/chatwidget.rs:2165-2196`), reducing ambiguity about who is currently working. ## Observed Gaps - No transcript linking: once the delegate finishes, the TUI shows the final answer but lacks a quick way to drill into the delegate’s own session (the path is only available in logs). - Duration is implicit: the status header flips back to “Working” when delegation ends, but we still do not surface elapsed time or a final summary chip in the transcript. +- Nested runs only show progress via indented info messages; we may still want richer breadcrumbs or timers in the status widget. ## UX Goals 1. **Live streaming** – continue to reuse `StreamController`, but add safeguards against duplicate completions (covered by the new test) and consider showing a collapsed summary once the stream ends. 
diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs index dbcce43be9c7..ce33462d6266 100644 --- a/codex-rs/core/src/config.rs +++ b/codex-rs/core/src/config.rs @@ -44,6 +44,7 @@ use dirs::home_dir; use serde::Deserialize; use std::collections::BTreeMap; use std::collections::HashMap; +use std::collections::HashSet; use std::io::ErrorKind; use std::path::Path; use std::path::PathBuf; @@ -69,6 +70,11 @@ pub(crate) const PROJECT_DOC_MAX_BYTES: usize = 32 * 1024; // 32 KiB pub(crate) const CONFIG_TOML_FILE: &str = "config.toml"; +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct MultiAgentConfig { + pub agents: Vec, +} + /// Application configuration loaded from disk and merged with overrides. #[derive(Debug, Clone, PartialEq)] pub struct Config { @@ -227,6 +233,9 @@ pub struct Config { /// Include the `view_image` tool that lets the agent attach a local image path to context. pub include_view_image_tool: bool, + /// Multi-agent options derived from config.toml. + pub multi_agent: MultiAgentConfig, + /// Centralized feature flags; source of truth for feature gating. pub features: Features, @@ -811,6 +820,9 @@ pub struct ConfigToml { /// Nested tools section for feature toggles pub tools: Option, + /// Configuration for multi-agent delegation. + pub multi_agent: Option, + /// Centralized feature flags (new). Prefer this over individual toggles. #[serde(default)] pub features: Option, @@ -881,6 +893,12 @@ impl From for Tools { } } +#[derive(Deserialize, Debug, Clone, Default, PartialEq, Eq)] +pub struct MultiAgentToml { + #[serde(default)] + pub agents: Vec, +} + impl ConfigToml { /// Derive the effective sandbox policy from the configuration. 
fn derive_sandbox_policy(&self, sandbox_mode_override: Option) -> SandboxPolicy { @@ -1031,6 +1049,13 @@ impl Config { let features = Features::from_config(&cfg, &config_profile, feature_overrides); + let include_delegate_tool_flag = include_delegate_tool.unwrap_or_else(|| { + cfg.multi_agent + .as_ref() + .map(|ma| !ma.agents.is_empty()) + .unwrap_or(false) + }); + let sandbox_policy = cfg.derive_sandbox_policy(sandbox_mode); let mut model_providers = built_in_model_providers(); @@ -1076,6 +1101,15 @@ impl Config { let history = cfg.history.unwrap_or_default(); + let multi_agent = MultiAgentConfig { + agents: normalize_multi_agent_agents( + cfg.multi_agent + .as_ref() + .map(|ma| ma.agents.clone()) + .unwrap_or_default(), + ), + }; + let include_plan_tool_flag = features.enabled(Feature::PlanTool); let include_apply_patch_tool_flag = features.enabled(Feature::ApplyPatchFreeform); let include_view_image_tool_flag = features.enabled(Feature::ViewImageTool); @@ -1197,13 +1231,14 @@ impl Config { .or(cfg.chatgpt_base_url) .unwrap_or("https://chatgpt.com/backend-api/".to_string()), include_plan_tool: include_plan_tool_flag, - include_delegate_tool: include_delegate_tool.unwrap_or(false), + include_delegate_tool: include_delegate_tool_flag, include_apply_patch_tool: include_apply_patch_tool_flag, tools_web_search_request, use_experimental_streamable_shell_tool, use_experimental_unified_exec_tool, use_experimental_use_rmcp_client, include_view_image_tool: include_view_image_tool_flag, + multi_agent, features, active_profile: active_profile_name, windows_wsl_setup_acknowledged: cfg.windows_wsl_setup_acknowledged.unwrap_or(false), @@ -1288,6 +1323,25 @@ impl Config { } } +fn normalize_multi_agent_agents(raw_agents: Vec) -> Vec { + let mut seen = HashSet::new(); + let mut normalized = Vec::new(); + + for value in raw_agents { + let trimmed = value.trim(); + if trimmed.is_empty() { + continue; + } + + let agent = trimmed.to_string(); + if seen.insert(agent.clone()) { + 
normalized.push(agent); + } + } + + normalized +} + fn default_model() -> String { OPENAI_DEFAULT_MODEL.to_string() } @@ -2255,6 +2309,7 @@ model_verbosity = "high" use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, include_view_image_tool: true, + multi_agent: MultiAgentConfig::default(), features: Features::with_defaults(), active_profile: Some("o3".to_string()), windows_wsl_setup_acknowledged: false, @@ -2320,6 +2375,7 @@ model_verbosity = "high" use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, include_view_image_tool: true, + multi_agent: MultiAgentConfig::default(), features: Features::with_defaults(), active_profile: Some("gpt3".to_string()), windows_wsl_setup_acknowledged: false, @@ -2400,6 +2456,7 @@ model_verbosity = "high" use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, include_view_image_tool: true, + multi_agent: MultiAgentConfig::default(), features: Features::with_defaults(), active_profile: Some("zdr".to_string()), windows_wsl_setup_acknowledged: false, @@ -2466,6 +2523,7 @@ model_verbosity = "high" use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, include_view_image_tool: true, + multi_agent: MultiAgentConfig::default(), features: Features::with_defaults(), active_profile: Some("gpt5".to_string()), windows_wsl_setup_acknowledged: false, diff --git a/codex-rs/multi-agent/src/lib.rs b/codex-rs/multi-agent/src/lib.rs index 5d9b7c9374d8..908316ac6be3 100644 --- a/codex-rs/multi-agent/src/lib.rs +++ b/codex-rs/multi-agent/src/lib.rs @@ -145,6 +145,7 @@ pub struct AgentContext { global_codex_home: PathBuf, config_toml: ConfigToml, config: Config, + allowed_agents: Vec, } impl AgentContext { @@ -154,6 +155,7 @@ impl AgentContext { global_codex_home: PathBuf, config_toml: ConfigToml, config: Config, + allowed_agents: Vec, ) -> Self { Self { agent_id, @@ -161,6 +163,7 @@ impl AgentContext { global_codex_home, config_toml, 
config, + allowed_agents, } } @@ -189,6 +192,11 @@ impl AgentContext { &self.config } + /// Returns the configured sub-agent ids this context is allowed to invoke. + pub fn allowed_agents(&self) -> &[AgentId] { + &self.allowed_agents + } + /// Consume the context and return the inner [`Config`]. pub fn into_config(self) -> Config { self.config @@ -294,12 +302,20 @@ impl AgentConfigLoader { ) })?; + let allowed_agents = config + .multi_agent + .agents + .iter() + .map(|agent| AgentId::parse(agent)) + .collect::>>()?; + Ok(AgentContext::new( agent_id_owned, agent_codex_home, self.registry.global_codex_home().to_path_buf(), config_toml, config, + allowed_agents, )) } } @@ -383,6 +399,7 @@ mod tests { assert!(context.agent_id().is_none()); assert_eq!(context.codex_home(), global.as_path()); assert_eq!(context.config().model, "o2", "CLI override should win"); + assert!(context.allowed_agents().is_empty()); let agent_id = AgentId::parse("rust_test_writer").expect("parse"); let agent_dir = loader @@ -410,6 +427,52 @@ mod tests { context.config().sandbox_policy, SandboxPolicy::DangerFullAccess ); + assert!(context.allowed_agents().is_empty()); + } + + #[tokio::test] + async fn allowed_agents_follow_multi_agent_list() { + let temp_home = tempdir().expect("tempdir"); + let global = temp_home.path().join("global"); + let agents_root = global.join("agents"); + fs::create_dir_all(global.join("log")).expect("log dir"); + fs::create_dir_all(global.join("sessions")).expect("sessions dir"); + fs::create_dir_all(global.join("history")).expect("history dir"); + fs::create_dir_all(global.join("mcp")).expect("mcp dir"); + fs::create_dir_all(global.join("tmp")).expect("tmp dir"); + fs::create_dir_all(&agents_root).expect("agents dir"); + + fs::write( + global.join("config.toml"), + r#" +model = "gpt-5" + +[multi_agent] +agents = ["ideas_provider", "critic"] +"#, + ) + .expect("write global config"); + + let loader = AgentConfigLoader::new(global.clone()); + let context = loader + 
.load_by_slug( + None, + &CliConfigOverrides::default(), + ConfigOverrides::default(), + ) + .await + .expect("load context with multi-agent list"); + + let allowed: Vec<_> = context + .allowed_agents() + .iter() + .map(|id| id.as_str().to_string()) + .collect(); + assert_eq!(allowed, ["ideas_provider", "critic"]); + assert!( + context.config().include_delegate_tool, + "delegate tool automatically enabled when agents are configured" + ); } #[test] diff --git a/codex-rs/multi-agent/src/orchestrator.rs b/codex-rs/multi-agent/src/orchestrator.rs index 0b6ef69e630c..93907531a796 100644 --- a/codex-rs/multi-agent/src/orchestrator.rs +++ b/codex-rs/multi-agent/src/orchestrator.rs @@ -120,8 +120,9 @@ pub struct AgentOrchestrator { cli_overrides: CliConfigOverrides, config_overrides: ConfigOverrides, listeners: Mutex>>, - active_run: Mutex>, + active_runs: Mutex>, sessions: Mutex>, + allowed_agents: Vec, } impl AgentOrchestrator { @@ -131,6 +132,7 @@ impl AgentOrchestrator { session_source: SessionSource, cli_overrides: CliConfigOverrides, config_overrides: ConfigOverrides, + allowed_agents: Vec, ) -> Self { let loader = AgentConfigLoader::new(global_codex_home.into()); Self { @@ -140,8 +142,9 @@ impl AgentOrchestrator { cli_overrides, config_overrides, listeners: Mutex::new(Vec::new()), - active_run: Mutex::new(None), + active_runs: Mutex::new(Vec::new()), sessions: Mutex::new(HashMap::new()), + allowed_agents, } } @@ -157,14 +160,8 @@ impl AgentOrchestrator { self: &Arc, request: DelegateRequest, ) -> std::result::Result { - let mut active = self.active_run.lock().await; - if active.is_some() { - return Err(OrchestratorError::DelegateInProgress); - } - let run_id = Uuid::new_v4().to_string(); - *active = Some(run_id.clone()); - drop(active); + self.active_runs.lock().await.push(run_id.clone()); let prompt_text = request.prompt.text.clone(); self.emit(DelegateEvent::Started { @@ -222,8 +219,10 @@ impl AgentOrchestrator { } } - let mut active = 
orchestrator.active_run.lock().await; - *active = None; + let mut active = orchestrator.active_runs.lock().await; + if let Some(pos) = active.iter().rposition(|id| id == &run_id_clone) { + active.remove(pos); + } }); Ok(run_id) @@ -234,6 +233,11 @@ impl AgentOrchestrator { listeners.retain(|tx| tx.send(event.clone()).is_ok()); } + /// Return the list of configured agent ids available for delegation. + pub fn allowed_agents(&self) -> &[AgentId] { + &self.allowed_agents + } + /// Return all active delegate sessions ordered by most recent interaction. pub async fn active_sessions(&self) -> Vec { let sessions = self.sessions.lock().await; @@ -319,9 +323,11 @@ impl AgentOrchestrator { let config = context.into_config(); let cwd = config.cwd.clone(); let config_clone = config.clone(); - let conversation_manager = Arc::new(ConversationManager::new( + let delegate_adapter = crate::delegate_tool_adapter(Arc::clone(&self)); + let conversation_manager = Arc::new(ConversationManager::with_delegate( auth_manager.clone(), session_source, + Some(delegate_adapter), )); let conversation_bundle = conversation_manager diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index 1dbc32407b6a..86464d144cdb 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -24,6 +24,7 @@ use codex_core::protocol::SessionConfiguredEvent; use codex_core::protocol::SessionSource; use codex_core::protocol::TokenUsage; use codex_core::protocol_config_types::ReasoningEffort as ReasoningEffortConfig; +use codex_multi_agent::AgentId; use codex_multi_agent::AgentOrchestrator; use codex_multi_agent::DelegateEvent; use codex_multi_agent::DelegateSessionSummary; @@ -90,6 +91,7 @@ pub(crate) struct App { /// Set when the user confirms an update; propagated on exit. 
pub(crate) pending_update_action: Option, + delegate_stack: Vec<(String, AgentId)>, } impl App { @@ -189,6 +191,7 @@ impl App { active_delegate_summary: None, primary_chat_backup: None, pending_update_action: None, + delegate_stack: Vec::new(), }; let tui_events = tui.event_stream(); @@ -479,11 +482,20 @@ impl App { prompt, .. } => { + let depth = self.delegate_stack.len(); + self.delegate_stack.push((run_id.clone(), agent_id.clone())); self.chat_widget - .on_delegate_started(&run_id, &agent_id, &prompt); + .on_delegate_started(&run_id, &agent_id, &prompt, depth); } DelegateEvent::Delta { run_id, chunk, .. } => { - self.chat_widget.on_delegate_delta(&run_id, &chunk); + if self + .delegate_stack + .last() + .map(|(id, _)| id == &run_id) + .unwrap_or(false) + { + self.chat_widget.on_delegate_delta(&run_id, &chunk); + } } DelegateEvent::Completed { run_id, @@ -492,22 +504,42 @@ impl App { duration, .. } => { - let streamed = self.chat_widget.on_delegate_completed(&run_id); - let hint = Some(format!( - "finished in {}", - Self::format_delegate_duration(duration) - )); - let response = output.as_deref().filter(|_| !streamed); - self.chat_widget - .add_delegate_completion(&agent_id, response, hint); + if let Some(pos) = self + .delegate_stack + .iter() + .rposition(|(id, _)| id == &run_id) + { + let depth = pos; + self.delegate_stack.remove(pos); + let streamed = self.chat_widget.on_delegate_completed(&run_id, depth); + let hint = Some(format!( + "finished in {}", + Self::format_delegate_duration(duration) + )); + let response = if depth == 0 { + output.as_deref().filter(|_| !streamed) + } else { + None + }; + self.chat_widget + .add_delegate_completion(&agent_id, response, hint, depth); + } } DelegateEvent::Failed { run_id, agent_id, error, } => { - self.chat_widget - .on_delegate_failed(&run_id, &agent_id, &error); + if let Some(pos) = self + .delegate_stack + .iter() + .rposition(|(id, _)| id == &run_id) + { + let depth = pos; + self.delegate_stack.remove(pos); + 
self.chat_widget + .on_delegate_failed(&run_id, &agent_id, &error, depth); + } } } } @@ -779,6 +811,7 @@ mod tests { show_raw_agent_reasoning: None, tools_web_search_request: None, }, + Vec::new(), )); App { @@ -802,6 +835,7 @@ mod tests { active_delegate_summary: None, primary_chat_backup: None, pending_update_action: None, + delegate_stack: Vec::new(), } } diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index f25aa080bfcb..60be52a1fecc 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -519,9 +519,10 @@ impl ChatWidget { if self.delegate_context.is_some() && let Some(message) = last_agent_message.as_ref() - && !message.trim().is_empty() { - self.delegate_agent_frames.push(message.clone()); - } + && !message.trim().is_empty() + { + self.delegate_agent_frames.push(message.clone()); + } let notification_response = last_agent_message.unwrap_or_default(); // If there is a queued user message, send exactly one now to begin the next turn. 
@@ -1463,9 +1464,9 @@ impl ChatWidget { && let Err(e) = self .codex_op_tx .send(Op::AddToHistory { text: text.clone() }) - { - tracing::error!("failed to send AddHistory op: {e}"); - } + { + tracing::error!("failed to send AddHistory op: {e}"); + } if !display_text.is_empty() { self.add_to_history(history_cell::new_user_prompt(display_text)); @@ -2375,15 +2376,24 @@ impl ChatWidget { self.conversation_id } + fn delegate_label(agent_id: &AgentId, depth: usize) -> String { + format!("{}↳ #{}", " ".repeat(depth), agent_id.as_str()) + } + pub(crate) fn add_delegate_completion( &mut self, agent_id: &AgentId, response: Option<&str>, duration_hint: Option, + depth: usize, ) { - let header = format!("↳ #{agent} completed", agent = agent_id.as_str()); + let header = format!("{} completed", Self::delegate_label(agent_id, depth)); self.add_info_message(header, duration_hint); + if depth > 0 { + return; + } + let Some(text) = response.map(str::trim).filter(|s| !s.is_empty()) else { return; }; @@ -2398,26 +2408,35 @@ impl ChatWidget { self.request_redraw(); } - pub(crate) fn on_delegate_started(&mut self, run_id: &str, agent_id: &AgentId, prompt: &str) { - self.delegate_run = Some(run_id.to_string()); - self.delegate_had_stream = false; - self.delegate_user_frames.clear(); - self.delegate_agent_frames.clear(); - self.delegate_previous_status_header = Some(self.current_status_header.clone()); - if self.bottom_pane.status_widget().is_none() { - self.bottom_pane.set_task_running(true); - self.delegate_status_claimed = true; - } else { - self.delegate_status_claimed = false; + pub(crate) fn on_delegate_started( + &mut self, + run_id: &str, + agent_id: &AgentId, + prompt: &str, + depth: usize, + ) { + let label = Self::delegate_label(agent_id, depth); + if depth == 0 { + self.delegate_run = Some(run_id.to_string()); + self.delegate_had_stream = false; + self.delegate_user_frames.clear(); + self.delegate_agent_frames.clear(); + self.delegate_previous_status_header = 
Some(self.current_status_header.clone()); + if self.bottom_pane.status_widget().is_none() { + self.bottom_pane.set_task_running(true); + self.delegate_status_claimed = true; + } else { + self.delegate_status_claimed = false; + } + self.set_status_header(format!("Delegating to #{}", agent_id.as_str())); } - self.set_status_header(format!("Delegating to #{}", agent_id.as_str())); let trimmed = prompt.trim(); let hint = if trimmed.is_empty() { None } else { Some(trimmed.to_string()) }; - self.add_info_message(format!("↳ #{agent}…", agent = agent_id.as_str()), hint); + self.add_info_message(format!("{label}…"), hint); self.request_redraw(); } @@ -2429,7 +2448,10 @@ impl ChatWidget { self.handle_streaming_delta(chunk.to_string()); } - pub(crate) fn on_delegate_completed(&mut self, run_id: &str) -> bool { + pub(crate) fn on_delegate_completed(&mut self, run_id: &str, depth: usize) -> bool { + if depth > 0 { + return false; + } if self.delegate_run.as_deref() != Some(run_id) { return false; } @@ -2451,13 +2473,18 @@ impl ChatWidget { had_stream } - pub(crate) fn on_delegate_failed(&mut self, run_id: &str, agent_id: &AgentId, error: &str) { - let _ = self.on_delegate_completed(run_id); - self.add_error_message(format!( - "Delegation to #{} failed: {}", - agent_id.as_str(), - error - )); + pub(crate) fn on_delegate_failed( + &mut self, + run_id: &str, + agent_id: &AgentId, + error: &str, + depth: usize, + ) { + if depth == 0 { + let _ = self.on_delegate_completed(run_id, depth); + } + let label = Self::delegate_label(agent_id, depth); + self.add_error_message(format!("{label} failed: {error}")); } fn try_delegate_shortcut(&mut self, _text: &str) -> bool { diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index ed5b5a510983..cb08d7a3f5a8 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -396,7 +396,7 @@ fn delegate_stream_deltas_and_restore_status() { 
assert!(chat.bottom_pane.status_widget().is_none()); assert_eq!(chat.current_status_header, "Working"); - chat.on_delegate_started("run-1", &agent, "sketch integration points"); + chat.on_delegate_started("run-1", &agent, "sketch integration points", 0); assert_eq!(chat.delegate_run.as_deref(), Some("run-1")); assert!(chat.delegate_status_claimed); assert!(chat.bottom_pane.status_widget().is_some()); @@ -429,7 +429,7 @@ fn delegate_stream_deltas_and_restore_status() { "expected streamed delegate output in history" ); - let streamed = chat.on_delegate_completed("run-1"); + let streamed = chat.on_delegate_completed("run-1", 0); assert!( streamed, "delegate completion should report streaming output" @@ -451,6 +451,36 @@ fn delegate_stream_deltas_and_restore_status() { assert_eq!(chat.current_status_header, "Working"); } +#[test] +fn nested_delegate_info_events_are_indented() { + let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(); + let outer = AgentId::parse("ideas_provider").expect("valid id"); + let inner = AgentId::parse("creative_ideas").expect("valid id"); + + chat.on_delegate_started("outer-run", &outer, "outer brief", 0); + chat.on_delegate_started("inner-run", &inner, "inner brief", 1); + + let mut messages = Vec::new(); + while let Ok(event) = rx.try_recv() { + if let AppEvent::InsertHistoryCell(cell) = event { + messages.push(lines_to_single_string(&cell.display_lines(120))); + } + } + + assert!( + messages + .iter() + .any(|line| line.contains("↳ #ideas_provider…")), + "expected top-level delegate entry" + ); + assert!( + messages + .iter() + .any(|line| line.contains(" ↳ #creative_ideas…")), + "expected indented nested delegate entry" + ); +} + // (removed experimental resize snapshot test) #[test] diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index a73593ba287e..28e02863a8ad 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -19,6 +19,7 @@ use codex_core::find_conversation_path_by_id_str; use 
codex_core::protocol::AskForApproval; use codex_core::protocol::SandboxPolicy; use codex_core::protocol::SessionSource; +use codex_multi_agent::AgentId; use codex_multi_agent::AgentOrchestrator; use codex_ollama::DEFAULT_OSS_MODEL; use codex_protocol::config_types::SandboxMode; @@ -240,7 +241,7 @@ pub async fn run_main( tools_web_search_request: cli.web_search.then_some(true), }; let mut delegate_config_overrides = overrides.clone(); - delegate_config_overrides.include_delegate_tool = Some(false); + delegate_config_overrides.include_delegate_tool = None; let delegate_cli_overrides = cli.config_overrides.clone(); #[allow(clippy::print_stderr)] let agent_context = match codex_multi_agent::load_agent_context( @@ -256,6 +257,7 @@ pub async fn run_main( std::process::exit(1); } }; + let allowed_agents = agent_context.allowed_agents().to_vec(); let global_codex_home = agent_context.global_codex_home().to_path_buf(); let config_toml = agent_context.config_toml().clone(); let mut config = agent_context.into_config(); @@ -346,6 +348,7 @@ pub async fn run_main( global_codex_home, delegate_cli_overrides, delegate_config_overrides, + allowed_agents, ) .await .map_err(|err| std::io::Error::other(err.to_string())) @@ -359,6 +362,7 @@ async fn run_ratatui_app( global_codex_home: PathBuf, delegate_cli_overrides: CliConfigOverrides, delegate_config_overrides: ConfigOverrides, + allowed_agents: Vec, ) -> color_eyre::Result { let mut config = config; color_eyre::install()?; @@ -457,6 +461,7 @@ async fn run_ratatui_app( SessionSource::Cli, delegate_cli_overrides, delegate_config_overrides, + allowed_agents, )); let login_status = get_login_status(&config, &global_codex_home); let should_show_windows_wsl_screen = From 2da437269dba0243fe15e6cce12f3f71af0c0d76 Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Thu, 16 Oct 2025 16:59:29 +0000 Subject: [PATCH 13/17] Introduce parallel delegate execution with batching --- ai-temp/CHANGELOG.md | 3 + ai-temp/example-codex-home/AGENTS.md | 
1 + .../agents/ideas_provider/AGENTS.md | 16 +- ai-temp/parallel-delegation.md | 45 +++ ai-temp/tool-implementation-patterns.md | 33 ++- codex-rs/core/src/codex.rs | 4 + codex-rs/core/src/config.rs | 24 +- codex-rs/core/src/delegate_tool.rs | 14 + codex-rs/core/src/tools/handlers/delegate.rs | 271 +++++++++++++++++- codex-rs/core/src/tools/spec.rs | 5 +- codex-rs/core/tests/suite/delegate_tool.rs | 106 +++++++ codex-rs/core/tests/suite/mod.rs | 1 + codex-rs/multi-agent/src/orchestrator.rs | 79 ++++- codex-rs/tui/src/app.rs | 208 +++++++++++--- codex-rs/tui/src/chatwidget.rs | 128 +++++---- codex-rs/tui/src/chatwidget/tests.rs | 36 ++- codex-rs/tui/src/lib.rs | 1 + 17 files changed, 847 insertions(+), 128 deletions(-) create mode 100644 ai-temp/parallel-delegation.md create mode 100644 codex-rs/core/tests/suite/delegate_tool.rs diff --git a/ai-temp/CHANGELOG.md b/ai-temp/CHANGELOG.md index a6bbb4516252..d65221d46dfd 100644 --- a/ai-temp/CHANGELOG.md +++ b/ai-temp/CHANGELOG.md @@ -3,6 +3,9 @@ > Keep this file current; it documents the evolution of the multi-agent design work. An outdated changelog breaks the orchestrator timeline. ## 2025-10-16 +- Added batched delegate execution: the core handler now accepts `batch` payloads, launches every child run concurrently (respecting the orchestrator’s concurrency cap), and returns per-agent summaries in a single response. +- Updated the TUI delegate tree to indent siblings (two spaces per depth) and keep the status banner aligned while multiple children stream at once; adjusted snapshot/unit coverage accordingly. +- Refreshed documentation and sample instructions (`ai-temp/parallel-delegation.md`, `ai-temp/tool-implementation-patterns.md`, example Codex home agents) to describe the batched call flow and new presentation. - Removed the inline `#agent` autocomplete experiment and reverted documentation to focus on the delegate picker and slash command flow. 
- Added child delegate directories (`creative_ideas`, `conservative_ideas`) to the example Codex home and updated instructions/README to describe the fixed delegation chain (main → ideas_provider → critic). - Auto-enabled the delegate tool whenever `[multi_agent].agents` is non-empty so sub-agents inherit delegation without toggling `include_delegate_tool`. diff --git a/ai-temp/example-codex-home/AGENTS.md b/ai-temp/example-codex-home/AGENTS.md index 16b98623a10c..d2495b5606aa 100644 --- a/ai-temp/example-codex-home/AGENTS.md +++ b/ai-temp/example-codex-home/AGENTS.md @@ -12,5 +12,6 @@ General rules: - Keep replies short unless the user explicitly requests depth; cite `ai-temp/` docs when needed for background. - Follow the chain even if you already see the answer; only skip when the user explicitly opts out of delegation. - The `delegate_agent` tool is AI-only. Describe which delegate you want in plain language—the user cannot invoke sub-agents directly. +- You can launch multiple delegates in parallel. The CLI indents nested runs beneath their parent (two spaces per depth), and up to five delegates may be active at once; wait for all required sub-agents to finish before synthesizing. Call `delegate_agent` once with a `batch` array containing each `{agent_id, prompt}` so both delegates run even on models limited to a single tool invocation per turn. - Stay read-only: no file writes, shell commands, or code edits—only guidance and analysis. - When manually testing, describe the problem clearly so the coordinator chooses the right delegate. diff --git a/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md b/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md index 8b7d43b96d19..7eb8b4d360c4 100644 --- a/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md +++ b/ai-temp/example-codex-home/agents/ideas_provider/AGENTS.md @@ -2,9 +2,19 @@ You orchestrate ideation for the primary agent. Always follow this sequence: -1. 
**Delegate to `creative_ideas`.** Request at least three bold concepts tailored to the brief. This call is mandatory—do not continue until it completes. -2. **Delegate to `conservative_ideas`.** Request at least three safe, low-risk options. This call is also mandatory. -3. Compare the two streams, identify the strongest overall direction, and note how each supporting idea contributes. +1. **Launch `creative_ideas` and `conservative_ideas` in parallel.** Call `delegate_agent` once with a `batch` array that lists both delegates—this ensures the orchestrator fans out the work even on models that only expose a single tool invocation per turn. Each entry must request at least three options tailored to the brief. + + ```json + { + "batch": [ + {"agent_id": "creative_ideas", "prompt": "..."}, + {"agent_id": "conservative_ideas", "prompt": "..."} + ] + } + ``` + +2. **Verify both delegates actually ran.** Do not proceed to synthesis until you have received outputs from *both* delegates; if one fails or is missing, re-run it before continuing. +3. After both delegates finish, compare their streams, identify the strongest overall direction, and note how each supporting idea contributes. When replying to the caller: diff --git a/ai-temp/parallel-delegation.md b/ai-temp/parallel-delegation.md new file mode 100644 index 000000000000..f94235ee2456 --- /dev/null +++ b/ai-temp/parallel-delegation.md @@ -0,0 +1,45 @@ +# Parallel Delegation Options + +This note captures the current state of agent delegation in the Codex CLI and outlines +approaches for synchronous, parallel, and detached (fire-and-forget) sub-agent runs. + +## 1. Sequential Delegation (Status: available) +- `delegate_agent` blocks until the sub-agent reports `DelegateEvent::Completed`, so the caller + naturally waits on each run before deciding the next action (`codex-rs/core/src/tools/handlers/delegate.rs`). 
+- The orchestrator spawns each delegate in its own task but nothing else proceeds until the handler + resolves (`codex-rs/multi-agent/src/orchestrator.rs`). +- TUI rendering assumes a single streaming leaf at any time; nested runs show as deeper indentation + but only the top item streams (`codex-rs/tui/src/app.rs`). +- Use case: pipelines where each sub-agent’s result conditions the next prompt. + +## 2. Parallel Delegation (Status: implemented) +- Core runtime now registers `delegate_agent` with parallel support and enforces a configurable + concurrency cap (`[multi_agent].max_concurrent_delegates`, default 5) so front-ends can launch + multiple sub-agents at once without overwhelming the orchestrator. +- Parallel tool batching is only available on model families that expose + `supports_parallel_tool_calls`. Today that includes `test-gpt-5-codex`/`codex-*` internal models; + production tiers (`gpt-5-codex`, `gpt-5`) still force single-function-call turns, so existing + agents fall back to sequential delegation unless the CLI handles batching locally. +- The TUI replaces the simple stack with a delegate tree that keeps lineage for every run. Nested and + sibling delegates now render with indentation (two spaces per depth) so siblings appear grouped + under their parent, while history entries and status headers stay in sync as roots start and finish. +- Streaming output can hop between active delegates; each run maintains its own capture buffer so + summaries and transcript snippets remain scoped correctly. +- Single-call models now use the handler’s `batch` payload to trigger all delegates in one tool turn, + so the orchestration layer fans out work even when the model can’t issue multiple function calls. +- Remaining task: expand prompts/docs so agents understand that parallel delegates are available, how + the UI surfaces them, and when to leverage concurrency. + +## 3. 
Detached Delegation (Status: future work) +- Requires a non-blocking variant (e.g., `delegate_agent_async`) that returns immediately with a + `run_id` and relies on the orchestrator’s event stream for progress. +- Must surface background activity in the UI: notification list, optional “attach to run” + command, and stored sessions leveraging `AgentOrchestrator::store_session`. +- Needs policy for auto-cleanup and rate limiting so runaway agents do not flood the orchestrator. +- Join-on-demand flow could reuse existing session switching helpers once a run finishes or when + the user opts in. + +## 4. Next Decisions +1. Pick a parallelization strategy (simple flag + UI refactor vs. dedicated helper). +2. Specify UX for background runs before adding async variant (notifications, manual join, audit). +3. Extend documentation/prompts once the capabilities land so models know when to choose each path. diff --git a/ai-temp/tool-implementation-patterns.md b/ai-temp/tool-implementation-patterns.md index 24341a6d8a2d..057ff0ce470b 100644 --- a/ai-temp/tool-implementation-patterns.md +++ b/ai-temp/tool-implementation-patterns.md @@ -40,7 +40,6 @@ This note captures the patterns we observed while digging into the built-in plan ```json { "type": "object", - "required": ["agent_id", "prompt"], "properties": { "agent_id": { "type": "string", "pattern": "^[a-z0-9_\\-]+$" }, "prompt": { "type": "string" }, @@ -51,20 +50,46 @@ This note captures the patterns we observed while digging into the built-in plan "hints": { "type": "array", "items": { "type": "string" } } }, "additionalProperties": true + }, + "batch": { + "type": "array", + "items": { + "type": "object", + "required": ["agent_id", "prompt"], + "properties": { + "agent_id": { "type": "string", "pattern": "^[a-z0-9_\\-]+$" }, + "prompt": { "type": "string" }, + "context": { + "type": "object", + "properties": { + "working_directory": { "type": "string" }, + "hints": { "type": "array", "items": { "type": "string" } } + }, + 
"additionalProperties": true + } + }, + "additionalProperties": false + } } - } + }, + "anyOf": [ + { "required": ["agent_id", "prompt"] }, + { "required": ["batch"], "properties": { "batch": { "minItems": 1 } } } + ], + "additionalProperties": false } ``` - We can add optional fields later (timeouts, resource budgets) without breaking the schema. - The handler validates `agent_id` with `AgentRegistry`, loads the merged `Config`, and passes the prompt/context into the orchestrator. +- A per-agent concurrency cap guards resource usage. `[multi_agent].max_concurrent_delegates` defaults to 5 and returns `DelegateToolError::DelegateInProgress` once the limit is hit, signalling the model to queue additional work. ### 7.3 Handler Responsibilities -- Mirror the exec tool: enqueue the delegate run, stream progress via `DelegateEvent::Started/Delta/Completed/Failed`, and return a compact JSON result (e.g., `{ "status": "ok", "summary": "...", "session": "session-id" }`). +- Mirror the exec tool: enqueue the delegate run, stream progress via `DelegateEvent::Started/Delta/Completed/Failed`, and return a compact JSON result. When batching requests, respond with `{"status":"ok","runs":[...]}` where each entry includes the `agent_id`, `run_id`, optional summary, and duration. - Errors reuse the same shape with `status: "error"` so the UI can surface them consistently. - The handler itself remains thin—after scheduling the work, it hands control back to the runtime. ### 7.4 Client Integration -- The TUI reuses `StreamController` to show delegate streaming, and inserts a summary history cell once the tool completes (agent id, elapsed time, link to the sub-agent session). +- The TUI maintains a delegate tree so nested runs display with increasing indentation (two spaces per depth) and status indicators rotate between active roots. Streaming can hop between delegates; each run keeps its own buffer before being summarized into history. 
- Because users cannot trigger the tool directly, slash commands and message preprocessing stay untouched; guidance lives in instructions and autocomplete metadata. ### 7.5 Instruction Updates diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 881f04d970f1..b9b602bf2115 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -552,6 +552,10 @@ impl Session { self.tx_event.clone() } + pub(crate) fn conversation_id(&self) -> ConversationId { + self.conversation_id + } + pub(crate) fn delegate_adapter(&self) -> Option> { self.services.delegate_adapter.as_ref().map(Arc::clone) } diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs index ce33462d6266..61d6ba43129c 100644 --- a/codex-rs/core/src/config.rs +++ b/codex-rs/core/src/config.rs @@ -70,9 +70,21 @@ pub(crate) const PROJECT_DOC_MAX_BYTES: usize = 32 * 1024; // 32 KiB pub(crate) const CONFIG_TOML_FILE: &str = "config.toml"; -#[derive(Debug, Clone, Default, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct MultiAgentConfig { pub agents: Vec, + pub max_concurrent_delegates: usize, +} + +pub const DEFAULT_MAX_CONCURRENT_DELEGATES: usize = 5; + +impl Default for MultiAgentConfig { + fn default() -> Self { + Self { + agents: Vec::new(), + max_concurrent_delegates: DEFAULT_MAX_CONCURRENT_DELEGATES, + } + } } /// Application configuration loaded from disk and merged with overrides. 
@@ -897,6 +909,8 @@ impl From for Tools { pub struct MultiAgentToml { #[serde(default)] pub agents: Vec, + #[serde(default)] + pub max_concurrent_delegates: Option, } impl ConfigToml { @@ -1101,6 +1115,13 @@ impl Config { let history = cfg.history.unwrap_or_default(); + let max_concurrent_delegates = cfg + .multi_agent + .as_ref() + .and_then(|ma| ma.max_concurrent_delegates) + .unwrap_or(DEFAULT_MAX_CONCURRENT_DELEGATES) + .max(1); + let multi_agent = MultiAgentConfig { agents: normalize_multi_agent_agents( cfg.multi_agent @@ -1108,6 +1129,7 @@ impl Config { .map(|ma| ma.agents.clone()) .unwrap_or_default(), ), + max_concurrent_delegates, }; let include_plan_tool_flag = features.enabled(Feature::PlanTool); diff --git a/codex-rs/core/src/delegate_tool.rs b/codex-rs/core/src/delegate_tool.rs index 80380c7c2d84..5a2d8b90323e 100644 --- a/codex-rs/core/src/delegate_tool.rs +++ b/codex-rs/core/src/delegate_tool.rs @@ -17,6 +17,15 @@ pub struct DelegateToolContext { pub hints: Vec, } +/// Single entry in a batched delegate request. +#[derive(Debug, Clone, Default, Deserialize, Serialize)] +pub struct DelegateToolBatchEntry { + pub agent_id: String, + pub prompt: String, + #[serde(default)] + pub context: DelegateToolContext, +} + /// Payload sent by the primary agent when invoking the delegate tool. #[derive(Debug, Clone, Deserialize, Serialize)] pub struct DelegateToolRequest { @@ -24,6 +33,10 @@ pub struct DelegateToolRequest { pub prompt: String, #[serde(default)] pub context: DelegateToolContext, + #[serde(default, skip_serializing_if = "Option::is_none", skip_deserializing)] + pub caller_conversation_id: Option, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub batch: Vec, } /// Event emitted while a delegate run is in flight. 
@@ -34,6 +47,7 @@ pub enum DelegateToolEvent { agent_id: String, prompt: String, started_at: SystemTime, + parent_run_id: Option, }, Delta { run_id: DelegateRunId, diff --git a/codex-rs/core/src/tools/handlers/delegate.rs b/codex-rs/core/src/tools/handlers/delegate.rs index 459690ab460b..745b42add91f 100644 --- a/codex-rs/core/src/tools/handlers/delegate.rs +++ b/codex-rs/core/src/tools/handlers/delegate.rs @@ -1,6 +1,7 @@ use crate::client_common::tools::ResponsesApiTool; use crate::client_common::tools::ToolSpec; use crate::delegate_tool::DelegateEventReceiver; +use crate::delegate_tool::DelegateToolAdapter; use crate::delegate_tool::DelegateToolContext; use crate::delegate_tool::DelegateToolError; use crate::delegate_tool::DelegateToolEvent; @@ -14,9 +15,12 @@ use crate::tools::context::ToolPayload; use crate::tools::registry::ToolHandler; use crate::tools::registry::ToolKind; use async_trait::async_trait; +use codex_protocol::ConversationId; use serde::Deserialize; use serde::Serialize; use std::collections::BTreeMap; +use std::collections::HashMap; +use std::collections::HashSet; use std::sync::LazyLock; use std::time::Duration; @@ -40,6 +44,28 @@ pub static DELEGATE_TOOL: LazyLock = LazyLock::new(|| { }, ); + let mut batch_entry_props = BTreeMap::new(); + batch_entry_props.insert( + "agent_id".to_string(), + JsonSchema::String { + description: Some("Identifier of the sub-agent to invoke".to_string()), + }, + ); + batch_entry_props.insert( + "prompt".to_string(), + JsonSchema::String { + description: Some("Instructions passed to the sub-agent".to_string()), + }, + ); + batch_entry_props.insert( + "context".to_string(), + JsonSchema::Object { + properties: context_props.clone(), + required: None, + additional_properties: Some(false.into()), + }, + ); + let mut properties = BTreeMap::new(); properties.insert( "agent_id".to_string(), @@ -61,6 +87,20 @@ pub static DELEGATE_TOOL: LazyLock = LazyLock::new(|| { additional_properties: Some(false.into()), }, ); + 
properties.insert( + "batch".to_string(), + JsonSchema::Array { + items: Box::new(JsonSchema::Object { + properties: batch_entry_props, + required: Some(vec!["agent_id".to_string(), "prompt".to_string()]), + additional_properties: Some(false.into()), + }), + description: Some( + "Invoke multiple delegates in one call; each entry must supply agent_id and prompt" + .to_string(), + ), + }, + ); ToolSpec::Function(ResponsesApiTool { name: "delegate_agent".to_string(), @@ -71,7 +111,7 @@ Provide the agent id, a prompt, and optional context such as working directory o strict: false, parameters: JsonSchema::Object { properties, - required: Some(vec!["agent_id".to_string(), "prompt".to_string()]), + required: None, additional_properties: Some(false.into()), }, }) @@ -79,10 +119,14 @@ Provide the agent id, a prompt, and optional context such as working directory o #[derive(Debug, Deserialize)] struct DelegateToolArgs { - agent_id: String, - prompt: String, + #[serde(default)] + agent_id: Option, + #[serde(default)] + prompt: Option, #[serde(default)] context: Option, + #[serde(default)] + batch: Vec, } #[derive(Debug, Default, Deserialize)] @@ -92,6 +136,14 @@ struct DelegateToolArgsContext { hints: Vec, } +#[derive(Debug, Deserialize)] +struct DelegateToolBatchArgs { + agent_id: String, + prompt: String, + #[serde(default)] + context: Option, +} + impl From for DelegateToolContext { fn from(value: DelegateToolArgsContext) -> Self { Self { @@ -112,6 +164,22 @@ struct DelegateToolResponse { duration_ms: Option, } +#[derive(Debug, Serialize)] +struct DelegateToolBatchRun { + agent_id: String, + run_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + summary: Option, + #[serde(skip_serializing_if = "Option::is_none")] + duration_ms: Option, +} + +#[derive(Debug, Serialize)] +struct DelegateToolBatchResponse { + status: &'static str, + runs: Vec, +} + #[async_trait] impl ToolHandler for DelegateToolHandler { fn kind(&self) -> ToolKind { @@ -136,16 +204,48 @@ impl 
ToolHandler for DelegateToolHandler { FunctionCallError::RespondToModel(format!("failed to parse function arguments: {e}")) })?; + if !args.batch.is_empty() && (args.agent_id.is_some() || args.prompt.is_some()) { + return Err(FunctionCallError::RespondToModel( + "when `batch` is provided, omit top-level `agent_id` and `prompt` fields".into(), + )); + } + let adapter = session.delegate_adapter().ok_or_else(|| { FunctionCallError::RespondToModel("delegate tool is not available".to_string()) })?; let mut events = adapter.subscribe().await; + let conversation_id = session.conversation_id(); + + if !args.batch.is_empty() { + let runs = + handle_batch_entries(adapter.as_ref(), &mut events, &conversation_id, args.batch) + .await?; + + let response = DelegateToolBatchResponse { status: "ok", runs }; + let content = serde_json::to_string(&response).map_err(|e| { + FunctionCallError::Fatal(format!("failed to serialize response: {e}")) + })?; + + return Ok(ToolOutput::Function { + content, + success: Some(true), + }); + } + + let agent_id = args.agent_id.ok_or_else(|| { + FunctionCallError::RespondToModel("missing `agent_id` for delegate_agent call".into()) + })?; + let prompt = args.prompt.ok_or_else(|| { + FunctionCallError::RespondToModel("missing `prompt` for delegate_agent call".into()) + })?; let request = DelegateToolRequest { - agent_id: args.agent_id.clone(), - prompt: args.prompt.clone(), + agent_id: agent_id.clone(), + prompt: prompt.clone(), context: args.context.unwrap_or_default().into(), + caller_conversation_id: Some(conversation_id.to_string()), + batch: Vec::new(), }; let run = adapter.delegate(request).await.map_err(map_adapter_error)?; @@ -218,6 +318,82 @@ fn event_run_id(event: &DelegateToolEvent) -> &str { } } +async fn handle_batch_entries( + adapter: &dyn DelegateToolAdapter, + events: &mut DelegateEventReceiver, + conversation_id: &ConversationId, + batch: Vec, +) -> Result, FunctionCallError> { + let mut runs = Vec::with_capacity(batch.len()); + let 
mut launched = Vec::with_capacity(batch.len()); + let conversation_id = conversation_id.to_string(); + + for entry in batch { + let request = DelegateToolRequest { + agent_id: entry.agent_id.clone(), + prompt: entry.prompt.clone(), + context: entry.context.unwrap_or_default().into(), + caller_conversation_id: Some(conversation_id.clone()), + batch: Vec::new(), + }; + + let run = adapter.delegate(request).await.map_err(map_adapter_error)?; + launched.push(run); + } + + let mut interested: HashSet = launched.iter().map(|run| run.run_id.clone()).collect(); + let mut collected: HashMap = HashMap::new(); + let mut summaries: HashMap, Option)> = HashMap::new(); + + while !interested.is_empty() { + let event = events.recv().await.ok_or_else(|| { + FunctionCallError::RespondToModel("delegate run ended unexpectedly".to_string()) + })?; + + let run_id = event_run_id(&event).to_string(); + if !interested.contains(&run_id) { + continue; + } + + match event { + DelegateToolEvent::Delta { chunk, .. } => { + collected.entry(run_id).or_default().push_str(&chunk); + } + DelegateToolEvent::Completed { + output, duration, .. + } => { + let summary = output.or_else(|| { + collected.remove(&run_id).and_then(|text| { + if text.trim().is_empty() { + None + } else { + Some(text) + } + }) + }); + summaries.insert(run_id.clone(), (summary, Some(duration))); + interested.remove(&run_id); + } + DelegateToolEvent::Failed { error, .. 
} => { + return Err(FunctionCallError::RespondToModel(error)); + } + _ => {} + } + } + + for run in launched { + let (summary, duration) = summaries.remove(&run.run_id).unwrap_or((None, None)); + runs.push(DelegateToolBatchRun { + agent_id: run.agent_id, + run_id: run.run_id, + summary, + duration_ms: duration.map(|d| d.as_millis() as u64), + }); + } + + Ok(runs) +} + fn map_adapter_error(err: DelegateToolError) -> FunctionCallError { match err { DelegateToolError::DelegateInProgress => FunctionCallError::RespondToModel( @@ -232,3 +408,88 @@ fn map_adapter_error(err: DelegateToolError) -> FunctionCallError { } } } + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Arc; + use tokio::sync::Mutex; + use tokio::sync::mpsc; + + struct MockDelegateAdapter { + sender: Mutex>>, + requests: Mutex>, + } + + impl MockDelegateAdapter { + fn new() -> Self { + Self { + sender: Mutex::new(None), + requests: Mutex::new(Vec::new()), + } + } + } + + #[async_trait] + impl DelegateToolAdapter for MockDelegateAdapter { + async fn subscribe(&self) -> DelegateEventReceiver { + let (tx, rx) = mpsc::unbounded_channel(); + *self.sender.lock().await = Some(tx); + rx + } + + async fn delegate( + &self, + request: DelegateToolRequest, + ) -> Result { + self.requests.lock().await.push(request.clone()); + let run_id = format!("run-{}", request.agent_id); + if let Some(sender) = self.sender.lock().await.as_ref() { + let _ = sender.send(DelegateToolEvent::Completed { + run_id: run_id.clone(), + agent_id: request.agent_id.clone(), + output: Some(format!("summary: {}", request.prompt)), + duration: Duration::from_millis(5), + }); + } + Ok(DelegateToolRun { + run_id, + agent_id: request.agent_id, + }) + } + } + + #[tokio::test] + async fn handle_batch_executes_in_order() { + let adapter = Arc::new(MockDelegateAdapter::new()); + let mut events = adapter.subscribe().await; + + let batch = vec![ + DelegateToolBatchArgs { + agent_id: "alpha".into(), + prompt: "one".into(), + context: None, + 
}, + DelegateToolBatchArgs { + agent_id: "bravo".into(), + prompt: "two".into(), + context: None, + }, + ]; + + let runs = + handle_batch_entries(adapter.as_ref(), &mut events, &ConversationId::new(), batch) + .await + .expect("batch runs"); + + let requests = adapter.requests.lock().await.clone(); + assert_eq!(requests.len(), 2); + assert_eq!(requests[0].agent_id, "alpha"); + assert_eq!(requests[1].agent_id, "bravo"); + + assert_eq!(runs.len(), 2); + assert_eq!(runs[0].agent_id, "alpha"); + assert_eq!(runs[1].agent_id, "bravo"); + assert!(runs.iter().all(|run| run.summary.is_some())); + } +} diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index 06446967fe3f..2c2e8e30e7c4 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -781,7 +781,7 @@ pub(crate) fn build_specs( } if config.delegate_tool { - builder.push_spec(DELEGATE_TOOL.clone()); + builder.push_spec_with_parallel_support(DELEGATE_TOOL.clone(), true); builder.register_handler("delegate_agent", delegate_handler); } @@ -963,7 +963,7 @@ mod tests { let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, features: &features, - include_delegate_tool: false, + include_delegate_tool: true, }); let (tools, _) = build_specs(&config, None).build(); @@ -971,6 +971,7 @@ mod tests { assert!(find_tool(&tools, "grep_files").supports_parallel_tool_calls); assert!(find_tool(&tools, "list_dir").supports_parallel_tool_calls); assert!(find_tool(&tools, "read_file").supports_parallel_tool_calls); + assert!(find_tool(&tools, "delegate_agent").supports_parallel_tool_calls); } #[test] diff --git a/codex-rs/core/tests/suite/delegate_tool.rs b/codex-rs/core/tests/suite/delegate_tool.rs new file mode 100644 index 000000000000..c05cf3118e9b --- /dev/null +++ b/codex-rs/core/tests/suite/delegate_tool.rs @@ -0,0 +1,106 @@ +#![cfg(not(target_os = "windows"))] + +use codex_core::protocol::AskForApproval; +use codex_core::protocol::EventMsg; +use 
codex_core::protocol::InputItem; +use codex_core::protocol::Op; +use codex_core::protocol::SandboxPolicy; +use codex_protocol::config_types::ReasoningSummary; +use core_test_support::responses; +use core_test_support::responses::ev_assistant_message; +use core_test_support::responses::ev_completed; +use core_test_support::responses::ev_function_call; +use core_test_support::responses::ev_response_created; +use core_test_support::responses::sse; +use core_test_support::responses::start_mock_server; +use core_test_support::test_codex::TestCodex; +use core_test_support::test_codex::test_codex; +use core_test_support::wait_for_event; +use serde_json::json; +use wiremock::matchers::any; + +/// The model returns a single `delegate_agent` call that carries a `batch` array. +/// We expect the tool handler to execute each entry sequentially and return a +/// single aggregated response containing both runs. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn delegate_tool_handles_batch_requests() -> anyhow::Result<()> { + let server = start_mock_server().await; + + let TestCodex { + codex, + cwd, + session_configured, + .. 
+ } = test_codex() + .with_config(|config| { + config.include_delegate_tool = true; + }) + .build(&server) + .await?; + + let call_id = "delegate-batch-call"; + let batch_args = json!({ + "batch": [ + {"agent_id": "alpha", "prompt": "first"}, + {"agent_id": "bravo", "prompt": "second"} + ] + }) + .to_string(); + + let first_response = sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "delegate_agent", &batch_args), + ev_completed("resp-1"), + ]); + responses::mount_sse_once_match(&server, any(), first_response).await; + + let second_response = sse(vec![ + ev_assistant_message("msg-1", "batch done"), + ev_completed("resp-2"), + ]); + let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await; + + let session_model = session_configured.model.clone(); + + codex + .submit(Op::UserTurn { + items: vec![InputItem::Text { + text: "please delegate to two agents".into(), + }], + final_output_json_schema: None, + cwd: cwd.path().to_path_buf(), + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::DangerFullAccess, + model: session_model, + effort: None, + summary: ReasoningSummary::Auto, + }) + .await?; + + wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + let request = second_mock.single_request(); + let output_item = request.function_call_output(call_id); + dbg!(&output_item); + let content = output_item + .get("output") + .and_then(|value| match value { + serde_json::Value::String(text) => Some(text.as_str()), + serde_json::Value::Object(obj) => obj.get("content").and_then(|v| v.as_str()), + _ => None, + }) + .expect("batch response text"); + + let parsed: serde_json::Value = serde_json::from_str(content)?; + assert_eq!(parsed["status"], "ok"); + let runs = parsed["runs"].as_array().expect("runs array"); + assert_eq!(runs.len(), 2); + assert_eq!(runs[0]["agent_id"], "alpha"); + assert_eq!(runs[1]["agent_id"], "bravo"); + assert!( + runs.iter() + .all(|run| 
run.get("run_id").and_then(|v| v.as_str()).is_some()) + ); + + Ok(()) +} diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index 6008811deaa6..fd28a5207451 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -6,6 +6,7 @@ mod cli_stream; mod client; mod compact; mod compact_resume_fork; +mod delegate_tool; mod exec; mod exec_stream_events; mod fork_conversation; diff --git a/codex-rs/multi-agent/src/orchestrator.rs b/codex-rs/multi-agent/src/orchestrator.rs index 93907531a796..3dfb4ac38588 100644 --- a/codex-rs/multi-agent/src/orchestrator.rs +++ b/codex-rs/multi-agent/src/orchestrator.rs @@ -39,6 +39,7 @@ pub struct DelegateRequest { pub agent_id: AgentId, pub prompt: DelegatePrompt, pub user_initial: Vec, + pub parent_run_id: Option, } /// The prompt content forwarded to the sub-agent. @@ -61,6 +62,7 @@ pub enum DelegateEvent { agent_id: AgentId, prompt: String, started_at: SystemTime, + parent_run_id: Option, }, Delta { run_id: DelegateRunId, @@ -123,6 +125,9 @@ pub struct AgentOrchestrator { active_runs: Mutex>, sessions: Mutex>, allowed_agents: Vec, + run_conversations: Mutex>, + conversation_runs: Mutex>, + max_concurrent_runs: usize, } impl AgentOrchestrator { @@ -133,6 +138,7 @@ impl AgentOrchestrator { cli_overrides: CliConfigOverrides, config_overrides: ConfigOverrides, allowed_agents: Vec, + max_concurrent_runs: usize, ) -> Self { let loader = AgentConfigLoader::new(global_codex_home.into()); Self { @@ -145,6 +151,9 @@ impl AgentOrchestrator { active_runs: Mutex::new(Vec::new()), sessions: Mutex::new(HashMap::new()), allowed_agents, + run_conversations: Mutex::new(HashMap::new()), + conversation_runs: Mutex::new(HashMap::new()), + max_concurrent_runs: max_concurrent_runs.max(1), } } @@ -155,20 +164,56 @@ impl AgentOrchestrator { rx } + async fn register_run_conversation(&self, run_id: &DelegateRunId, conversation_id: &str) { + self.run_conversations + .lock() + .await + 
.insert(run_id.clone(), conversation_id.to_string()); + self.conversation_runs + .lock() + .await + .insert(conversation_id.to_string(), run_id.clone()); + } + + async fn clear_run_conversation(&self, run_id: &DelegateRunId) { + if let Some(conversation_id) = self.run_conversations.lock().await.remove(run_id) { + self.conversation_runs.lock().await.remove(&conversation_id); + } + } + + pub async fn parent_run_for_conversation( + &self, + conversation_id: &str, + ) -> Option { + self.conversation_runs + .lock() + .await + .get(conversation_id) + .cloned() + } + /// Trigger a delegate run. Returns the run id if successfully enqueued. pub async fn delegate( self: &Arc, request: DelegateRequest, ) -> std::result::Result { let run_id = Uuid::new_v4().to_string(); - self.active_runs.lock().await.push(run_id.clone()); + { + let mut active = self.active_runs.lock().await; + if active.len() >= self.max_concurrent_runs { + return Err(OrchestratorError::DelegateInProgress); + } + active.push(run_id.clone()); + } + let parent_run_id = request.parent_run_id.clone(); let prompt_text = request.prompt.text.clone(); self.emit(DelegateEvent::Started { run_id: run_id.clone(), agent_id: request.agent_id.clone(), prompt: prompt_text, started_at: SystemTime::now(), + parent_run_id: parent_run_id.clone(), }) .await; @@ -219,6 +264,8 @@ impl AgentOrchestrator { } } + orchestrator.clear_run_conversation(&run_id_clone).await; + let mut active = orchestrator.active_runs.lock().await; if let Some(pos) = active.iter().rposition(|id| id == &run_id_clone) { active.remove(pos); @@ -300,6 +347,7 @@ impl AgentOrchestrator { ); } + #[allow(clippy::too_many_arguments)] async fn run_delegate_task( self: Arc, loader: AgentConfigLoader, @@ -338,6 +386,8 @@ impl AgentOrchestrator { error: format!("failed to start conversation: {err:#}"), })?; let conversation_id = conversation_bundle.conversation_id.to_string(); + self.register_run_conversation(&run_id, &conversation_id) + .await; let session_configured 
= Arc::new(conversation_bundle.session_configured); let conversation = conversation_bundle.conversation; @@ -469,11 +519,13 @@ impl MultiAgentDelegateAdapter { agent_id, prompt, started_at, + parent_run_id, } => CoreDelegateToolEvent::Started { run_id, agent_id: agent_id.as_str().to_string(), prompt, started_at, + parent_run_id, }, DelegateEvent::Delta { run_id, @@ -540,22 +592,39 @@ impl DelegateToolAdapter for MultiAgentDelegateAdapter { &self, request: DelegateToolRequest, ) -> Result { - let agent_id = AgentId::parse(request.agent_id.as_str()) - .map_err(|_| DelegateToolError::AgentNotFound(request.agent_id.clone()))?; + let DelegateToolRequest { + agent_id: agent_id_str, + prompt, + context: _, + caller_conversation_id, + batch: _, + } = request; + + let agent_id = AgentId::parse(agent_id_str.as_str()) + .map_err(|_| DelegateToolError::AgentNotFound(agent_id_str.clone()))?; + + let parent_run_id = if let Some(conversation_id) = caller_conversation_id.as_ref() { + self.orchestrator + .parent_run_for_conversation(conversation_id) + .await + } else { + None + }; let run_id = self .orchestrator .delegate(DelegateRequest { agent_id: agent_id.clone(), - prompt: DelegatePrompt::new(request.prompt), + prompt: DelegatePrompt::new(prompt), user_initial: Vec::new(), + parent_run_id, }) .await .map_err(Self::map_error)?; Ok(DelegateToolRun { run_id, - agent_id: request.agent_id, + agent_id: agent_id_str, }) } } diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index 86464d144cdb..cafb95aab3f3 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -5,6 +5,7 @@ use crate::app_event_sender::AppEventSender; use crate::bottom_pane::ApprovalRequest; use crate::chatwidget::ChatWidget; use crate::chatwidget::ChatWidgetInit; +use crate::chatwidget::DelegateDisplayLabel; use crate::diff_render::DiffSummary; use crate::exec_command::strip_bash_lc_and_escape; use crate::file_search::FileSearchManager; @@ -91,7 +92,107 @@ pub(crate) struct App { /// Set 
when the user confirms an update; propagated on exit. pub(crate) pending_update_action: Option, - delegate_stack: Vec<(String, AgentId)>, + delegate_tree: DelegateTree, + delegate_status_owner: Option, +} + +#[derive(Default)] +struct DelegateTree { + nodes: HashMap, + roots: Vec, +} + +struct DelegateNode { + agent_id: AgentId, + parent: Option, + children: Vec, +} + +#[derive(Clone)] +struct DelegateDisplay { + depth: usize, + label: DelegateDisplayLabel, +} + +impl DelegateTree { + fn insert( + &mut self, + run_id: String, + agent_id: AgentId, + parent: Option, + ) -> DelegateDisplay { + if let Some(parent_id) = parent.as_ref() { + if let Some(parent_node) = self.nodes.get_mut(parent_id) { + parent_node.children.push(run_id.clone()); + } + } else { + self.roots.push(run_id.clone()); + } + + self.nodes.insert( + run_id.clone(), + DelegateNode { + agent_id: agent_id.clone(), + parent: parent.clone(), + children: Vec::new(), + }, + ); + + self.display_for(&run_id, &agent_id) + } + + fn display_for(&self, run_id: &str, agent_id: &AgentId) -> DelegateDisplay { + let depth = self.depth_of(run_id).unwrap_or(0); + let base_label = if depth == 0 { + format!("↳ #{}", agent_id.as_str()) + } else { + let indent = " ".repeat(depth); + format!("{indent}↳ #{}", agent_id.as_str()) + }; + DelegateDisplay { + depth, + label: DelegateDisplayLabel { depth, base_label }, + } + } + + fn depth_of(&self, run_id: &str) -> Option { + let mut depth = 0; + let mut current = run_id; + while let Some(node) = self.nodes.get(current) { + if let Some(parent) = node.parent.as_ref() { + depth += 1; + current = parent; + } else { + break; + } + } + if self.nodes.contains_key(run_id) || self.roots.iter().any(|r| r == run_id) { + Some(depth) + } else { + None + } + } + + fn remove(&mut self, run_id: &str) { + if let Some(node) = self.nodes.remove(run_id) { + if let Some(parent_id) = node.parent { + if let Some(parent_node) = self.nodes.get_mut(&parent_id) { + parent_node.children.retain(|child| 
child != run_id); + } + } else { + self.roots.retain(|root| root != run_id); + } + } + } + + fn first_active_root(&self) -> Option<(String, AgentId)> { + for run_id in &self.roots { + if let Some(node) = self.nodes.get(run_id) { + return Some((run_id.clone(), node.agent_id.clone())); + } + } + None + } } impl App { @@ -191,7 +292,8 @@ impl App { active_delegate_summary: None, primary_chat_backup: None, pending_update_action: None, - delegate_stack: Vec::new(), + delegate_tree: DelegateTree::default(), + delegate_status_owner: None, }; let tui_events = tui.event_stream(); @@ -480,22 +582,30 @@ impl App { run_id, agent_id, prompt, + parent_run_id, .. } => { - let depth = self.delegate_stack.len(); - self.delegate_stack.push((run_id.clone(), agent_id.clone())); - self.chat_widget - .on_delegate_started(&run_id, &agent_id, &prompt, depth); + let display = self.delegate_tree.insert( + run_id.clone(), + agent_id.clone(), + parent_run_id.clone(), + ); + let claim_status = parent_run_id.is_none() && self.delegate_status_owner.is_none(); + if claim_status { + self.delegate_status_owner = Some(run_id.clone()); + self.chat_widget + .set_delegate_status_owner(&run_id, &agent_id); + } + self.chat_widget.on_delegate_started( + &run_id, + &agent_id, + &prompt, + display.label, + claim_status, + ); } DelegateEvent::Delta { run_id, chunk, .. } => { - if self - .delegate_stack - .last() - .map(|(id, _)| id == &run_id) - .unwrap_or(false) - { - self.chat_widget.on_delegate_delta(&run_id, &chunk); - } + self.chat_widget.on_delegate_delta(&run_id, &chunk); } DelegateEvent::Completed { run_id, @@ -504,42 +614,54 @@ impl App { duration, .. 
} => { - if let Some(pos) = self - .delegate_stack - .iter() - .rposition(|(id, _)| id == &run_id) - { - let depth = pos; - self.delegate_stack.remove(pos); - let streamed = self.chat_widget.on_delegate_completed(&run_id, depth); - let hint = Some(format!( - "finished in {}", - Self::format_delegate_duration(duration) - )); - let response = if depth == 0 { - output.as_deref().filter(|_| !streamed) + let display = self.delegate_tree.display_for(&run_id, &agent_id); + self.delegate_tree.remove(&run_id); + if self.delegate_status_owner.as_deref() == Some(run_id.as_str()) { + self.delegate_status_owner = None; + if let Some((next_run_id, next_agent)) = self.delegate_tree.first_active_root() + { + self.delegate_status_owner = Some(next_run_id.clone()); + self.chat_widget + .set_delegate_status_owner(&next_run_id, &next_agent); } else { - None - }; - self.chat_widget - .add_delegate_completion(&agent_id, response, hint, depth); + self.chat_widget.clear_delegate_status_owner(); + } } + let streamed = self + .chat_widget + .on_delegate_completed(&run_id, &display.label); + let hint = Some(format!( + "finished in {}", + Self::format_delegate_duration(duration) + )); + let response = if display.depth == 0 { + output.as_deref().filter(|_| !streamed) + } else { + None + }; + self.chat_widget + .add_delegate_completion(response, hint, &display.label); } DelegateEvent::Failed { run_id, agent_id, error, } => { - if let Some(pos) = self - .delegate_stack - .iter() - .rposition(|(id, _)| id == &run_id) - { - let depth = pos; - self.delegate_stack.remove(pos); - self.chat_widget - .on_delegate_failed(&run_id, &agent_id, &error, depth); + let display = self.delegate_tree.display_for(&run_id, &agent_id); + self.delegate_tree.remove(&run_id); + if self.delegate_status_owner.as_deref() == Some(run_id.as_str()) { + self.delegate_status_owner = None; + if let Some((next_run_id, next_agent)) = self.delegate_tree.first_active_root() + { + self.delegate_status_owner = 
Some(next_run_id.clone()); + self.chat_widget + .set_delegate_status_owner(&next_run_id, &next_agent); + } else { + self.chat_widget.clear_delegate_status_owner(); + } } + self.chat_widget + .on_delegate_failed(&run_id, &display.label, &error); } } } @@ -812,6 +934,7 @@ mod tests { tools_web_search_request: None, }, Vec::new(), + config.multi_agent.max_concurrent_delegates, )); App { @@ -835,7 +958,8 @@ mod tests { active_delegate_summary: None, primary_chat_backup: None, pending_update_action: None, - delegate_stack: Vec::new(), + delegate_tree: DelegateTree::default(), + delegate_status_owner: None, } } diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 60be52a1fecc..9ff183919973 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::collections::HashSet; use std::collections::VecDeque; use std::path::PathBuf; use std::sync::Arc; @@ -228,6 +229,12 @@ pub(crate) struct ChatWidgetInit { pub(crate) auth_manager: Arc, } +#[derive(Clone, Debug)] +pub struct DelegateDisplayLabel { + pub depth: usize, + pub base_label: String, +} + pub(crate) struct ChatWidget { app_event_tx: AppEventSender, codex_op_tx: UnboundedSender, @@ -274,8 +281,8 @@ pub(crate) struct ChatWidget { needs_final_message_separator: bool, delegate_run: Option, - delegate_had_stream: bool, - delegate_status_claimed: bool, + delegate_runs_with_stream: HashSet, + delegate_status_owner: Option, delegate_previous_status_header: Option, delegate_context: Option, delegate_user_frames: Vec, @@ -1067,8 +1074,8 @@ impl ChatWidget { ghost_snapshots_disabled: true, needs_final_message_separator: false, delegate_run: None, - delegate_had_stream: false, - delegate_status_claimed: false, + delegate_runs_with_stream: HashSet::new(), + delegate_status_owner: None, delegate_previous_status_header: None, delegate_context: None, delegate_user_frames: Vec::new(), @@ -1140,8 +1147,8 @@ impl ChatWidget { 
ghost_snapshots_disabled: true, needs_final_message_separator: false, delegate_run: None, - delegate_had_stream: false, - delegate_status_claimed: false, + delegate_runs_with_stream: HashSet::new(), + delegate_status_owner: None, delegate_previous_status_header: None, delegate_context: None, delegate_user_frames: Vec::new(), @@ -2376,21 +2383,16 @@ impl ChatWidget { self.conversation_id } - fn delegate_label(agent_id: &AgentId, depth: usize) -> String { - format!("{}↳ #{}", " ".repeat(depth), agent_id.as_str()) - } - pub(crate) fn add_delegate_completion( &mut self, - agent_id: &AgentId, response: Option<&str>, duration_hint: Option, - depth: usize, + label: &DelegateDisplayLabel, ) { - let header = format!("{} completed", Self::delegate_label(agent_id, depth)); + let header = format!("{} completed", label.base_label); self.add_info_message(header, duration_hint); - if depth > 0 { + if label.depth > 0 { return; } @@ -2413,78 +2415,88 @@ impl ChatWidget { run_id: &str, agent_id: &AgentId, prompt: &str, - depth: usize, + label: DelegateDisplayLabel, + claim_status: bool, ) { - let label = Self::delegate_label(agent_id, depth); - if depth == 0 { - self.delegate_run = Some(run_id.to_string()); - self.delegate_had_stream = false; + if claim_status { + self.set_delegate_status_owner_internal(run_id, agent_id); + } + if label.depth == 0 { self.delegate_user_frames.clear(); self.delegate_agent_frames.clear(); - self.delegate_previous_status_header = Some(self.current_status_header.clone()); - if self.bottom_pane.status_widget().is_none() { - self.bottom_pane.set_task_running(true); - self.delegate_status_claimed = true; - } else { - self.delegate_status_claimed = false; - } - self.set_status_header(format!("Delegating to #{}", agent_id.as_str())); } + self.delegate_runs_with_stream.remove(run_id); + self.delegate_run = Some(run_id.to_string()); let trimmed = prompt.trim(); let hint = if trimmed.is_empty() { None } else { Some(trimmed.to_string()) }; - 
self.add_info_message(format!("{label}…"), hint); + self.add_info_message(format!("{}…", label.base_label), hint); self.request_redraw(); } pub(crate) fn on_delegate_delta(&mut self, run_id: &str, chunk: &str) { if self.delegate_run.as_deref() != Some(run_id) { - return; + self.delegate_run = Some(run_id.to_string()); } - self.delegate_had_stream = true; + self.delegate_runs_with_stream.insert(run_id.to_string()); self.handle_streaming_delta(chunk.to_string()); } - pub(crate) fn on_delegate_completed(&mut self, run_id: &str, depth: usize) -> bool { - if depth > 0 { - return false; - } - if self.delegate_run.as_deref() != Some(run_id) { - return false; - } - let had_stream = self.delegate_had_stream; - if had_stream { - self.flush_answer_stream_with_separator(); - self.handle_stream_finished(); - self.app_event_tx.send(AppEvent::StopCommitAnimation); - } - if let Some(previous) = self.delegate_previous_status_header.take() { - self.set_status_header(previous); - } - if self.delegate_status_claimed { - self.bottom_pane.set_task_running(false); - self.delegate_status_claimed = false; + pub(crate) fn on_delegate_completed( + &mut self, + run_id: &str, + label: &DelegateDisplayLabel, + ) -> bool { + let had_stream = self.delegate_runs_with_stream.remove(run_id); + if self.delegate_run.as_deref() == Some(run_id) { + if had_stream { + self.flush_answer_stream_with_separator(); + self.handle_stream_finished(); + self.app_event_tx.send(AppEvent::StopCommitAnimation); + } + self.delegate_run = None; } - self.delegate_run = None; - self.delegate_had_stream = false; - had_stream + label.depth == 0 && had_stream } pub(crate) fn on_delegate_failed( &mut self, run_id: &str, - agent_id: &AgentId, + label: &DelegateDisplayLabel, error: &str, - depth: usize, ) { - if depth == 0 { - let _ = self.on_delegate_completed(run_id, depth); + let _ = self.on_delegate_completed(run_id, label); + self.add_error_message(format!("{} failed: {error}", label.base_label)); + } + + pub(crate) fn 
set_delegate_status_owner(&mut self, run_id: &str, agent_id: &AgentId) { + self.set_delegate_status_owner_internal(run_id, agent_id); + } + + pub(crate) fn clear_delegate_status_owner(&mut self) { + if self.delegate_status_owner.take().is_some() { + if let Some(previous) = self.delegate_previous_status_header.take() { + self.set_status_header(previous); + } + self.bottom_pane.set_task_running(false); + } + } + + fn set_delegate_status_owner_internal(&mut self, run_id: &str, agent_id: &AgentId) { + let is_same = self.delegate_status_owner.as_deref() == Some(run_id); + if !is_same && self.delegate_status_owner.is_none() { + if self.delegate_previous_status_header.is_none() { + self.delegate_previous_status_header = Some(self.current_status_header.clone()); + } + if self.bottom_pane.status_widget().is_none() { + self.bottom_pane.set_task_running(true); + } } - let label = Self::delegate_label(agent_id, depth); - self.add_error_message(format!("{label} failed: {error}")); + self.delegate_status_owner = Some(run_id.to_string()); + self.set_status_header(format!("Delegating to #{}", agent_id.as_str())); } fn try_delegate_shortcut(&mut self, _text: &str) -> bool { diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index cb08d7a3f5a8..95106739ead5 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -45,6 +45,7 @@ use crossterm::event::KeyEvent; use crossterm::event::KeyModifiers; use insta::assert_snapshot; use pretty_assertions::assert_eq; +use std::collections::HashSet; use std::fs::File; use std::io::BufRead; use std::io::BufReader; @@ -291,8 +292,8 @@ fn make_chatwidget_manual() -> ( ghost_snapshots_disabled: false, needs_final_message_separator: false, delegate_run: None, - delegate_had_stream: false, - delegate_status_claimed: false, + delegate_runs_with_stream: HashSet::new(), + delegate_status_owner: None, delegate_previous_status_header: None, delegate_context: None, 
delegate_user_frames: Vec::new(), @@ -392,13 +393,23 @@ fn test_rate_limit_warnings_monthly() { fn delegate_stream_deltas_and_restore_status() { let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(); let agent = AgentId::parse("ideas_provider").expect("valid agent id"); + let label = DelegateDisplayLabel { + depth: 0, + base_label: "↳ #ideas_provider".to_string(), + }; assert!(chat.bottom_pane.status_widget().is_none()); assert_eq!(chat.current_status_header, "Working"); - chat.on_delegate_started("run-1", &agent, "sketch integration points", 0); + chat.on_delegate_started( + "run-1", + &agent, + "sketch integration points", + label.clone(), + true, + ); assert_eq!(chat.delegate_run.as_deref(), Some("run-1")); - assert!(chat.delegate_status_claimed); + assert_eq!(chat.delegate_status_owner.as_deref(), Some("run-1")); assert!(chat.bottom_pane.status_widget().is_some()); assert_eq!(chat.current_status_header, "Delegating to #ideas_provider"); @@ -429,7 +440,7 @@ fn delegate_stream_deltas_and_restore_status() { "expected streamed delegate output in history" ); - let streamed = chat.on_delegate_completed("run-1", 0); + let streamed = chat.on_delegate_completed("run-1", &label); assert!( streamed, "delegate completion should report streaming output" @@ -446,7 +457,8 @@ fn delegate_stream_deltas_and_restore_status() { "expected commit animation to stop after delegate completion" ); assert!(chat.delegate_run.is_none()); - assert!(!chat.delegate_status_claimed); + chat.clear_delegate_status_owner(); + assert!(chat.delegate_status_owner.is_none()); assert!(chat.bottom_pane.status_widget().is_none()); assert_eq!(chat.current_status_header, "Working"); } @@ -456,9 +468,17 @@ fn nested_delegate_info_events_are_indented() { let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(); let outer = AgentId::parse("ideas_provider").expect("valid id"); let inner = AgentId::parse("creative_ideas").expect("valid id"); + let outer_label = DelegateDisplayLabel { + depth: 0, + 
base_label: "↳ #ideas_provider".to_string(), + }; + let inner_label = DelegateDisplayLabel { + depth: 1, + base_label: " ↳ #creative_ideas".to_string(), + }; - chat.on_delegate_started("outer-run", &outer, "outer brief", 0); - chat.on_delegate_started("inner-run", &inner, "inner brief", 1); + chat.on_delegate_started("outer-run", &outer, "outer brief", outer_label, true); + chat.on_delegate_started("inner-run", &inner, "inner brief", inner_label, false); let mut messages = Vec::new(); while let Ok(event) = rx.try_recv() { diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index 28e02863a8ad..73bcc746c906 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -462,6 +462,7 @@ async fn run_ratatui_app( delegate_cli_overrides, delegate_config_overrides, allowed_agents, + config.multi_agent.max_concurrent_delegates, )); let login_status = get_login_status(&config, &global_codex_home); let should_show_windows_wsl_screen = From 738873ee3922aba6c8a3896f2015b3e482467297 Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Thu, 16 Oct 2025 20:11:36 +0000 Subject: [PATCH 14/17] Support detached agents with optional context reattach --- ai-temp/example-codex-home/AGENTS.md | 31 +- ai-temp/example-codex-home/README.md | 14 +- .../agents/request_summarizer/AGENTS.md | 15 + .../agents/request_summarizer/config.toml | 10 + ai-temp/example-codex-home/config.toml | 5 +- .../scripts/log-notification.sh | 5 + ai-temp/example-conversation.md | 749 ++++++++++-------- ai-temp/parallel-delegation.md | 44 +- codex-rs/Cargo.lock | 1 + codex-rs/core/src/delegate_tool.rs | 22 + codex-rs/core/src/tools/handlers/delegate.rs | 149 +++- codex-rs/core/src/user_notification.rs | 31 + codex-rs/core/tests/common/Cargo.toml | 1 + codex-rs/core/tests/common/test_codex.rs | 63 +- codex-rs/multi-agent/src/lib.rs | 3 + codex-rs/multi-agent/src/orchestrator.rs | 202 ++++- codex-rs/tui/src/app.rs | 83 +- codex-rs/tui/src/app_event.rs | 6 + codex-rs/tui/src/chatwidget.rs | 170 
+++- codex-rs/tui/src/chatwidget/tests.rs | 20 +- 20 files changed, 1229 insertions(+), 395 deletions(-) create mode 100644 ai-temp/example-codex-home/agents/request_summarizer/AGENTS.md create mode 100644 ai-temp/example-codex-home/agents/request_summarizer/config.toml create mode 100755 ai-temp/example-codex-home/scripts/log-notification.sh diff --git a/ai-temp/example-codex-home/AGENTS.md b/ai-temp/example-codex-home/AGENTS.md index d2495b5606aa..5aa826636da1 100644 --- a/ai-temp/example-codex-home/AGENTS.md +++ b/ai-temp/example-codex-home/AGENTS.md @@ -2,14 +2,37 @@ This directory demonstrates a multi-agent workflow. You are the coordinator that routes work through a fixed sequence: -1. **Understand the request.** Restate the goal, gather missing context, and note constraints. -2. **Invoke `ideas_provider`.** Share the brief and **explicitly require** it to run `creative_ideas` *and* `conservative_ideas` before responding. -3. **Forward the winning approach to `critic`.** Summarize the provider’s outcome (including key trade-offs) and ask the critic to highlight risks, validation gaps, or missing safeguards. -4. **Synthesize the dialogue.** After the critic replies, deliver **exactly one paragraph** (≤75 words) tying together the best idea, mitigations, and next actions—no headings or bullets. +1. **Log the user request (detached).** Immediately call `delegate_agent` so `request_summarizer` can write a markdown note to `/tmp/notes/`. Embed the *raw user message* in the prompt so the summarizer can actually summarize it. Example template (replace `` with the latest user turn, stripped of surrounding quotes): + + ```json + { + "agent_id": "request_summarizer", + "mode": "detached", + "prompt": "Latest user request: \n\nSummarize that request in ≤75 words and write the summary to /tmp/notes/request-.md. Use a single shell here-doc so the file contains only the summary paragraph. If the directory is missing, fail loudly. 
After writing the file, reply with exactly the absolute filename and nothing else." + } + ``` + + Continue with the remaining steps while this detached run completes. After you notice the “Detached run finished” banner (and optionally inspect the file), open `/agent`, highlight the summarizer entry, and choose “Dismiss detached run” so the list stays clean. +2. **Understand the request.** In your own words, restate the goal, list any constraints or missing information, and ask clarification questions. +3. **Invoke `ideas_provider` (batched delegates).** Use a single `delegate_agent` call with a `batch` array so both creative and conservative delegates run even if the model only allows one function call: + + ```json + { + "batch": [ + {"agent_id": "creative_ideas", "prompt": ""}, + {"agent_id": "conservative_ideas", "prompt": ""} + ] + } + ``` + + Do not proceed until both sub-agents reply. If a response is missing or failed, rerun that delegate. +4. **Forward the winning approach to `critic`.** Summarize the chosen plan, note why it won, and call `delegate_agent` with that summary. Wait for the critic’s bullets before continuing. +5. **Synthesize the dialogue.** Deliver **exactly one paragraph** (≤75 words) combining the chosen idea, key mitigations, and next steps—no headings or bullets. General rules: - Keep replies short unless the user explicitly requests depth; cite `ai-temp/` docs when needed for background. +- Detached runs surface under `/agent` as “Pending” until they complete. Dismiss them after you’ve read the saved file so future runs stay tidy. - Follow the chain even if you already see the answer; only skip when the user explicitly opts out of delegation. - The `delegate_agent` tool is AI-only. Describe which delegate you want in plain language—the user cannot invoke sub-agents directly. - You can launch multiple delegates in parallel. 
The CLI indents nested runs beneath their parent (two spaces per depth), and up to five delegates may be active at once; wait for all required sub-agents to finish before synthesizing. Call `delegate_agent` once with a `batch` array containing each `{agent_id, prompt}` so both delegates run even on models limited to a single tool invocation per turn. diff --git a/ai-temp/example-codex-home/README.md b/ai-temp/example-codex-home/README.md index 411fd274c0f8..213242d45859 100644 --- a/ai-temp/example-codex-home/README.md +++ b/ai-temp/example-codex-home/README.md @@ -1,19 +1,23 @@ # Sample Codex Home Setup -This directory mirrors a minimal `~/.codex` layout so you can try the multi-agent loader without touching your real config. The flow illustrates a chained delegation sequence: +This directory mirrors a minimal `~/.codex` layout so you can try the multi-agent loader without touching your real config. The flow now demonstrates a chained delegation sequence that mixes detached and synchronous agents: -1. Main agent briefs `ideas_provider`. -2. `ideas_provider` consults both `creative_ideas` and `conservative_ideas`, then recommends a blended approach. -3. Main agent forwards that plan to `critic` for risk review before replying to the user. +1. Main agent launches `request_summarizer` in **detached** mode. The summarizer writes a ≤75-word markdown file to `/tmp/notes/` capturing the fresh user request. +2. While the detached run finishes, the agent restates the request and briefs `ideas_provider`. +3. `ideas_provider` consults both `creative_ideas` and `conservative_ideas`, then recommends a blended approach. +4. Main agent forwards that plan to `critic` for risk review before replying to the user. +5. Once the detached summarizer completes, the coordinator checks `/agent`, reviews the saved file (if desired), and dismisses the run to keep the list tidy. ## Directory Structure - `config.toml`: baseline settings used when no sub-agent is selected. 
- `AGENTS.md`: default instruction set for the primary agent (orchestrator). +- `agents/request_summarizer/`: logs the latest user request to `/tmp/notes/*.md` via a detached delegate run. - `agents/ideas_provider/`: synthesizes outputs from creative and conservative delegates. - `agents/critic/`: GPT-5-nano sub-agent that reviews the leading option. - `agents/creative_ideas/`: generates bold, unconventional concepts. - `agents/conservative_ideas/`: produces safe, low-risk alternatives. +- `scripts/log-notification.sh`: simple helper that appends detached-run notifications to `/tmp/codex-notifications.log`. - `log/`, `sessions/`: empty placeholders so Codex can write logs and rollouts. ## Quick Start @@ -33,6 +37,8 @@ CODEX_HOME="$(pwd)/ai-temp/example-codex-home" target/debug/codex --agent conser # Inside the primary session, describe the task as usual. # The main assistant decides when to call the `delegate_agent` tool. +# Detached runs surface under /agent and emit desktop/system notifications; dismiss them after review. +# External notifications are logged to /tmp/codex-notifications.log via scripts/log-notification.sh. # Use tags like `#ideas_provider` in your prompts only as hints for the AI. # Watch logs in log/codex-tui.log to confirm delegation activity. ``` diff --git a/ai-temp/example-codex-home/agents/request_summarizer/AGENTS.md b/ai-temp/example-codex-home/agents/request_summarizer/AGENTS.md new file mode 100644 index 000000000000..5a7edfe33bea --- /dev/null +++ b/ai-temp/example-codex-home/agents/request_summarizer/AGENTS.md @@ -0,0 +1,15 @@ +# Request Summarizer Agent + +You are invoked at the very beginning of every user request. Your job is to persist a short, human-readable markdown summary of the request so teammates can review it later. + +## Responsibilities + +1. **Summarize the latest user turn** in ≤75 words. Focus on the concrete goal, constraints, and success criteria from that turn (ignore system/assistant messages). +2. 
**Write the summary to `/tmp/notes/`** as `request-.md`. Use one shell command (e.g., a here-doc) so the file contains exactly the summary paragraph—no headings, no instructions. Request `delegate_shell` access only once if you need it; approvals are granted automatically. +3. **Acknowledge completion** by replying with **only** the absolute filename you created (e.g., `/tmp/notes/request-2025-10-16T19:30:12Z.md`). Do not wrap it in quotes, code blocks, or extra narration. + +## Constraints + +- `/tmp/notes/` already exists; fail if it cannot be accessed. +- Keep the summary to a single paragraph of plain text suitable for markdown. +- Invoke at most one shell command, then respond with the filename as described. diff --git a/ai-temp/example-codex-home/agents/request_summarizer/config.toml b/ai-temp/example-codex-home/agents/request_summarizer/config.toml new file mode 100644 index 000000000000..9282adfcecbb --- /dev/null +++ b/ai-temp/example-codex-home/agents/request_summarizer/config.toml @@ -0,0 +1,10 @@ +model = "gpt-5" +sandbox_mode = "workspace-write" +approval_policy = "never" + +[history] +persistence = "save-all" + +[multi_agent] +agents = [] + diff --git a/ai-temp/example-codex-home/config.toml b/ai-temp/example-codex-home/config.toml index b471ae735dd2..9b09a56f42ae 100644 --- a/ai-temp/example-codex-home/config.toml +++ b/ai-temp/example-codex-home/config.toml @@ -5,7 +5,10 @@ sandbox_mode = "workspace-write" persistence = "save-all" [multi_agent] -agents = ["ideas_provider", "critic"] +agents = ["request_summarizer", "ideas_provider", "critic"] + +notify = ["./scripts/log-notification.sh"] [tui] show_task_summary = true +notifications = true diff --git a/ai-temp/example-codex-home/scripts/log-notification.sh b/ai-temp/example-codex-home/scripts/log-notification.sh new file mode 100755 index 000000000000..fb540d32ebb2 --- /dev/null +++ b/ai-temp/example-codex-home/scripts/log-notification.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -euo pipefail 
+log_file="/tmp/codex-notifications.log" +printf '%s +' "$*" >> "$log_file" diff --git a/ai-temp/example-conversation.md b/ai-temp/example-conversation.md index 509d70686674..44e500c1a68a 100644 --- a/ai-temp/example-conversation.md +++ b/ai-temp/example-conversation.md @@ -14,357 +14,422 @@ To get started, describe a task or try one of these commands: /model - choose what model and reasoning effort to use /review - review any changes and find issues -› • We need to add a bubble-sort implementation in Python, but let’s keep it read-only for -now. Please: 1. Ask the ideas provider for three alternative designs (iterative, optimized variations, -test scaffolding) with pros/cons—text only, no code yet. 2. Pick the most promising idea, summarize it in your own words, and send that summary to -the critic asking for risks, missing tests, or edge cases. 3. Finish by combining their feedback into a short recommendation, including what we -should -verify before writing any code. +› how to cut an apple? Use agents. + +• Kicking off a detached summarizer, then I’ll gather approaches from two idea agents. + +• ↳ #request_summarizer (detached)… Latest user request: how to cut an apple? Use agents. +Summarize that request in ≤75 words and write the summary to /tmp/notes/request-.md. +Use a single shell here-doc so the file contains only the summary paragraph. If the directory +is missing, fail loudly. After writing the file, reply with exactly the absolute filename and +nothing else. + +• ↳ #creative_ideas… Design a playful multi-agent procedure to teach a beginner how to +safely cut an apple. Define 3–5 agent roles (e.g., Safety Lead, Tool Prep, Cutter, Sanitation +Checker), each with responsibilities. Produce a concise, step-by-step plan (≤8 steps), with +safety checks, optional variations (wedges, slices), and a fallback if no peeler/corer is +available. Keep it kitchen-realistic and efficient. 
+ +• ↳ #conservative_ideas… Provide a straightforward, reliable method to cut an apple safely +using only a cutting board and a sharp chef’s knife. Give a numbered, minimal 6–8 step +process with safety notes, how to core, options for wedges vs thin slices, and cleanup. Avoid +fancy tools and keep language simple for a beginner. + +─ Worked for 59s ──────────────────────────────────────────────────────────────────────────── + +• Implementation + +1. Set up: Place a damp towel under the cutting board so it won’t slide. Wash and dry the + apple. Stand it stem‑up. Use a “claw” grip to keep fingertips tucked. +2. First side (“cheek”): With the knife straight up‑and‑down, cut about 1/4 inch to one + side of the stem, slicing off a large flat piece. Keep fingers clear. +3. Second side: Rotate the apple 180° and repeat to remove the opposite large piece. +4. Remaining sides: Rotate 90°, then cut off the two narrower sides. You now have 4 pieces + and the core. +5. Core trim: Lay each piece flat (flat side down). If you see seeds or hard core, make a + thin slice to remove them. Discard the core/seeds. +6. Wedges option: Keep pieces flat‑side down and cut each into 2–4 sticks/wedges, as thick + as you like. +7. Thin slices option: Lay a piece flat and slice across into thin planks. For very thin + slices, hold the tip of the knife down and “rock” gently. +8. Cleanup: Move the knife to the back of the board, discard the core, then wash and dry + the knife and board. To slow browning, lightly rinse slices in cold water or add a few + drops of lemon juice, then refrigerate. + +Safety Notes + +- Keep the flat side down before trimming; never cut a rolling piece. +- Use a claw grip; keep fingertips behind your knuckles. +- Cut straight down; don’t cut toward your hand or body. +- Go slowly; a sharp knife with steady pressure is safer than forcing. +- If you hit seeds or feel crunch, back out and adjust your cut. 
+- Dry hands, apple, and board to reduce slipping; use the damp towel under the board. + +Validation + +- Seeds and hard core removed; no gritty bits remain. +- Pieces are even and stable when set down. +- Board stayed put during cutting; no slips. +- Knife and board are washed and dried; slices stored promptly. + +• ↳ #conservative_ideas completed finished in 33.4s -• I’ll start by drafting a brief to the ideas provider for three text-only designs. +───────────────────────────────────────────────────────────────────────────────────────────── -• Updated Plan -└ □ Gather three design options -□ Critique leading approach -□ Synthesize recommendation - -• ↳ #ideas_provider… You are the ideas provider. Text-only, no code, no commands. We need -three alternative designs to implement bubble sort in Python (read-only exploration). Please -provide: - -1. Iterative baseline: a straightforward bubble sort that repeatedly passes through the list - and swaps adjacent out-of-order elements. -2. Optimized variations: include at least early-exit (stop if a full pass makes zero swaps) - and a shrinking boundary using the last-swapped index to avoid re-checking the sorted suffix. -3. Test scaffolding strategy: how to structure tests for correctness, edge cases, property- - based checks, and simple performance sanity checks. - For each item, include: - -- High-level steps (3–6 steps) -- Pros and cons -- Complexity (time best/avg/worst, space), and stability -- Edge cases to guarantee coverage (e.g., empty list, single element, already sorted, reverse - sorted, duplicates, negative numbers, mixed types and how to treat them) -- Any assumptions or constraints (e.g., in-place vs new list, mutability, type comparability) - Keep it concise but complete. Explicitly state that no commands or code should be run. Only - produce text, suitable for planning. - -─ Worked for 56s ──────────────────────────────────────────────────────────────────────────── - -• No commands or code should be run. 
Text-only planning follows. - -Iterative Baseline - -- High-Level Steps - - Loop n-1 passes over the list. - - For each pass, compare adjacent pairs left-to-right. - - Swap if out of order to “bubble” larger items rightward. - - After each pass, the largest among the unsorted portion is in place at the end. - - Repeat until all passes complete. -- Pros - - Easiest to understand and implement. - - Predictable behavior for teaching and debugging. - - In-place; stable if swapping only when strictly necessary. -- Cons - - Always does O(n^2) comparisons even when already sorted. - - Slow for large inputs; uncompetitive vs built-ins. -- Complexity & Stability - - Time: best O(n^2), average O(n^2), worst O(n^2). - - Space: O(1) auxiliary (in-place). - - Stability: stable if swapping only when > (not >=). -- Edge Cases - - Empty list: return as-is without error. - - Single element: no swaps, return as-is. - - Already sorted: still performs full n-1 passes. - - Reverse sorted: maximum swaps; still correct. - - Duplicates: maintain original relative order. - - Negative numbers: compare normally. - - Mixed types: in Python 3, cross-type comparisons may raise TypeError; either reject - or require a key/comparator. -- Assumptions/Constraints - - In-place sort of a mutable sequence. - - Elements must be mutually comparable under the comparator (default ). - - Optional: accept a key or cmp-like callable to control ordering; if absent, rely on - native comparisons. - -Optimized Variations - -- High-Level Steps - - Early-Exit: Track whether any swap occurred in a pass; if none, stop. - - Shrinking Boundary: Track the last swapped index in each pass; limit the next pass to - this boundary (suffix already sorted). - - Left-Bubble Variant (optional): Bubble smallest leftward to handle some distributions - (not necessary if using last-swapped boundary). - - Combine early-exit and last-swapped for best practical effect. 
-- Pros - - Early-exit yields O(n) best-case on sorted/almost-sorted inputs. - - Last-swapped boundary cuts unnecessary comparisons for trailing sorted suffixes. - - Still simple, in-place, and stable. -- Cons - - Slightly more bookkeeping than the baseline. - - Still O(n^2) average/worst; not competitive for large n. -- Complexity & Stability - - Time: best O(n) (early-exit on sorted), average O(n^2), worst O(n^2). - - Space: O(1). - - Stability: stable when swapping only on strict comparisons. -- Edge Cases - - Empty, single-element: perform zero or one pass; early-exit triggers. - - Already sorted: early-exit on first pass. - - Reverse sorted: still worst-case; boundary will still shrink per pass. - - Duplicates: ensure no swaps on equal keys; verify relative order preserved. - - Negative numbers: normal behavior. - - Mixed types: as above—define policy (reject or require key). - - Pathological cases: Many equal elements; confirm stability and no unnecessary swaps - on equality. -- Assumptions/Constraints - - In-place mutation of a list-like sequence. - - Elements comparable, or a key function provided. - - Early-exit flag and last-swapped index are correctly reset each pass. - -Test Scaffolding Strategy - -- High-Level Steps - - Structure unit tests around table-driven cases (input → expected). - - Add property-based tests comparing output to Python’s sorted (reference) for random - arrays. - - Include stability tests by pairing values with original indices. - - Add light performance sanity checks on scaling (not microbenchmarks). -- Pros - - High confidence via deterministic and generative checks. - - Catches stability regressions that value-only checks miss. - - Scales coverage without verbose hand-written cases. -- Cons - - Property-based tests need care with data domains (e.g., disallow mixed incomparable - types unless explicitly supported). - - Performance checks can be noisy; keep thresholds coarse. 
-- Scope & Coverage - - Correctness: Verify exact equality to sorted for various distributions and sizes (0, - 1, small, medium). - - Stability: Use tuples (value, original_index) with repeated values, sort by value - only and assert original order preserved when values equal. - - Edge Cases: - - Empty list. - - Single element. - - Already sorted. - - Reverse sorted. - - All equal elements. - - Duplicates with mixed positions. - - Negative values; large magnitude values. - - Floats including -0.0 and potential nan (if supporting floats, define how to - treat nan; otherwise exclude). - - Mixed types: either assert TypeError or skip if unsupported; test both policy - paths explicitly. - - Property-Based: - - Random integers across ranges; varying sizes (0–N), skewed distributions (many - duplicates). - - Optional: random floats excluding NaN if not supported; or targeted NaN tests for - defined policy. - - Compare to sorted with the same key if keys are supported. - - Performance Sanity: - - Measure rough time for sizes n in {100, 300, 900}; confirm ~9x time growth when n - triples (quadratic indication). - - Confirm early-exit: sorted input scales near linearly; time for sorted vs random - shows large gap. - - Keep wide tolerances; avoid flaky thresholds. -- Assumptions/Constraints - - Tests do not depend on external resources; deterministic seeds for random generators. - - No commands or code should be run in this planning; when implemented, use local test - runner. - - If a key function is supported, test with simple projections (e.g., abs) and - stability under equal keys. - -Comparison - -- Most promising: Optimized bubble sort with early-exit and shrinking boundary. It - preserves simplicity and stability, provides O(n) best-case on sorted/almost-sorted - inputs, and reduces unnecessary comparisons via last-swapped boundary, while remaining - in-place and easy to test. The baseline is useful for teaching and as a reference - implementation. 
The test scaffolding applies to both and should be adopted regardless. - Final choice can weigh readability needs (baseline) versus practical speed on nearly- - sorted data (optimized). - -• ↳ #ideas_provider completed finished in 47.4s +• 1. Apple Mission Control + +- Roles + - Safety Lead: Checks dry hands, clear counter, anchored board (damp towel), announces + “sharp moving,” enforces claw grip and flat-surface rule. + - Tool Tech: Washes apple, sets chef/paring knife, peeler/corer (if any), scrap bowl; + verifies knife sharpness. + - Cutter: Performs cuts slowly, narrates moves (“cutting cheeks,” “fingers tucked”). + - Sanitation Checker: Handles compost/trash, wipes board, washes tools, plates fruit. +- Plan (≤8 steps) + 1. Safety Lead anchors board with a damp towel; confirms dry hands, clear zone, and + standing posture (hips square, elbow close). + 2. Tool Tech washes the apple; lays out chef knife, paring knife, optional peeler/ + corer; Safety calls GO/NO‑GO. + 3. Cutter stands apple up; cuts straight down on one side of the core, then the + opposite, then the two thinner sides to yield 4 “cheeks” + core (Safety: “claw grip, + blade vertical”). + 4. Optional peel: If peeler is available, Tool Tech peels the cheeks away from the + blade. Fallback (no peeler/corer): keep skin on; use paring knife to V‑cut seed + pockets from each cheek or scoop seeds from quarters with a spoon. + 5. Choose cut style: For wedges, slice each cheek into 2–3 thick pieces. For thin + slices (snacks/sandwiches), lay a cheek flat and slice crosswise in 2–3 mm sheets. + 6. Safety micro‑check before speed‑up: Safety Lead verifies knuckles forward (“claw”), + tip anchored for slicing, no wobble; Cutter continues at steady pace. + 7. Sanitation Checker bins core/seeds (compost), wipes board and knife, and plates + apple; Tool Tech stores knives safely. 
+ +• ↳ #request_summarizer completed finished in 57.1s + +- Rationale: “Mission controlWriting a one-paragraph summary to /tmp/notes via a here-doc.” + call‑outs keep a beginner calm and systematic while embedding clear GO/NO‑GO safety/tmp/ + notes/request-2025-10-16T12:00:00Z.md gates. +- Assumptions: Standard cutting board, chef + paring knife available; single apple; + right‑handed learner (swap as needed). + +2. The Kitchen Band + +- Roles + - Conductor (Safety): Sets tempo (“slow beat”), calls “pause” for checks, confirms + anchored board and finger positions. + - Gear Tech: Tunes “instruments” (sharpens/chooses knives), preps towel under board, + washes apple. + - Lead Slicer: Executes quarter‑and‑core method; narrates rhythm (“down‑stroke, park + tip”). + - Quality Taster (Sanitation): Portioning, plating, cleanup; suggests pairing (peanut + butter, lemon spritz). +- Plan (≤8 steps) + 1. Conductor calls warm‑up: dry hands, towel under board, clear stage; Gear Tech washes + apple and sets knives/peeler/corer. + 2. Lead Slicer halves apple top‑to‑bottom, then halves again for quarters (Conductor: + “claw hands, steady board”). + 3. If corer exists, Gear Tech cores the quarters. Fallback: Lead Slicer makes a shallow + V‑cut along each quarter’s core with a paring knife; or scoops seeds with a spoon. + 4. Optional peel verse: If peeling, run peeler away from the hand with short strokes on + supported quarters; otherwise keep skin on for grip and fiber. + 5. Choose arrangement chorus: Wedges (2–3 per quarter) for dipping; or thin slices (2–3 + mm) for sandwiches/kids. + 6. Conductor calls a bar‑line safety check: knife parked edge‑away when not slicing; + fingers never ahead of blade. + 7. Quality Taster plates, wipes tools/board, and stows knives; optional lemon spritz to + prevent browning. +- Rationale: Musical pacing and call‑and‑response make safety checks feel natural and + memorable. 
+- Assumptions: Peeler/corer may be absent; peeler is optional; paper towel/dish towel + available. + +3. Street Crew Cut + +- Roles + - Crossing Guard (Safety): Calls STOP/GO, manages “knife traffic,” ensures stable + “intersection” (board), checks claw grip each pass. + - Road Crew (Tool Prep): Washes apple, lays down anti‑slip towel, sets knives; readies + scrap bin. + - Driver (Cutter): Executes around‑core “traffic lanes,” keeps blade vertical, + announces turns. + - Inspector (Sanitation/Serving): Clears “debris” (seeds/cores), cleans site, plates + portions. +- Plan (≤8 steps) + 1. Road Crew secures board and washes apple; Crossing Guard confirms GO: dry hands, no + clutter, blade edge away from bystanders. + 2. Driver stands apple, cuts two wide “lanes” (cheeks) around core, then two narrow + sides; Crossing Guard watches for wobble. + 3. Fallback if no peeler/corer: keep skin on; trim seed ridges with a small V using a + paring knife; spoon works to scoop seed bed if quarters are used instead. + 4. Optional peel: If peeler on site, Road Crew peels cheeks on the board with short, + away‑from‑hand strokes. + 5. Choose route: “Wedges” (snack/dip): split cheeks into chunky arcs. “Slices” (quick + snack): fan out cheeks and slice thin. + 6. Mid‑route check: STOP if fingers stray ahead of blade; reset claw, rotate piece to a + flat face before resuming. + 7. Inspector composts core/seeds, wipes board/knife, plates neatly; Crossing Guard + announces CLEAR (knife stored). +- Rationale: Traffic metaphors create intuitive STOP/GO safety gates and emphasize control + at each “turn.” +- Assumptions: One learner with a spotter; standard knives; counter space and towel + available. + +• ↳ #creative_ideas completed finished in 1m05s + +───────────────────────────────────────────────────────────────────────────────────────────── + +• Forwarding the selected conservative plan to the critic for risk checks. 
+ +• ↳ #critic… Chosen plan: A straightforward, 8-step, tool-minimal method: stabilize cutting +board with damp towel; wash/dry apple; cut off two large cheeks then two narrow sides around +the core; lay pieces flat to trim seed pockets; choose wedges or thin slices; clean up; +optional lemon/water to prevent browning. Safety focuses on claw grip, flat-side-down cuts, +vertical blade, slow steady pressure. +Why it wins: It’s clear, fast, and safe for beginners with only a cutting board and a sharp +chef’s knife. Minimal roles and no optional gadgets reduce cognitive load while hitting key +safety gates. +Please provide concise bullet points: + +- Key risks/pitfalls and how to mitigate them. +- Any critical missing steps or clarifications to make it foolproof. +- One or two optional enhancements that don’t add complexity. + +─ Worked for 1m 02s ───────────────────────────────────────────────────────────────────────── + +• - Slippery board stabilization may fail if towel oversaturated; use a well-wrung towel and +verify no wobble. + +- Cross-contamination risk after raw proteins; explicitly separate boards or wash knife and + board thoroughly before fruit prep. +- Apple may still roll during cheek cuts; instruct stem-up orientation and cut straight + down beside the core. +- Trimming seed pockets can waste flesh; recommend shallow V-cuts and a quick visual check + for remaining seeds. +- Transferring slices risks nicks and drops; use a bench scraper for safe transfer, owner: + home cook. + +• ↳ #critic completed finished in 1m03s ───────────────────────────────────────────────────────────────────────────────────────────── -• Great, I’ll pass the leading optimized approach to the critic to probe risks and missing -tests. +• Detached summarizer launched; I’ll dismiss it after it completes. + +Understanding the request + +- Goal: Teach a beginner to cut an apple safely, using an agent-style workflow but + practical in a real kitchen. 
+- Constraints/missing: Available tools (chef’s knife, paring knife, peeler/corer)? Desired + shape (wedges vs thin slices)? Any need to peel or reduce browning? +- Clarifications: Do you have a stable cutting board and a sharp chef’s knife? Do you + prefer wedges or thin slices? Should the skin be kept on? + +Final synthesis +Use the simple, safe “four-cheek” method: stabilize board (wrung towel), wash/dry apple, +cut two large cheeks then two narrow sides, lay pieces flat to trim seeds, then cut wedges +or thin slices. Mitigate risks: verify no board wobble, keep claw grip, cut straight down +beside core, make shallow V-cuts, use a bench scraper to transfer. Next, clean knife/board +and optionally spritz lemon to slow browning. +❯ cargo build -p codex-cli +Compiling codex-tui v0.0.0 (/home/mastermind/projects/forks/codex/codex-rs/tui) +Compiling codex-cloud-tasks v0.0.0 (/home/mastermind/projects/forks/codex/codex-rs/cloud-tasks) +Compiling codex-cli v0.0.0 (/home/mastermind/projects/forks/codex/codex-rs/cli) +Finished `dev` profile [unoptimized + debuginfo] target(s) in 12.91s +❯ CODEX_HOME="$(pwd)/../ai-temp/example-codex-home" target/debug/codex +╭────────────────────────────────────────────╮ +│ >_ OpenAI Codex (v0.0.0) │ +│ │ +│ model: gpt-5 /model to change │ +│ directory: ~/projects/forks/codex/codex-rs │ +╰────────────────────────────────────────────╯ + +To get started, describe a task or try one of these commands: + +/init - create an AGENTS.md file with instructions for Codex +/status - show current session configuration +/approvals - choose what Codex can do without approval +/model - choose what model and reasoning effort to use +/review - review any changes and find issues + +› how to cut an apple? Use agents. + +• Logging your request (detached) while I continue with planning. + +• ↳ #request_summarizer (detached)… Latest user request: how to cut an apple? Use agents. 
+Summarize that request in ≤75 words and write the summary to /tmp/notes/request-.md. +Use a single shell here-doc so the file contains only the summary paragraph. If the directory +is missing, fail loudly. After writing the file, reply with exactly the absolute filename and +nothing else. • Updated Plan -└ ✔ Gather three design options -□ Critique leading approach -□ Synthesize recommendation - -• ↳ #critic… You are the critic. Read-only evaluation, text-only, no code, no commands. -Summary of the leading approach (my words): - -- Implement an in-place, stable bubble sort in Python using two optimizations: - 1. Early-exit: track if any swap occurred in a pass; if none, terminate for O(n) best case - on already-sorted or nearly-sorted inputs. - 2. Shrinking boundary via last-swapped index: record the last index where a swap occurred; - the next pass only scans up to that index because the suffix beyond is already sorted. -- Comparisons are strict (swap only when a[i] > a[i+1]) to preserve stability. Do not swap - on equality. -- API assumptions: operate on a mutable sequence (list) in place. Accept an optional `key` - callable to enable sorting objects or custom projections; if provided, compare key(a[i]) and - key(a[i+1]). If `key` is None, use native comparisons. -- Type policy: mixed or incomparable types raise TypeError (align with Python 3 behavior). - For floats with NaN, either exclude from inputs or document behavior (Python’s `sorted` - places NaNs at the end but they compare unordered); we will either follow Python semantics or - explicitly reject NaN values—TBD. -- Complexity: best O(n), average/worst O(n^2); space O(1). Stability: stable by construction. 
-- Tests: table-driven unit tests across edge cases; property-based tests comparing to - Python’s `sorted` with the same `key`; explicit stability tests using pairs (value, original - index) to confirm equal keys preserve order; error-handling tests for incomparable mixes; - light performance sanity checks (quadratic scaling on random inputs; much faster on already- - sorted due to early-exit), with loose tolerances. - Please surface: -- Risks, blind spots, or tricky parts (e.g., off-by-one errors in boundary logic, stability - pitfalls if equality handling drifts, behavior with NaN/partial orders, key function caching - vs recomputation trade-offs, cost of repeated key calls). -- Missing tests or edge cases we should add (e.g., many equal elements, nearly-sorted with - a late inversion, alternating patterns, large lists, custom objects with flaky comparators, - exceptions thrown inside key). -- Any assumptions that should be tightened or clarified before implementation. Keep it - concise but thorough. Still read-only; no code or commands. - -─ Worked for 1m 12s ───────────────────────────────────────────────────────────────────────── - -• Risks and Tricky Parts - -- Boundary shrink off-by-one: define the new scan limit precisely. Using the right index - of the last swap vs. left index changes the next-pass upper bound by 1; an error here can - miss needed comparisons or over-scan. -- Early-exit correctness: ensure the outer loop exits immediately when no swaps occur; - don’t accidentally run an extra empty pass. -- Stability drift: swapping on equality or using a non-strict comparator breaks stability. - Verify comparisons are strictly “greater-than” on the chosen projection. -- Key recomputation cost: naive bubble sort calls the key repeatedly per element per pass - (O(n^2) key calls). If keys are expensive, this dominates runtime. 
-- Key caching trade-off: full precompute gives O(n) extra space (violates stated O(1)), - while sliding-window caching keeps O(1) space but requires careful updates on swaps to - avoid stale keys. -- Partial orders/NaN: float NaN causes both x>y and y>x to be False; items may remain in - original positions and the array may not be totally ordered. Behavior will differ from - “intuitive” numeric sorting unless explicitly defined. -- Mixed/incomparable types: comparisons raise TypeError mid-sort and leave the list - partially mutated; document and test this. -- Comparator method choice: relying on “>” triggers gt or falls back to the right operand’s - lt. Some custom objects implement only lt; confirm behavior is acceptable and consistent - with expectations. -- Edge bounds: when the last swap occurs at index 0, the next bound becomes minimal; ensure - the loop doesn’t underflow or skip necessary comparisons when the bound is 0 or 1. -- Input mutability: only in-place sortable, index-assignable sequences are supported. - Tuples, deques, and read-only sequences should be rejected or documented. -- Best-case O(n) hinges on both early-exit and last-swapped index; if either is implemented - incorrectly, nearly-sorted inputs degrade to quadratic. -- Test flakiness from timing: “performance sanity” based on wall-clock is brittle; prefer - counting comparisons/key calls or passes. -- Exception in key: exceptions should propagate; document that the list may be partially - reordered at the point of failure. -- Large-N tests: quadratic algorithms can make tests slow/unreliable; keep sizes modest - and targeted. - -Missing Tests and Edge Cases - -- Trivial sizes: empty, single element, two elements already sorted and reversed. -- Many equals: large runs of equal keys to confirm stability and early-exit behavior. -- Nearly sorted with a late inversion (e.g., one small item near the end). -- Alternating high/low (worst-case bubbling). -- Reverse-sorted list. 
-- Mixed but comparable types (e.g., ints and bools) vs. incomparable mixes (e.g., ints and - strings) to validate TypeError. -- Floats: with infinities; with NaN (either assert a documented policy or skip/expect a - specific outcome). -- Custom objects: only lt implemented; flaky/non-transitive comparators (document undefined - behavior but verify no crashes). -- Key function: expensive key with a call counter to verify call count scaling; key raising - exceptions. -- Large-ish list within safe bounds to avoid timeouts (e.g., a few thousand elements) to - sanity-check quadratic scaling without timing assertions. -- Stability check using (value, original_index) pairs under many-equal-key scenarios. - -Assumptions to Tighten - -- Accepted input types: require collections.abc.MutableSequence (or explicitly “list - only”). Define behavior for sequences that don’t support item assignment. -- API shape: in-place, return None (align with list.sort), document no reverse or cmp args - unless intentionally supported. -- Key semantics: key must be pure and consistent; define whether keys are recomputed vs. - cached (and resulting space complexity). -- Ordering semantics: clarify stance on NaN/partial orders (reject, document stability- - preserving but not totally ordered, or emulate sorted behavior). -- Comparator direction: commit to “>” on values/keys for ascending order and state how - custom types are expected to behave. -- Complexity statement: reflect chosen key strategy (O(1) space if no caching; O(n) if - precomputed keys). - -Pre-Implementation Checklist - -- Finalize boundary-update rule (exact index used) and loop limits. -- Decide and document NaN/partial-order handling; add tests accordingly. -- Choose key strategy: recompute vs. sliding-window cache vs. full precompute; update - space-complexity note. -- Lock API details: accepted sequence types, return value, absence/presence of reverse. 
-- Define exception behavior (TypeError and key exceptions) and add tests. -- Replace timing checks with instrumentation (count passes/comparisons/key calls) in tests. -- Add stability stress tests (many equal keys, varied patterns). -- Confirm comparator direction and behavior with custom objects implementing only lt. - -• ↳ #critic completed finished in 1m25s +└ ✔ Log request (detached) +□ Understand request +□ Run ideas delegates +□ Consult critic +□ Synthesize one-paragraph reply + +• ↳ #creative_ideas… You are the creative ideas delegate. Task: Propose 3 novel yet practical +ways to cut an apple for (a) snackable wedges, (b) thin slices for pie/tarts, and (c) small +dice for salad. Emphasize speed, minimal waste, and safety. Assume a home kitchen and a sharp +chef’s or paring knife; corer optional. Output: For each option, give 4–6 concise steps, then +1–2 safety/efficiency tips. Keep it compact and actionable. + +• ↳ #conservative_ideas… You are the conservative ideas delegate. Task: Provide reliable, +standard methods to cut an apple using a cutting board and a sharp knife (corer optional). +Deliver two variants: (1) classic wedge-and-core by quadrants; (2) thin crosswise slices then +remove core. Output: Numbered steps (5–7) for each variant with clear hand placement cues, +plus exactly two safety tips across both variants. Be precise and minimal; no fluff. + +─ Worked for 1m 07s ───────────────────────────────────────────────────────────────────────── + +• 1. Four-Cheek Panels (flat, fast, low‑waste) + +- Steps: + - Stand apple; slice off 2 wide cheeks close to core, then 2 narrow cheeks (leave a + rectangular core). + - Lay cheeks peel‑side down for stability. + - Wedges: lengthwise split each cheek into 2–3 bias wedges. + - Thin slices: stack 2–3 cheeks; slice crosswise into 2–3 mm slices. + - Small dice: stack slices; cut into matchsticks, then cross‑cut to dice. + - Finely trim remaining core edges; add to dice or snack. 
+- Tips: Use a claw grip; keep peel‑side down to prevent slip. +- Rationale: Flat “panel” faces speed slicing and keep fingers safe; zero tools. +- Assumptions: Medium apple; peel on; sharp chef’s or paring knife. + +2. Core‑Then‑Rings (pinwheel stacks for uniformity) + +- Steps: + - Remove core with a corer (or cut a square tunnel around core and push out). + - Lay apple on its side; slice into 2–3 mm horizontal rings. + - Stack 3–4 rings at a time. + - Wedges: quarter stacked rings radially to make even wedges. + - Thin slices: keep as rings or halve to semi‑circles; separate. + - Small dice: stack semi‑circles; slice into strips, then cross‑cut. +- Tips: Use your off‑hand as a “fence” to keep stacks aligned. +- Rationale: Ring stacks standardize thickness; few long cuts yield many pieces fast. +- Assumptions: Optional corer available; firm apple for clean rings. + +3. Hinged Half Fan (peel as built‑in clamp) + +- Steps: + - Halve apple top‑to‑bottom; on each half, cut a shallow V to remove core, leaving + outer peel intact. + - Place half cut‑side down; make parallel cuts almost through, leaving a 3–5 mm peel + hinge. + - Wedges: slice through the hinge every few cuts to release wedge clusters. + - Thin slices: shear off the hinge in one pass to free a fan of thin slices. + - Small dice: fan slices, rotate 90°, cross‑cut to dice; repeat. + - Repeat with second half; mince any residual core bits into the dice. +- Tips: The peel hinge stabilizes the half and keeps fingers distanced. +- Rationale: Peel “hinge” speeds repetitive cuts with safer control and minimal resets. +- Assumptions: Peel left on; paring knife OK for the V‑cut; apples not mealy. + +• ↳ #creative_ideas completed finished in 33.6s + +───────────────────────────────────────────────────────────────────────────────────────────── + +• Variant 1 — Wedges (Quadrants) + +1. Set apple stem-up on the board; steady the top with your non-dominant hand in a claw + (fingertips curled). +2. 
With your dominant hand, cut straight down about 1 cm to the right of the stem; keep the + blade vertical. +3. Rotate 90° and repeat on the remaining three sides to remove four lobes; discard the + core. (Optional: use a corer first, then quarter.) +4. Place each lobe skin-side down; hold the far end with a claw grip. +5. Remove the seed line by slicing a shallow V from each lobe, tip toward the board. +6. Cut each de-cored lobe lengthwise into 2–3 even wedges, moving your claw hand back as + you cut. + +Variant 2 — Crosswise Slices, Then Core + +1. Lay apple on its side; hold with your non-dominant hand in a claw, knuckles forward. +2. Slice off a thin end cap to expose flesh and stabilize. +3. Make thin crosswise slices (2–4 mm), keeping the blade square to the board; slide your + claw hand back as you go. +4. Stack 2–3 slices; place your palm flat on top, fingertips lifted away. +5. Remove cores from the stack: cut a small V around the seed pocket with a paring knife, + or punch out the center with a corer/small round cutter. +6. Separate slices; optionally halve for half-moons while maintaining a claw grip to + steady. + +Safety + +- Use a claw grip: curl fingertips and guide the blade against your knuckles; never reach + under the blade. +- Keep food stable: set flat sides down or create a flat surface before slicing; dry board + and hands if wet. + +• ↳ #conservative_ideas completed finished in 40.6s + +───────────────────────────────────────────────────────────────────────────────────────────── + +• /tmp/notes/request-2025-10-16T00:00:00Z.md + +• ↳ #request_summarizer completed finished in 57.5s + +• ↳ #critic… Chosen approach: “Four‑Cheek Panels” for cutting an apple. Summary: Stand the +apple; slice off two wide cheeks close to the core, then two narrower cheeks, leaving a +rectangular core. Lay cheeks skin‑side down. For wedges: split cheeks lengthwise into 2–3 +wedges. For thin slices: stack cheeks and slice crosswise 2–3 mm. 
For dice: stack slices into +matchsticks, then cross‑cut. Why it wins: fastest common method, minimal tools, flat stable +surfaces, low waste, supports wedges/slices/dice. Please respond with concise bullets: key +risks, mitigations, and any clarity improvements to the steps. + +─ Worked for 31s ──────────────────────────────────────────────────────────────────────────── + +• - Standing a round apple risks slipping; instruct to trim a thin base for stability before +cheek cuts. + +- Slicing too close to the core leaves seeds and gristle; specify a 3–5 mm offset from + the seeds. +- Thickness guidance is vague for wedges; define target widths and encourage a claw grip to + control spacing. +- Method may waste shoulder or bottom flesh; add a pass to shave remaining sidewalls around + the rectangular core. +- Validate speed and waste across apple sizes via timed, weighed trials; owner: recipe + author. + +• ↳ #critic completed finished in 32.8s • Updated Plan -└ ✔ Gather three design options -✔ Critique leading approach -✔ Synthesize recommendation +└ Delegates returned ideas and critic feedback; moving to final synthesis. +✔ Log request (detached) +✔ Understand request +✔ Run ideas delegates +✔ Consult critic +□ Synthesize one-paragraph reply ───────────────────────────────────────────────────────────────────────────────────────────── -• Recommendation - -- Choose the optimized bubble sort: in-place, stable, with early-exit and a shrinking - boundary using the last-swapped index. It retains simplicity while improving best-case - behavior on nearly-sorted inputs. -- API: operate on a Python list in place; return None (list.sort-style). Support an - optional key callable; skip reverse/comp for now. -- Comparisons: swap only when strictly greater to preserve stability. When key is provided, - compare key(a[i]) to key(a[i+1]). -- Types: require mutually comparable elements under the chosen key. 
Mixed/incomparable - types raise TypeError and may leave partial reordering at the failure point (document). -- NaN/partial orders: exclude NaN from supported inputs to keep behavior predictable and - consistent with tests vs sorted; document explicitly. -- Key strategy: recompute keys (O(1) space) for simplicity; tests should track call counts - to avoid timing flakiness. -- Boundary update: use the right index of the last swap as the exclusive upper bound for - the next pass, to avoid off-by-one errors. - -What to verify before coding - -- Finalize loop limits and boundary rule: exact indices and termination when no swaps - occur. -- Lock API details: list-only, in-place, return None, optional key, no reverse. -- Clarify errors: TypeError on incomparable mixes; exceptions from key propagate; note - possible partial reordering on failure. -- Decide NaN policy: explicitly unsupported and tested as such. -- Tests to include: - - Table-driven: empty, one element, two elements (sorted/reversed), already sorted, - reverse sorted, many equals, alternating high/low, nearly sorted with a late - inversion, negative numbers, large magnitudes. - - Stability: repeated equal keys using (value, original_index) to assert order - preservation. - - Key behavior: with/without key; expensive key with a counter; key that raises. - - Type policy: ints+strings triggering TypeError; ints+bools (comparable) behave as - expected. - - Floats: infinities allowed; NaN explicitly rejected or skipped per policy. - - Instrumentation-based performance sanity: count passes/comparisons/key calls (avoid - wall-clock). - -Rationale and consensus - -- Both delegates agree the optimized variant gives meaningful best-case gains with minimal - complexity, remains stable and in-place, and benefits from robust test scaffolding. 
-- Primary risks: off-by-one in the boundary logic, accidental stability regressions - (swapping on equality), undefined behavior around NaN/partial orders, and noisy timing- - based tests. -- Blockers to resolve up front: NaN policy, exact boundary rule, key recomputation vs - caching (we recommend recompute), and explicit API/exception semantics. +• Goal: Give safe, efficient apple-cutting steps. Constraints: brief, agent-driven, final ≤75 +words. Missing: preferred cut (wedges/slices/dice), peel preference, tools (knife only or +corer). Questions: Which cut shape? Peel on/off? Knife type available? + +Final approach: Trim a thin base for stability, stand apple, slice off two wide and +two narrow cheeks ~3–5 mm from seeds. Lay cheeks skin-side down. Cut into wedges, thin +crosswise slices, or dice (matchsticks then cross-cut). Use a claw grip; shave remaining +sidewalls to reduce waste. + +› /tmp/notes/request-2025-10-16T00:00:00Z.md + +• Dismissed detached run dc4cc325-50fc-45f0-bdea-41e0c69bc652 + +─ Worked for 18s ──────────────────────────────────────────────────────────────────────────── + +• Wrapping up: I’ll mark the plan done and send the final synthesis. + +• Updated Plan +└ Detached run produced the summary path; ideas and critic steps complete. Delivering final +≤75-word paragraph. +✔ Log request (detached) +✔ Understand request +✔ Run ideas delegates +✔ Consult critic +✔ Synthesize one-paragraph reply + +─ Worked for 20s ──────────────────────────────────────────────────────────────────────────── + +• Trim a thin base so the apple stands. Slice off two wide and two narrow cheeks about 3–5 +mm from the seeds, then set cheeks skin‑side down. Cut into wedges, 2–3 mm slices, or dice +(matchsticks, then cross‑cut). Use a claw grip, keep cuts vertical, and shave remaining +core sidewalls to reduce waste. Tell me your preferred shape and tools (knife only or +corer) to tailor thickness and portioning. 
diff --git a/ai-temp/parallel-delegation.md b/ai-temp/parallel-delegation.md index f94235ee2456..b213ce0d2fe0 100644 --- a/ai-temp/parallel-delegation.md +++ b/ai-temp/parallel-delegation.md @@ -30,14 +30,42 @@ approaches for synchronous, parallel, and detached (fire-and-forget) sub-agent r - Remaining task: expand prompts/docs so agents understand that parallel delegates are available, how the UI surfaces them, and when to leverage concurrency. -## 3. Detached Delegation (Status: future work) -- Requires a non-blocking variant (e.g., `delegate_agent_async`) that returns immediately with a - `run_id` and relies on the orchestrator’s event stream for progress. -- Must surface background activity in the UI: notification list, optional “attach to run” - command, and stored sessions leveraging `AgentOrchestrator::store_session`. -- Needs policy for auto-cleanup and rate limiting so runaway agents do not flood the orchestrator. -- Join-on-demand flow could reuse existing session switching helpers once a run finishes or when - the user opts in. +## 3. Detached Delegation (Status: implemented) +- Introduce `delegate_agent_async` (or `delegate_agent` with `mode: "detached"`) that returns an + immediate acknowledgement (`{"status":"accepted"}`) without exposing a `run_id`; the model resumes + its turn while the user decides when to inspect the run. +- The orchestrator now keeps a detached-run registry keyed by run id (pending/ready/failed) with agent + id, prompt preview, timestamps, and any conversation handle; completion hooks still fan updates into + the notification bus so the UI can show progress without blocking the caller. 
+- Surface detached runs in the TUI via notifications plus the existing `/agent` picker: pending entries + appear as informational rows (with prompt preview) until they finish, failed entries gain a dismissal + action, and ready runs appear alongside other delegate sessions with an extra "Dismiss detached run" + option; returning to the main agent can either move the full conversation back into the primary + transcript or discard it entirely, matching the current non-detached behavior. +- Reuse `[multi_agent].max_concurrent_delegates` as the throttle for detached work while a run is + actively executing; once it finishes and awaits user review it no longer counts toward the cap. + When the cap is reached, return a `queue_full` error through the tool response so agents can + apologize or retry later; do not auto-expire completed runs—only user dismissal removes them. +- Update prompts/docs so agents know when to choose async delegation (long-running tasks, optional + review) versus synchronous/parallel paths, and capture open questions about acknowledgement metadata + (beyond `status`/`session_id`/`conversation_id`) and whether summaries should ever auto-inject into + the main transcript. +- Hook completion into both notification systems: + - TUI: add a new `Notification::DetachedRunFinished` variant so unfocused terminals surface the + alert when detached work completes (subject to `tui.notifications` filters). + - External: extend `UserNotification` to emit a `detached-run-finished` payload when `config.notify` + is set, keeping headless/automation users in the loop. 
+- Surface detached sessions in the existing `/agent` picker by reusing `DelegateSessionSummary`: + mark summaries spawned from detached runs with a `Detached` mode, prefix their labels (e.g., + “Detached · #agent · pending/finished”), keep the existing `last_interacted_at` ordering, and + dispatch the same `AppEvent::EnterDelegateSession` so users attach to the run through the normal + agent-switch flow. Returning follows the current summary flow (apply vs. dismiss); no additional + grouping or status-card integration is planned for v1. +- Implementation status: core/tooling now accepts `mode = "detached"`, the orchestrator both tracks + run mode and maintains the detached-run registry, synchronous responses still stream summaries, the + TUI renders detached sessions with prefixed labels plus desktop notifications when they finish or + fail, and external `config.notify` hooks emit `detached-run-finished` payloads (failures populate the + `error` field rather than using a distinct type). ## 4. Next Decisions 1. Pick a parallelization strategy (simple flag + UI refactor vs. dedicated helper). diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 14e4a500a6f3..83b0952e217a 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1594,6 +1594,7 @@ version = "0.0.0" dependencies = [ "anyhow", "assert_cmd", + "async-trait", "codex-core", "notify", "regex-lite", diff --git a/codex-rs/core/src/delegate_tool.rs b/codex-rs/core/src/delegate_tool.rs index 5a2d8b90323e..313caba5174d 100644 --- a/codex-rs/core/src/delegate_tool.rs +++ b/codex-rs/core/src/delegate_tool.rs @@ -17,6 +17,22 @@ pub struct DelegateToolContext { pub hints: Vec, } +/// Invocation strategy for the delegate tool. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub enum DelegateInvocationMode { + /// Blocks the caller until the delegate completes, returning its summary. 
+ Immediate, + /// Starts the delegate in the background and returns immediately. + Detached, +} + +impl Default for DelegateInvocationMode { + fn default() -> Self { + Self::Immediate + } +} + /// Single entry in a batched delegate request. #[derive(Debug, Clone, Default, Deserialize, Serialize)] pub struct DelegateToolBatchEntry { @@ -24,6 +40,8 @@ pub struct DelegateToolBatchEntry { pub prompt: String, #[serde(default)] pub context: DelegateToolContext, + #[serde(default)] + pub mode: DelegateInvocationMode, } /// Payload sent by the primary agent when invoking the delegate tool. @@ -35,6 +53,8 @@ pub struct DelegateToolRequest { pub context: DelegateToolContext, #[serde(default, skip_serializing_if = "Option::is_none", skip_deserializing)] pub caller_conversation_id: Option, + #[serde(default)] + pub mode: DelegateInvocationMode, #[serde(default, skip_serializing_if = "Vec::is_empty")] pub batch: Vec, } @@ -78,6 +98,8 @@ pub struct DelegateToolRun { pub enum DelegateToolError { #[error("another delegate is already running")] DelegateInProgress, + #[error("delegate queue is full")] + QueueFull, #[error("agent `{0}` not found")] AgentNotFound(String), #[error("delegate setup failed: {0}")] diff --git a/codex-rs/core/src/tools/handlers/delegate.rs b/codex-rs/core/src/tools/handlers/delegate.rs index 745b42add91f..81e205ef7895 100644 --- a/codex-rs/core/src/tools/handlers/delegate.rs +++ b/codex-rs/core/src/tools/handlers/delegate.rs @@ -1,6 +1,8 @@ use crate::client_common::tools::ResponsesApiTool; use crate::client_common::tools::ToolSpec; +use crate::codex::Session; use crate::delegate_tool::DelegateEventReceiver; +use crate::delegate_tool::DelegateInvocationMode; use crate::delegate_tool::DelegateToolAdapter; use crate::delegate_tool::DelegateToolContext; use crate::delegate_tool::DelegateToolError; @@ -14,6 +16,7 @@ use crate::tools::context::ToolOutput; use crate::tools::context::ToolPayload; use crate::tools::registry::ToolHandler; use 
crate::tools::registry::ToolKind; +use crate::user_notification::UserNotification; use async_trait::async_trait; use codex_protocol::ConversationId; use serde::Deserialize; @@ -21,6 +24,7 @@ use serde::Serialize; use std::collections::BTreeMap; use std::collections::HashMap; use std::collections::HashSet; +use std::sync::Arc; use std::sync::LazyLock; use std::time::Duration; @@ -57,6 +61,16 @@ pub static DELEGATE_TOOL: LazyLock = LazyLock::new(|| { description: Some("Instructions passed to the sub-agent".to_string()), }, ); + batch_entry_props.insert( + "mode".to_string(), + JsonSchema::String { + description: Some( + "Invocation mode. Use \"immediate\" for blocking delegation (default) or \ + \"detached\" to run in the background." + .to_string(), + ), + }, + ); batch_entry_props.insert( "context".to_string(), JsonSchema::Object { @@ -87,6 +101,16 @@ pub static DELEGATE_TOOL: LazyLock = LazyLock::new(|| { additional_properties: Some(false.into()), }, ); + properties.insert( + "mode".to_string(), + JsonSchema::String { + description: Some( + "Invocation mode. Use \"immediate\" for blocking delegation (default) or \ + \"detached\" to run in the background." 
+ .to_string(), + ), + }, + ); properties.insert( "batch".to_string(), JsonSchema::Array { @@ -126,6 +150,8 @@ struct DelegateToolArgs { #[serde(default)] context: Option, #[serde(default)] + mode: Option, + #[serde(default)] batch: Vec, } @@ -142,6 +168,8 @@ struct DelegateToolBatchArgs { prompt: String, #[serde(default)] context: Option, + #[serde(default)] + mode: Option, } impl From for DelegateToolContext { @@ -156,8 +184,10 @@ impl From for DelegateToolContext { #[derive(Debug, Serialize)] struct DelegateToolResponse { status: &'static str, - agent_id: String, - run_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + agent_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + run_id: Option, #[serde(skip_serializing_if = "Option::is_none")] summary: Option, #[serde(skip_serializing_if = "Option::is_none")] @@ -210,6 +240,13 @@ impl ToolHandler for DelegateToolHandler { )); } + if !args.batch.is_empty() && args.mode.is_some() { + return Err(FunctionCallError::RespondToModel( + "`mode` cannot be combined with `batch`; set the mode on each batch entry instead" + .into(), + )); + } + let adapter = session.delegate_adapter().ok_or_else(|| { FunctionCallError::RespondToModel("delegate tool is not available".to_string()) })?; @@ -240,24 +277,52 @@ impl ToolHandler for DelegateToolHandler { FunctionCallError::RespondToModel("missing `prompt` for delegate_agent call".into()) })?; + let mode = args.mode.unwrap_or_default(); + let request = DelegateToolRequest { agent_id: agent_id.clone(), prompt: prompt.clone(), context: args.context.unwrap_or_default().into(), caller_conversation_id: Some(conversation_id.to_string()), + mode, batch: Vec::new(), }; let run = adapter.delegate(request).await.map_err(map_adapter_error)?; + if mode == DelegateInvocationMode::Detached { + let response = DelegateToolResponse { + status: "accepted", + agent_id: None, + run_id: None, + summary: None, + duration_ms: None, + }; + let content = 
serde_json::to_string(&response).map_err(|e| { + FunctionCallError::Fatal(format!("failed to serialize response: {e}")) + })?; + + let session_clone = Arc::clone(&session); + let run_id = run.run_id.clone(); + let agent_id = run.agent_id.clone(); + tokio::spawn(async move { + monitor_detached_run(events, session_clone, run_id, agent_id).await; + }); + + return Ok(ToolOutput::Function { + content, + success: Some(true), + }); + } + let (summary, duration) = wait_for_completion(&mut events, &run) .await .map_err(FunctionCallError::RespondToModel)?; let response = DelegateToolResponse { status: "ok", - agent_id: run.agent_id, - run_id: run.run_id, + agent_id: Some(run.agent_id), + run_id: Some(run.run_id), summary, duration_ms: duration.map(|d| d.as_millis() as u64), }; @@ -329,11 +394,19 @@ async fn handle_batch_entries( let conversation_id = conversation_id.to_string(); for entry in batch { + let mode = entry.mode.unwrap_or_default(); + if mode == DelegateInvocationMode::Detached { + return Err(FunctionCallError::RespondToModel( + "detached delegation is not supported within `batch` requests".to_string(), + )); + } + let request = DelegateToolRequest { agent_id: entry.agent_id.clone(), prompt: entry.prompt.clone(), context: entry.context.unwrap_or_default().into(), caller_conversation_id: Some(conversation_id.clone()), + mode, batch: Vec::new(), }; @@ -400,6 +473,10 @@ fn map_adapter_error(err: DelegateToolError) -> FunctionCallError { "another delegate is already running; wait for it to finish before delegating again" .to_string(), ), + DelegateToolError::QueueFull => FunctionCallError::RespondToModel( + "delegate queue is full; wait for background delegates to finish before delegating again" + .to_string(), + ), DelegateToolError::AgentNotFound(agent_id) => FunctionCallError::RespondToModel(format!( "delegate agent `{agent_id}` is not configured" )), @@ -409,6 +486,68 @@ fn map_adapter_error(err: DelegateToolError) -> FunctionCallError { } } +async fn 
monitor_detached_run( + mut events: DelegateEventReceiver, + session: Arc, + run_id: String, + agent_id: String, +) { + let mut collected = String::new(); + + while let Some(event) = events.recv().await { + if event_run_id(&event) != run_id.as_str() { + continue; + } + + match event { + DelegateToolEvent::Delta { chunk, .. } => { + collected.push_str(&chunk); + } + DelegateToolEvent::Completed { + output, duration, .. + } => { + let summary = output.or_else(|| { + if collected.trim().is_empty() { + None + } else { + Some(collected.clone()) + } + }); + session + .notifier() + .notify(&UserNotification::DetachedRunFinished { + agent_id: agent_id.clone(), + run_id: run_id.clone(), + conversation_id: Some(session.conversation_id().to_string()), + summary, + duration_ms: Some(duration.as_millis() as u64), + error: None, + }); + break; + } + DelegateToolEvent::Failed { error, .. } => { + let summary = if collected.trim().is_empty() { + None + } else { + Some(collected.clone()) + }; + session + .notifier() + .notify(&UserNotification::DetachedRunFinished { + agent_id: agent_id.clone(), + run_id: run_id.clone(), + conversation_id: Some(session.conversation_id().to_string()), + summary, + duration_ms: None, + error: Some(error), + }); + break; + } + _ => {} + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -469,11 +608,13 @@ mod tests { agent_id: "alpha".into(), prompt: "one".into(), context: None, + mode: None, }, DelegateToolBatchArgs { agent_id: "bravo".into(), prompt: "two".into(), context: None, + mode: None, }, ]; diff --git a/codex-rs/core/src/user_notification.rs b/codex-rs/core/src/user_notification.rs index be96d562705b..5b3aa20ef824 100644 --- a/codex-rs/core/src/user_notification.rs +++ b/codex-rs/core/src/user_notification.rs @@ -58,6 +58,19 @@ pub(crate) enum UserNotification { /// The last message sent by the assistant in the turn. 
last_assistant_message: Option, }, + #[serde(rename_all = "kebab-case")] + DetachedRunFinished { + agent_id: String, + run_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + conversation_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + summary: Option, + #[serde(skip_serializing_if = "Option::is_none")] + duration_ms: Option, + #[serde(skip_serializing_if = "Option::is_none")] + error: Option, + }, } #[cfg(test)] @@ -82,4 +95,22 @@ mod tests { ); Ok(()) } + + #[test] + fn test_detached_run_notification() -> Result<()> { + let notification = UserNotification::DetachedRunFinished { + agent_id: "ideas_provider".to_string(), + run_id: "run-1".to_string(), + conversation_id: Some("conv-123".to_string()), + summary: Some("Drafted outline".to_string()), + duration_ms: Some(42_000), + error: None, + }; + let serialized = serde_json::to_string(¬ification)?; + assert_eq!( + serialized, + r#"{"type":"detached-run-finished","agent-id":"ideas_provider","run-id":"run-1","conversation-id":"conv-123","summary":"Drafted outline","duration-ms":42000}"# + ); + Ok(()) + } } diff --git a/codex-rs/core/tests/common/Cargo.toml b/codex-rs/core/tests/common/Cargo.toml index b3082dc54855..91c711573512 100644 --- a/codex-rs/core/tests/common/Cargo.toml +++ b/codex-rs/core/tests/common/Cargo.toml @@ -7,6 +7,7 @@ version = { workspace = true } path = "lib.rs" [dependencies] +async-trait = { workspace = true } anyhow = { workspace = true } assert_cmd = { workspace = true } codex-core = { workspace = true } diff --git a/codex-rs/core/tests/common/test_codex.rs b/codex-rs/core/tests/common/test_codex.rs index 0e07d82228d3..f51f68f7997d 100644 --- a/codex-rs/core/tests/common/test_codex.rs +++ b/codex-rs/core/tests/common/test_codex.rs @@ -2,6 +2,8 @@ use std::mem::swap; use std::path::PathBuf; use std::sync::Arc; +use async_trait::async_trait; +use codex_core::AuthManager; use codex_core::CodexAuth; use codex_core::CodexConversation; use 
codex_core::ConversationManager; @@ -9,11 +11,57 @@ use codex_core::ModelProviderInfo; use codex_core::NewConversation; use codex_core::built_in_model_providers; use codex_core::config::Config; +use codex_core::delegate_tool::DelegateEventReceiver; +use codex_core::delegate_tool::DelegateToolAdapter; +use codex_core::delegate_tool::DelegateToolError; +use codex_core::delegate_tool::DelegateToolEvent; +use codex_core::delegate_tool::DelegateToolRequest; +use codex_core::delegate_tool::DelegateToolRun; use codex_core::protocol::SessionConfiguredEvent; +use codex_core::protocol::SessionSource; use tempfile::TempDir; +use tokio::sync::Mutex; +use tokio::sync::mpsc; use crate::load_default_config_for_test; +#[derive(Default)] +struct TestDelegateAdapter { + sender: Mutex>>, + counter: std::sync::atomic::AtomicUsize, +} + +#[async_trait] +impl DelegateToolAdapter for TestDelegateAdapter { + async fn subscribe(&self) -> DelegateEventReceiver { + let (tx, rx) = mpsc::unbounded_channel(); + *self.sender.lock().await = Some(tx); + rx + } + + async fn delegate( + &self, + request: DelegateToolRequest, + ) -> Result { + let idx = self + .counter + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); + let run_id = format!("test-run-{idx}"); + if let Some(sender) = self.sender.lock().await.as_ref() { + let _ = sender.send(DelegateToolEvent::Completed { + run_id: run_id.clone(), + agent_id: request.agent_id.clone(), + output: Some(request.prompt.clone()), + duration: std::time::Duration::from_millis(1), + }); + } + Ok(DelegateToolRun { + run_id, + agent_id: request.agent_id, + }) + } +} + type ConfigMutator = dyn FnOnce(&mut Config) + Send; pub struct TestCodexBuilder { @@ -52,7 +100,20 @@ impl TestCodexBuilder { for mutator in mutators { mutator(&mut config) } - let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy")); + + let delegate_adapter: Option> = if config.include_delegate_tool + { + Some(Arc::new(TestDelegateAdapter::default())) + } 
else { + None + }; + + let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("dummy")); + let conversation_manager = ConversationManager::with_delegate( + auth_manager.clone(), + SessionSource::Exec, + delegate_adapter, + ); let NewConversation { conversation, session_configured, diff --git a/codex-rs/multi-agent/src/lib.rs b/codex-rs/multi-agent/src/lib.rs index 908316ac6be3..7901e344cef0 100644 --- a/codex-rs/multi-agent/src/lib.rs +++ b/codex-rs/multi-agent/src/lib.rs @@ -355,7 +355,10 @@ pub use orchestrator::DelegateEvent; pub use orchestrator::DelegatePrompt; pub use orchestrator::DelegateRequest; pub use orchestrator::DelegateRunId; +pub use orchestrator::DelegateSessionMode; pub use orchestrator::DelegateSessionSummary; +pub use orchestrator::DetachedRunStatusSummary; +pub use orchestrator::DetachedRunSummary; use orchestrator::MultiAgentDelegateAdapter; pub use orchestrator::OrchestratorError; diff --git a/codex-rs/multi-agent/src/orchestrator.rs b/codex-rs/multi-agent/src/orchestrator.rs index 3dfb4ac38588..59409b5d2e22 100644 --- a/codex-rs/multi-agent/src/orchestrator.rs +++ b/codex-rs/multi-agent/src/orchestrator.rs @@ -12,6 +12,7 @@ use codex_core::ConversationManager; use codex_core::config::Config; use codex_core::config::ConfigOverrides; use codex_core::delegate_tool::DelegateEventReceiver as CoreDelegateEventReceiver; +use codex_core::delegate_tool::DelegateInvocationMode; use codex_core::delegate_tool::DelegateToolAdapter; use codex_core::delegate_tool::DelegateToolError; use codex_core::delegate_tool::DelegateToolEvent as CoreDelegateToolEvent; @@ -30,6 +31,16 @@ use uuid::Uuid; use crate::AgentConfigLoader; use crate::AgentId; +fn prompt_preview(text: &str) -> Option { + let trimmed = text.trim(); + if trimmed.is_empty() { + return None; + } + const MAX_LEN: usize = 120; + let preview = trimmed.chars().take(MAX_LEN).collect::(); + Some(preview) +} + /// Identifier used to correlate delegate runs. 
pub type DelegateRunId = String; @@ -40,6 +51,7 @@ pub struct DelegateRequest { pub prompt: DelegatePrompt, pub user_initial: Vec, pub parent_run_id: Option, + pub mode: DelegateInvocationMode, } /// The prompt content forwarded to the sub-agent. @@ -63,6 +75,7 @@ pub enum DelegateEvent { prompt: String, started_at: SystemTime, parent_run_id: Option, + mode: DelegateSessionMode, }, Delta { run_id: DelegateRunId, @@ -74,11 +87,13 @@ pub enum DelegateEvent { agent_id: AgentId, output: Option, duration: Duration, + mode: DelegateSessionMode, }, Failed { run_id: DelegateRunId, agent_id: AgentId, error: String, + mode: DelegateSessionMode, }, } @@ -87,6 +102,8 @@ pub enum DelegateEvent { pub enum OrchestratorError { #[error("another delegate is already running")] DelegateInProgress, + #[error("delegate queue is full")] + QueueFull, #[error("agent `{0}` not found")] AgentNotFound(String), #[error("failed to enqueue delegate: {0}")] @@ -102,6 +119,32 @@ pub struct DelegateSessionSummary { pub agent_id: AgentId, pub last_interacted_at: SystemTime, pub cwd: PathBuf, + pub mode: DelegateSessionMode, +} + +/// Indicates whether a session originated from a detached run. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DelegateSessionMode { + Standard, + Detached, +} + +#[derive(Debug, Clone)] +pub struct DetachedRunSummary { + pub run_id: String, + pub agent_id: AgentId, + pub started_at: SystemTime, + pub prompt_preview: Option, + pub status: DetachedRunStatusSummary, +} + +#[derive(Debug, Clone)] +pub enum DetachedRunStatusSummary { + Pending, + Failed { + error: String, + finished_at: SystemTime, + }, } /// Payload returned when entering an existing delegate session. 
@@ -127,6 +170,7 @@ pub struct AgentOrchestrator { allowed_agents: Vec, run_conversations: Mutex>, conversation_runs: Mutex>, + detached_runs: Mutex>, max_concurrent_runs: usize, } @@ -153,6 +197,7 @@ impl AgentOrchestrator { allowed_agents, run_conversations: Mutex::new(HashMap::new()), conversation_runs: Mutex::new(HashMap::new()), + detached_runs: Mutex::new(HashMap::new()), max_concurrent_runs: max_concurrent_runs.max(1), } } @@ -198,13 +243,30 @@ impl AgentOrchestrator { request: DelegateRequest, ) -> std::result::Result { let run_id = Uuid::new_v4().to_string(); + let session_mode = match request.mode { + DelegateInvocationMode::Detached => DelegateSessionMode::Detached, + _ => DelegateSessionMode::Standard, + }; { let mut active = self.active_runs.lock().await; if active.len() >= self.max_concurrent_runs { - return Err(OrchestratorError::DelegateInProgress); + return Err(OrchestratorError::QueueFull); } active.push(run_id.clone()); } + if session_mode == DelegateSessionMode::Detached { + let mut registry = self.detached_runs.lock().await; + registry.insert( + run_id.clone(), + DetachedRunRecord { + agent_id: request.agent_id.clone(), + started_at: SystemTime::now(), + prompt_preview: prompt_preview(&request.prompt.text), + cwd: None, + status: DetachedRunStatus::Pending, + }, + ); + } let parent_run_id = request.parent_run_id.clone(); let prompt_text = request.prompt.text.clone(); @@ -214,6 +276,7 @@ impl AgentOrchestrator { prompt: prompt_text, started_at: SystemTime::now(), parent_run_id: parent_run_id.clone(), + mode: session_mode, }) .await; @@ -241,6 +304,9 @@ impl AgentOrchestrator { match result { Ok(output) => { orchestrator.store_session(&output).await; + orchestrator + .mark_detached_ready(&run_id_clone, &output) + .await; let agent_id = output.agent_id.clone(); let message = output.message.clone(); let duration = output.duration; @@ -250,15 +316,20 @@ impl AgentOrchestrator { agent_id, output: message, duration, + mode: output.mode, }) .await; } 
Err(err) => { + orchestrator + .mark_detached_failed(&run_id_clone, &err.error) + .await; orchestrator .emit(DelegateEvent::Failed { run_id: run_id_clone.clone(), agent_id: err.agent_id, error: err.error, + mode: err.mode, }) .await; } @@ -296,6 +367,67 @@ impl AgentOrchestrator { summaries } + /// Return detached runs that are not yet ready to attach or have failed. + pub async fn detached_runs(&self) -> Vec { + let registry = self.detached_runs.lock().await; + let mut summaries: Vec = registry + .iter() + .filter_map(|(run_id, record)| match &record.status { + DetachedRunStatus::Pending => Some(DetachedRunSummary { + run_id: run_id.clone(), + agent_id: record.agent_id.clone(), + started_at: record.started_at, + prompt_preview: record.prompt_preview.clone(), + status: DetachedRunStatusSummary::Pending, + }), + DetachedRunStatus::Failed { error, finished_at } => Some(DetachedRunSummary { + run_id: run_id.clone(), + agent_id: record.agent_id.clone(), + started_at: record.started_at, + prompt_preview: record.prompt_preview.clone(), + status: DetachedRunStatusSummary::Failed { + error: error.clone(), + finished_at: *finished_at, + }, + }), + DetachedRunStatus::Ready { .. } => None, + }) + .collect(); + summaries.sort_by(|a, b| b.started_at.cmp(&a.started_at)); + summaries + } + + /// Remove a detached run from the registry and drop any stored session if present. + pub async fn dismiss_detached_run(&self, run_id: &str) -> Result<(), String> { + let conversation_to_remove = { + let mut registry = self.detached_runs.lock().await; + let record = registry + .get(run_id) + .ok_or_else(|| format!("detached run `{run_id}` not found"))?; + match &record.status { + DetachedRunStatus::Pending => { + return Err("run is still in progress".to_string()); + } + DetachedRunStatus::Ready { + conversation_id, .. + } => { + let conversation_id = conversation_id.clone(); + registry.remove(run_id); + Some(conversation_id) + } + DetachedRunStatus::Failed { .. 
} => { + registry.remove(run_id); + None + } + } + }; + + if let Some(conversation_id) = conversation_to_remove { + self.remove_session(&conversation_id).await; + } + Ok(()) + } + /// Enter an existing delegate session for direct interaction. pub async fn enter_session( &self, @@ -335,6 +467,7 @@ impl AgentOrchestrator { agent_id: success.agent_id.clone(), last_interacted_at: SystemTime::now(), cwd: success.cwd.clone(), + mode: success.mode, }; sessions.insert( success.conversation_id.clone(), @@ -347,6 +480,29 @@ impl AgentOrchestrator { ); } + async fn mark_detached_ready(&self, run_id: &DelegateRunId, success: &DelegateSuccess) { + let mut registry = self.detached_runs.lock().await; + if let Some(record) = registry.get_mut(run_id) { + record.cwd = Some(success.cwd.clone()); + record.status = DetachedRunStatus::Ready { + conversation_id: success.conversation_id.clone(), + _summary: success.message.clone(), + _duration: success.duration, + _finished_at: SystemTime::now(), + }; + } + } + + async fn mark_detached_failed(&self, run_id: &DelegateRunId, error: &str) { + let mut registry = self.detached_runs.lock().await; + if let Some(record) = registry.get_mut(run_id) { + record.status = DetachedRunStatus::Failed { + error: error.to_string(), + finished_at: SystemTime::now(), + }; + } + } + #[allow(clippy::too_many_arguments)] async fn run_delegate_task( self: Arc, @@ -360,12 +516,17 @@ impl AgentOrchestrator { ) -> std::result::Result { let start = SystemTime::now(); let agent_id = request.agent_id.clone(); + let session_mode = match request.mode { + DelegateInvocationMode::Detached => DelegateSessionMode::Detached, + _ => DelegateSessionMode::Standard, + }; let context = loader .load(Some(&agent_id), &cli_overrides, config_overrides) .await .map_err(|err| DelegateFailure { agent_id: agent_id.clone(), error: format!("failed to load agent config: {err:#}"), + mode: session_mode, })?; let config = context.into_config(); @@ -384,6 +545,7 @@ impl AgentOrchestrator { 
.map_err(|err| DelegateFailure { agent_id: agent_id.clone(), error: format!("failed to start conversation: {err:#}"), + mode: session_mode, })?; let conversation_id = conversation_bundle.conversation_id.to_string(); self.register_run_conversation(&run_id, &conversation_id) @@ -404,6 +566,7 @@ impl AgentOrchestrator { .map_err(|err| DelegateFailure { agent_id: agent_id.clone(), error: format!("failed to submit delegate prompt: {err:#}"), + mode: session_mode, })?; let mut aggregated = String::new(); @@ -414,6 +577,7 @@ impl AgentOrchestrator { .map_err(|err| DelegateFailure { agent_id: agent_id.clone(), error: format!("failed to read delegate events: {err:#}"), + mode: session_mode, })?; match event.msg { @@ -454,18 +618,21 @@ impl AgentOrchestrator { config: config_clone.clone(), message, duration, + mode: session_mode, }); } EventMsg::Error(err) => { return Err(DelegateFailure { agent_id, error: format!("delegate reported error: {}", err.message), + mode: session_mode, }); } EventMsg::TurnAborted(reason) => { return Err(DelegateFailure { agent_id, error: format!("delegate aborted: {:?}", reason.reason), + mode: session_mode, }); } EventMsg::ShutdownComplete => break, @@ -476,6 +643,7 @@ impl AgentOrchestrator { Err(DelegateFailure { agent_id, error: "delegate ended unexpectedly".to_string(), + mode: session_mode, }) } } @@ -489,11 +657,13 @@ struct DelegateSuccess { config: Config, message: Option, duration: Duration, + mode: DelegateSessionMode, } struct DelegateFailure { agent_id: AgentId, error: String, + mode: DelegateSessionMode, } struct StoredDelegateSession { @@ -503,6 +673,28 @@ struct StoredDelegateSession { config: Config, } +struct DetachedRunRecord { + agent_id: AgentId, + started_at: SystemTime, + prompt_preview: Option, + cwd: Option, + status: DetachedRunStatus, +} + +enum DetachedRunStatus { + Pending, + Ready { + conversation_id: String, + _summary: Option, + _duration: Duration, + _finished_at: SystemTime, + }, + Failed { + error: String, + 
finished_at: SystemTime, + }, +} + pub struct MultiAgentDelegateAdapter { orchestrator: Arc, } @@ -520,6 +712,7 @@ impl MultiAgentDelegateAdapter { prompt, started_at, parent_run_id, + mode: _, } => CoreDelegateToolEvent::Started { run_id, agent_id: agent_id.as_str().to_string(), @@ -541,6 +734,7 @@ impl MultiAgentDelegateAdapter { agent_id, output, duration, + mode: _, } => CoreDelegateToolEvent::Completed { run_id, agent_id: agent_id.as_str().to_string(), @@ -551,6 +745,7 @@ impl MultiAgentDelegateAdapter { run_id, agent_id, error, + mode: _, } => CoreDelegateToolEvent::Failed { run_id, agent_id: agent_id.as_str().to_string(), @@ -562,6 +757,7 @@ impl MultiAgentDelegateAdapter { fn map_error(err: OrchestratorError) -> DelegateToolError { match err { OrchestratorError::DelegateInProgress => DelegateToolError::DelegateInProgress, + OrchestratorError::QueueFull => DelegateToolError::QueueFull, OrchestratorError::AgentNotFound(agent) => DelegateToolError::AgentNotFound(agent), OrchestratorError::DelegateSetupFailed(reason) => { DelegateToolError::SetupFailed(reason) @@ -597,7 +793,8 @@ impl DelegateToolAdapter for MultiAgentDelegateAdapter { prompt, context: _, caller_conversation_id, - batch: _, + mode, + .. 
} = request; let agent_id = AgentId::parse(agent_id_str.as_str()) @@ -618,6 +815,7 @@ impl DelegateToolAdapter for MultiAgentDelegateAdapter { prompt: DelegatePrompt::new(prompt), user_initial: Vec::new(), parent_run_id, + mode, }) .await .map_err(Self::map_error)?; diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index cafb95aab3f3..702ddfb9e1b9 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -28,7 +28,9 @@ use codex_core::protocol_config_types::ReasoningEffort as ReasoningEffortConfig; use codex_multi_agent::AgentId; use codex_multi_agent::AgentOrchestrator; use codex_multi_agent::DelegateEvent; +use codex_multi_agent::DelegateSessionMode; use codex_multi_agent::DelegateSessionSummary; +use codex_multi_agent::DetachedRunSummary; use codex_multi_agent::delegate_tool_adapter; use codex_protocol::ConversationId; use color_eyre::eyre::Result; @@ -520,15 +522,25 @@ impl App { } AppEvent::OpenDelegatePicker => { let sessions = self.delegate_orchestrator.active_sessions().await; - if sessions.is_empty() { - self.chat_widget.add_info_message( - "No delegate sessions available.".to_string(), - Some("Ask the main agent to delegate a task first.".to_string()), - ); - } else { - self.chat_widget - .open_delegate_picker(sessions, self.active_delegate.as_deref()); + let detached_runs: Vec = + self.delegate_orchestrator.detached_runs().await; + let mut picker_sessions = Vec::with_capacity(sessions.len()); + for summary in sessions { + let run_id = if summary.mode == DelegateSessionMode::Detached { + self.delegate_orchestrator + .parent_run_for_conversation(summary.conversation_id.as_str()) + .await + } else { + None + }; + picker_sessions + .push(crate::chatwidget::DelegatePickerSession { summary, run_id }); } + self.chat_widget.open_delegate_picker( + picker_sessions, + detached_runs, + self.active_delegate.as_deref(), + ); } AppEvent::EnterDelegateSession(conversation_id) => { if let Err(err) = self.activate_delegate_session(tui, 
conversation_id).await { @@ -544,6 +556,21 @@ impl App { .add_error_message(format!("Failed to return to main agent: {err}")); } } + AppEvent::DismissDetachedRun(run_id) => { + match self + .delegate_orchestrator + .dismiss_detached_run(&run_id) + .await + { + Ok(()) => self + .chat_widget + .add_info_message(format!("Dismissed detached run {run_id}"), None), + Err(err) => self.chat_widget.add_error_message(err), + } + } + AppEvent::InsertUserTextMessage(text) => { + self.chat_widget.submit_text_message(text); + } AppEvent::OpenReviewBranchPicker(cwd) => { self.chat_widget.show_review_branch_picker(&cwd).await; } @@ -583,6 +610,7 @@ impl App { agent_id, prompt, parent_run_id, + mode, .. } => { let display = self.delegate_tree.insert( @@ -602,6 +630,7 @@ impl App { &prompt, display.label, claim_status, + mode, ); } DelegateEvent::Delta { run_id, chunk, .. } => { @@ -612,7 +641,7 @@ impl App { agent_id, output, duration, - .. + mode, } => { let display = self.delegate_tree.display_for(&run_id, &agent_id); self.delegate_tree.remove(&run_id); @@ -641,11 +670,20 @@ impl App { }; self.chat_widget .add_delegate_completion(response, hint, &display.label); + if mode == DelegateSessionMode::Detached { + self.chat_widget.notify_detached_completion(&display.label); + self.chat_widget.show_detached_completion_actions( + &agent_id, + &run_id, + output.as_deref(), + ); + } } DelegateEvent::Failed { run_id, agent_id, error, + mode, } => { let display = self.delegate_tree.display_for(&run_id, &agent_id); self.delegate_tree.remove(&run_id); @@ -662,6 +700,10 @@ impl App { } self.chat_widget .on_delegate_failed(&run_id, &display.label, &error); + if mode == DelegateSessionMode::Detached { + self.chat_widget + .notify_detached_failure(&display.label, &error); + } } } } @@ -726,14 +768,14 @@ impl App { fn stash_active_delegate(&mut self) { if let Some(active_id) = self.active_delegate.take() { - let mut summary = self - .active_delegate_summary - .take() - .expect("delegate summary 
missing"); - let main_chat = self - .primary_chat_backup - .take() - .expect("primary chat missing when stashing delegate"); + let mut summary = match self.active_delegate_summary.take() { + Some(summary) => summary, + None => return, + }; + let Some(main_chat) = self.primary_chat_backup.take() else { + self.active_delegate_summary = Some(summary); + return; + }; summary.last_interacted_at = SystemTime::now(); let mut delegate_chat = std::mem::replace(&mut self.chat_widget, main_chat); delegate_chat.set_delegate_context(Some(summary.clone())); @@ -750,10 +792,9 @@ impl App { async fn return_to_primary(&mut self, tui: &mut tui::Tui) -> Result<(), String> { if let Some(active_id) = self.active_delegate.take() { - let mut summary = self - .active_delegate_summary - .take() - .expect("delegate summary missing"); + let Some(mut summary) = self.active_delegate_summary.take() else { + return Err("delegate summary missing".to_string()); + }; let capture = self.chat_widget.take_delegate_capture(); let main_chat = self .primary_chat_backup diff --git a/codex-rs/tui/src/app_event.rs b/codex-rs/tui/src/app_event.rs index b6f5a371b073..7a31b995756a 100644 --- a/codex-rs/tui/src/app_event.rs +++ b/codex-rs/tui/src/app_event.rs @@ -85,6 +85,12 @@ pub(crate) enum AppEvent { /// Return from the active delegate session to the main agent. ExitDelegateSession, + /// Dismiss a detached delegate run from the registry. + DismissDetachedRun(String), + + /// Inject text into the main composer as if the user typed it. + InsertUserTextMessage(String), + /// Forwarded conversation history snapshot from the current conversation. 
ConversationHistory(ConversationPathResponseEvent), diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 9ff183919973..b99e04f6d18f 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -121,7 +121,10 @@ use codex_git_tooling::GitToolingError; use codex_git_tooling::create_ghost_commit; use codex_git_tooling::restore_ghost_commit; use codex_multi_agent::AgentId; +use codex_multi_agent::DelegateSessionMode; use codex_multi_agent::DelegateSessionSummary; +use codex_multi_agent::DetachedRunStatusSummary; +use codex_multi_agent::DetachedRunSummary; use codex_protocol::plan_tool::UpdatePlanArgs; use strum::IntoEnumIterator; @@ -235,6 +238,12 @@ pub struct DelegateDisplayLabel { pub base_label: String, } +#[derive(Clone)] +pub struct DelegatePickerSession { + pub summary: DelegateSessionSummary, + pub run_id: Option, +} + pub(crate) struct ChatWidget { app_event_tx: AppEventSender, codex_op_tx: UnboundedSender, @@ -1977,10 +1986,11 @@ impl ChatWidget { pub(crate) fn open_delegate_picker( &mut self, - mut sessions: Vec, + mut sessions: Vec, + detached_runs: Vec, active_delegate: Option<&str>, ) { - if sessions.is_empty() { + if sessions.is_empty() && detached_runs.is_empty() { self.add_info_message( "No delegate sessions available.".to_string(), Some("Ask the main agent to delegate a task first.".to_string()), @@ -1988,7 +1998,11 @@ impl ChatWidget { return; } - sessions.sort_by(|a, b| b.last_interacted_at.cmp(&a.last_interacted_at)); + sessions.sort_by(|a, b| { + b.summary + .last_interacted_at + .cmp(&a.summary.last_interacted_at) + }); let mut items: Vec = Vec::new(); @@ -2005,10 +2019,17 @@ impl ChatWidget { }); } - for summary in sessions { + for entry in sessions { + let summary = entry.summary; + let run_id = entry.run_id; let conversation_id = summary.conversation_id.clone(); + let prefix = if summary.mode == DelegateSessionMode::Detached { + "Detached · " + } else { + "" + }; let label = format!( - "#{} 
· {}", + "{prefix}#{} · {}", summary.agent_id.as_str(), Self::format_delegate_timestamp(summary.last_interacted_at) ); @@ -2028,6 +2049,70 @@ impl ChatWidget { dismiss_on_select: true, ..Default::default() }); + + if summary.mode == DelegateSessionMode::Detached + && let Some(run_id) = run_id.clone() + { + let dismiss_actions: Vec = vec![Box::new(move |tx| { + tx.send(AppEvent::DismissDetachedRun(run_id.clone())); + })]; + items.push(SelectionItem { + name: format!(" Dismiss detached run for #{}", summary.agent_id.as_str()), + description: Some("Remove this detached run from the list.".to_string()), + is_current: false, + actions: dismiss_actions, + dismiss_on_select: true, + ..Default::default() + }); + } + } + + for detached in detached_runs { + let run_id = detached.run_id.clone(); + let status = detached.status.clone(); + let label = match &status { + DetachedRunStatusSummary::Pending => format!( + "Pending · #{} (started {})", + detached.agent_id.as_str(), + Self::format_delegate_timestamp(detached.started_at) + ), + DetachedRunStatusSummary::Failed { .. } => { + format!("Failed · #{}", detached.agent_id.as_str()) + } + }; + let description = match &status { + DetachedRunStatusSummary::Pending => { + let mut text = String::from( + "Run is still executing; you'll be able to dismiss it once it finishes.", + ); + if let Some(preview) = detached.prompt_preview.as_ref() { + text.push_str("\nPrompt: "); + text.push_str(preview); + } + Some(text) + } + DetachedRunStatusSummary::Failed { error, .. } => Some(format!("Error: {error}")), + }; + let (actions, dismiss_on_select): (Vec, bool) = match status { + DetachedRunStatusSummary::Pending => (Vec::new(), false), + DetachedRunStatusSummary::Failed { .. 
} => { + let run_id_clone = run_id.clone(); + ( + vec![Box::new(move |tx: &AppEventSender| { + tx.send(AppEvent::DismissDetachedRun(run_id_clone.clone())); + }) as SelectionAction], + true, + ) + } + }; + items.push(SelectionItem { + name: label, + description, + is_current: false, + actions, + dismiss_on_select, + ..Default::default() + }); } self.bottom_pane.show_selection_view(SelectionViewParams { @@ -2417,6 +2502,7 @@ impl ChatWidget { prompt: &str, label: DelegateDisplayLabel, claim_status: bool, + mode: DelegateSessionMode, ) { if claim_status { self.set_delegate_status_owner_internal(run_id, agent_id); @@ -2433,7 +2519,11 @@ impl ChatWidget { } else { Some(trimmed.to_string()) }; - self.add_info_message(format!("{}…", label.base_label), hint); + let mut info_label = label.base_label; + if mode == DelegateSessionMode::Detached { + info_label = format!("{info_label} (detached)"); + } + self.add_info_message(format!("{info_label}…"), hint); self.request_redraw(); } @@ -2462,6 +2552,50 @@ impl ChatWidget { label.depth == 0 && had_stream } + pub(crate) fn show_detached_completion_actions( + &mut self, + agent_id: &AgentId, + run_id: &str, + output: Option<&str>, + ) { + let mut items: Vec = Vec::new(); + if let Some(text) = output.map(str::trim).filter(|s| !s.is_empty()) { + let preview = truncate_text(text, 200); + let run_id_insert = run_id.to_string(); + let text_insert = text.to_string(); + items.push(SelectionItem { + name: format!("Use output from #{}", agent_id.as_str()), + description: Some(preview), + is_current: false, + actions: vec![Box::new(move |tx: &AppEventSender| { + tx.send(AppEvent::InsertUserTextMessage(text_insert.clone())); + tx.send(AppEvent::DismissDetachedRun(run_id_insert.clone())); + })], + dismiss_on_select: true, + ..Default::default() + }); + } + + let run_id_dismiss = run_id.to_string(); + items.push(SelectionItem { + name: format!("Dismiss detached run #{}", agent_id.as_str()), + description: Some("Remove this run from the 
list".to_string()), + is_current: false, + actions: vec![Box::new(move |tx: &AppEventSender| { + tx.send(AppEvent::DismissDetachedRun(run_id_dismiss.clone())); + })], + dismiss_on_select: true, + ..Default::default() + }); + + self.bottom_pane.show_selection_view(SelectionViewParams { + title: Some(format!("#{} finished", agent_id.as_str())), + footer_hint: Some(standard_popup_hint_line()), + items, + ..Default::default() + }); + } + pub(crate) fn on_delegate_failed( &mut self, run_id: &str, @@ -2472,6 +2606,19 @@ impl ChatWidget { self.add_error_message(format!("{} failed: {error}", label.base_label)); } + pub(crate) fn notify_detached_completion(&mut self, label: &DelegateDisplayLabel) { + self.notify(Notification::DetachedRunFinished { + label: label.base_label.clone(), + }); + } + + pub(crate) fn notify_detached_failure(&mut self, label: &DelegateDisplayLabel, error: &str) { + self.notify(Notification::DetachedRunFailed { + label: label.base_label.clone(), + error: error.to_string(), + }); + } + pub(crate) fn set_delegate_status_owner(&mut self, run_id: &str, agent_id: &AgentId) { self.set_delegate_status_owner_internal(run_id, agent_id); } @@ -2543,6 +2690,8 @@ enum Notification { AgentTurnComplete { response: String }, ExecApprovalRequested { command: String }, EditApprovalRequested { cwd: PathBuf, changes: Vec }, + DetachedRunFinished { label: String }, + DetachedRunFailed { label: String, error: String }, } impl Notification { @@ -2566,6 +2715,13 @@ impl Notification { } ) } + Notification::DetachedRunFinished { label } => { + format!("Detached delegate finished {label}") + } + Notification::DetachedRunFailed { label, error } => { + let preview = truncate_text(error, 60); + format!("Detached delegate failed {label}: {preview}") + } } } @@ -2574,6 +2730,8 @@ impl Notification { Notification::AgentTurnComplete { .. } => "agent-turn-complete", Notification::ExecApprovalRequested { .. } | Notification::EditApprovalRequested { .. 
} => "approval-requested", + Notification::DetachedRunFinished { .. } => "detached-run-finished", + Notification::DetachedRunFailed { .. } => "detached-run-failed", } } diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index 95106739ead5..a56d0969f43e 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -36,6 +36,7 @@ use codex_core::protocol::TaskCompleteEvent; use codex_core::protocol::TaskStartedEvent; use codex_core::protocol::ViewImageToolCallEvent; use codex_multi_agent::AgentId; +use codex_multi_agent::DelegateSessionMode; use codex_protocol::ConversationId; use codex_protocol::plan_tool::PlanItemArg; use codex_protocol::plan_tool::StepStatus; @@ -407,6 +408,7 @@ fn delegate_stream_deltas_and_restore_status() { "sketch integration points", label.clone(), true, + DelegateSessionMode::Standard, ); assert_eq!(chat.delegate_run.as_deref(), Some("run-1")); assert_eq!(chat.delegate_status_owner.as_deref(), Some("run-1")); @@ -477,8 +479,22 @@ fn nested_delegate_info_events_are_indented() { base_label: " ↳ #creative_ideas".to_string(), }; - chat.on_delegate_started("outer-run", &outer, "outer brief", outer_label, true); - chat.on_delegate_started("inner-run", &inner, "inner brief", inner_label, false); + chat.on_delegate_started( + "outer-run", + &outer, + "outer brief", + outer_label, + true, + DelegateSessionMode::Standard, + ); + chat.on_delegate_started( + "inner-run", + &inner, + "inner brief", + inner_label, + false, + DelegateSessionMode::Standard, + ); let mut messages = Vec::new(); while let Ok(event) = rx.try_recv() { From baa22c6c5008eb701edba1252ab74318c0c70621 Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Sat, 18 Oct 2025 12:21:54 +0000 Subject: [PATCH 15/17] plan shadow clinet for agents to interacte with --- ai-temp/agents-shadow-client.md | 103 ++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 ai-temp/agents-shadow-client.md 
diff --git a/ai-temp/agents-shadow-client.md b/ai-temp/agents-shadow-client.md new file mode 100644 index 000000000000..ecf0354f383c --- /dev/null +++ b/ai-temp/agents-shadow-client.md @@ -0,0 +1,103 @@ +# Shadow Client v2 Specification + +## Design Goals + +- Keep an in-memory “shadow client” for every active delegate conversation so the user can attach/detach instantly with full fidelity—no rollout replay, no missing turns. +- Preserve rollouts as the source of truth; shadows are an optimization guarded by resource limits and fallbacks. + +## Required Behaviour + +### 1. Continuous Recording + +- Spin up a `ShadowRecorder` alongside each delegate conversation. It subscribes to the same `CodexConversation` stream as the UI. +- Do not stop after the first `TaskComplete`. Continue recording until the delegate session is explicitly closed (or we evict the shadow). +- Record every `EventMsg`, merge deltas with the same `StreamController` logic the live ChatWidget uses, and build ready-to-render `HistoryCell`s. The recorder should output the _exact_ transcript the live widget would show. +- Track delegate capture frames (user inputs, agent outputs), tool events, plan updates, etc., so `ChatWidget::apply_delegate_summary` remains accurate. +- Maintain metrics per session: total events, total bytes (compressed + uncompressed), turn count, last updated timestamp. + +### 2. Shadow Storage + +- Store snapshots via `ShadowSnapshot` (Arc) containing: + - Rendered history cells. + - Raw `EventMsg`s (for diagnostics). + - Delegate capture frames. + - Metrics listed above. +- Provide cheap `snapshot()` clones on each update; snapshots are immutable views. +- Optional compression (`compress_shadows` flag) reduces footprint using gzip or a custom binary format; track both raw and compressed byte counts. + +### 3. Resource Policy + +- Configurable `[multi_agent]` knobs: + - `max_shadow_sessions` (default 5). `0` disables the count cap. 
+ - `max_shadow_memory_bytes` (default 100 MiB). `0` disables the memory cap. + - `compress_shadows` (default false). +- `ShadowManager` enforces the caps using LRU by `last_interacted_at`. Evict the oldest snapshots (drop the cached transcript, keep the live conversation) until under both limits. +- On eviction, emit `DelegateEvent::Info` with a clear message (e.g., “Shadow cache evicted for #critic; next attach will replay from rollout”). +- Update aggregate `shadow_memory_bytes` whenever snapshots are added/removed so `/status` can report accurate totals. + +### 4. Orchestrator API + +- `AgentOrchestrator` owns a `ShadowManager` alongside existing session maps. +- `run_delegate_task` spawns the recorder task that feeds the manager; the recorder loop continues until `ShutdownComplete` and session removal. +- `enter_session` returns: + - `ActiveDelegateSession` with the live `CodexConversation`, `SessionConfigured`, `Config`. + - Optional `ShadowSnapshot` (ready-to-render). + - Latest `DelegateShadowMetrics`. +- `active_sessions()`, `detached_runs()`, and new helper(s) provide structured metrics (session counts, bytes, events) for UI/telemetry. + +### 5. UI Integration + +- `App::activate_delegate_session`: + - If snapshot present → call `ChatWidget::hydrate_from_shadow(snapshot)` (no replay). Hydration should be O(1) with respect to the cached cells. + - If snapshot missing → show an info banner (“Loading #agent from rollout; shadow cache unavailable”), then fall back to `ConversationManager::resume_conversation_from_rollout`. +- `ChatWidget::hydrate_from_shadow` must: + - Apply cached history cells directly. + - Seed delegate capture queues. + - Restore stream controller state so subsequent deltas append seamlessly. + - Avoid duplicate commit animations or stale status headers. +- `/status`: + - Add a “Delegates” section summarizing cached sessions vs total, total bytes vs limit, total recorded events, and detached-run counts. 
+ - Use a helper (`MultiAgentStatusSummary`) exported from `codex_tui::status`. +- Delegate picker entries: + - Show shadow stats (bytes/events) when available. + - Show an explicit “rollout replay required” marker when the snapshot is missing. + +### 6. Observability + +- Libraries expose `ShadowMetrics` through a stable struct consumed by CLI, TUI, or other front ends. +- `/status` helper returns both text lines and machine-readable data for other surfaces (e.g., API, status card). +- Log important lifecycle events (`Shadow snapshot updated`, `Shadow evicted`, `Shadow compression failed -> fallback`). + +### 7. Fallback Guarantees + +- Shadow is best-effort. If recorder crashes, session is evicted, or compression fails: + - Emit `DelegateEvent::Info` so front-ends can display a toast/banner. + - Future attaches replay from rollout as today. + - Never crash the delegate conversation; return to rollouts and keep going. + +### 8. Testing + +- Unit tests: + - Recorder multi-turn coverage (TaskStarted → Delta → TaskComplete → TaskStarted …). + - Compression pipeline (round-trip + accounting). + - Eviction logic (count- and memory-based) including info event emission. +- TUI snapshot/unit tests: + - `/status` delegate section is rendered correctly. + - Delegate picker shows “shadow” vs “rollout” variants. + - Chat hydration from snapshot vs rollout fallback. +- Integration tests (async harness): + - Simulate multiple delegations and detached runs; ensure shadow survives across turns and fallbacks behave as expected. + +### 9. Migration / Rollback + +- Keep feature-flag support if needed (`enable_shadow_cache`). The default should remain enabled once feature is stable. +- Compression flag separately controllable; if bugs arise, disable compression without losing other functionality. + +## Implementation Roadmap + +1. Extract `ShadowRecorder` and shared rendering helpers (refactor out of `ChatWidget` into a reusable module). +2. 
Implement `ShadowManager` with continuous recording, metrics tracking, and eviction. +3. Update orchestrator APIs and events to consume the new manager. +4. Rework TUI hydration, fallback messaging, `/status`, and delegate picker. +5. Expand configuration, documentation, and telemetry. +6. Add unit/integration tests, then roll out behind (optional) feature flag before removing old code paths. From 75ff77576b997deaa9b48680cf0ac227d2a63867 Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Sun, 19 Oct 2025 19:11:27 +0000 Subject: [PATCH 16/17] organize agent-to-agent streams and cache transcripts for clearer output --- ai-temp/CHANGELOG.md | 6 + ai-temp/agents-shadow-client.md | 28 +- ai-temp/example-codex-home/AGENTS.md | 25 +- ai-temp/orchestration-integration.md | 22 +- codex-rs/Cargo.lock | 12 + codex-rs/Cargo.toml | 2 + codex-rs/core/src/config.rs | 48 + codex-rs/core/src/delegate_tool.rs | 4 + codex-rs/core/src/tools/handlers/delegate.rs | 58 +- codex-rs/multi-agent/Cargo.toml | 4 + codex-rs/multi-agent/src/lib.rs | 62 + codex-rs/multi-agent/src/orchestrator.rs | 350 ++++- .../multi-agent/src/orchestrator/tests.rs | 67 + codex-rs/multi-agent/src/shadow/mod.rs | 461 ++++++ codex-rs/multi-agent/src/shadow/recorder.rs | 332 ++++ codex-rs/tui/src/app.rs | 1355 +++++++++++++---- codex-rs/tui/src/app_backtrack.rs | 64 +- codex-rs/tui/src/app_event.rs | 32 +- codex-rs/tui/src/app_event_sender.rs | 42 +- .../tui/src/bottom_pane/approval_overlay.rs | 2 +- codex-rs/tui/src/bottom_pane/chat_composer.rs | 12 +- codex-rs/tui/src/bottom_pane/feedback_view.rs | 62 +- codex-rs/tui/src/chatwidget.rs | 374 +++-- codex-rs/tui/src/chatwidget/agent.rs | 162 +- codex-rs/tui/src/chatwidget/tests.rs | 19 +- codex-rs/tui/src/lib.rs | 7 + codex-rs/tui/src/session_log.rs | 4 +- codex-rs/tui/src/status/card.rs | 80 +- codex-rs/tui/src/status/helpers.rs | 23 + codex-rs/tui/src/status/mod.rs | 2 + codex-rs/tui/src/status/tests.rs | 42 +- 31 files changed, 3140 insertions(+), 623 
deletions(-) create mode 100644 codex-rs/multi-agent/src/orchestrator/tests.rs create mode 100644 codex-rs/multi-agent/src/shadow/mod.rs create mode 100644 codex-rs/multi-agent/src/shadow/recorder.rs diff --git a/ai-temp/CHANGELOG.md b/ai-temp/CHANGELOG.md index d65221d46dfd..1a9578b806c2 100644 --- a/ai-temp/CHANGELOG.md +++ b/ai-temp/CHANGELOG.md @@ -2,6 +2,12 @@ > Keep this file current; it documents the evolution of the multi-agent design work. An outdated changelog breaks the orchestrator timeline. +## 2025-10-19 +- Reworked delegate event fan-in so each conversation feeds a dedicated `spawn_event_forwarder` and surfaces updates through `AppEvent::CodexEvent { conversation_id, event }`, keeping history rendering scoped to the active session. +- Added shadow-session plumbing to the orchestrator (`ShadowManager`, per-session metrics, user/agent capture forwarding) and exposed the new telemetry through the TUI status panel. +- Expanded the delegate app events (`DelegateShadow*`, `InsertHistoryCell { conversation_id, ... }`) and updated the TUI’s `SessionHandle` storage to hydrate from snapshots instead of replaying global history. +- Refreshed `ai-temp/agents-shadow-client.md`, `ai-temp/orchestration-integration.md`, and the sample Codex home (`example-codex-home/AGENTS.md`) to describe per-session streams, `DelegateEvent::{Started,Delta,Completed,Failed}`, and the tightened coordinator instructions. + ## 2025-10-16 - Added batched delegate execution: the core handler now accepts `batch` payloads, launches every child run concurrently (respecting the orchestrator’s concurrency cap), and returns per-agent summaries in a single response. - Updated the TUI delegate tree to indent siblings (two spaces per depth) and keep the status banner aligned while multiple children stream at once; adjusted snapshot/unit coverage accordingly. 
diff --git a/ai-temp/agents-shadow-client.md b/ai-temp/agents-shadow-client.md index ecf0354f383c..84a6552a5cd0 100644 --- a/ai-temp/agents-shadow-client.md +++ b/ai-temp/agents-shadow-client.md @@ -9,10 +9,11 @@ ### 1. Continuous Recording -- Spin up a `ShadowRecorder` alongside each delegate conversation. It subscribes to the same `CodexConversation` stream as the UI. -- Do not stop after the first `TaskComplete`. Continue recording until the delegate session is explicitly closed (or we evict the shadow). +- Spin up a `ShadowRecorder` alongside every conversation _including the primary session_. Each recorder listens to its **own** `CodexConversation` event feed; there is no shared “global UI stream”. +- Do not stop after the first `TaskComplete`. Continue recording until that conversation is explicitly closed (or we evict the shadow). - Record every `EventMsg`, merge deltas with the same `StreamController` logic the live ChatWidget uses, and build ready-to-render `HistoryCell`s. The recorder should output the _exact_ transcript the live widget would show. -- Track delegate capture frames (user inputs, agent outputs), tool events, plan updates, etc., so `ChatWidget::apply_delegate_summary` remains accurate. +- Inject synthetic `UserMessage` events for any text inputs we mirror into the delegate so the transcript always preserves the user’s prompt/context. +- Track capture frames (user inputs, agent outputs), tool events, plan updates, etc., so `ChatWidget::apply_delegate_summary` remains accurate. - Maintain metrics per session: total events, total bytes (compressed + uncompressed), turn count, last updated timestamp. ### 2. Shadow Storage @@ -39,17 +40,18 @@ - `AgentOrchestrator` owns a `ShadowManager` alongside existing session maps. - `run_delegate_task` spawns the recorder task that feeds the manager; the recorder loop continues until `ShutdownComplete` and session removal. 
+- Every conversation (primary and delegates) exposes a dedicated `event_rx` that yields only that session’s events. The orchestrator never multiplexes events across conversations. - `enter_session` returns: - `ActiveDelegateSession` with the live `CodexConversation`, `SessionConfigured`, `Config`. - Optional `ShadowSnapshot` (ready-to-render). - Latest `DelegateShadowMetrics`. -- `active_sessions()`, `detached_runs()`, and new helper(s) provide structured metrics (session counts, bytes, events) for UI/telemetry. + - The per-session event receiver that the UI uses to drive rendering. +- `active_sessions()`, `detached_runs()`, and helper(s) provide structured metrics (session counts, bytes, events) for UI/telemetry. ### 5. UI Integration -- `App::activate_delegate_session`: - - If snapshot present → call `ChatWidget::hydrate_from_shadow(snapshot)` (no replay). Hydration should be O(1) with respect to the cached cells. - - If snapshot missing → show an info banner (“Loading #agent from rollout; shadow cache unavailable”), then fall back to `ConversationManager::resume_conversation_from_rollout`. +- Each conversation gets its own `ChatWidget` + event receiver. The TUI keeps a `SessionHandle` map keyed by `conversation_id`; widgets never share a global history stream. +- `App::activate_delegate_session` simply selects the appropriate handle. If a snapshot exists → call `ChatWidget::hydrate_from_shadow(snapshot)` so the transcript appears instantly. If snapshot missing → show an info banner (“Shadow cache unavailable; replaying from rollout”) and stream events from rollout. - `ChatWidget::hydrate_from_shadow` must: - Apply cached history cells directly. - Seed delegate capture queues. @@ -61,6 +63,14 @@ - Delegate picker entries: - Show shadow stats (bytes/events) when available. - Show an explicit “rollout replay required” marker when the snapshot is missing. 
+- Live rendering always reads from the active session’s receiver; background sessions continue recording silently. No cross-session leakage is possible. + +### 6. Session Management & Rendering + +- `SessionHandle` (owned by `App`) stores the `ChatWidget`, cached history, shadow metadata, and recorder metrics for each `conversation_id`. Per-session receivers are consumed by `spawn_event_forwarder`, which converts them into `AppEvent::CodexEvent { conversation_id, event }` messages. +- The main event loop listens for those `AppEvent::CodexEvent` messages and forwards each payload to the matching handle; background sessions keep receiving updates, but only the active widget redraws. +- History rendering stays local: `ChatWidget::handle_codex_event` updates its own transcript, shadow state, and capture buffers in place, while shared inserts still flow through `AppEvent::InsertHistoryCell { conversation_id, cell }` so history never leaks across sessions. +- Non-history UI signals (keyboard input, `/status`, delegate lifecycle commands, etc.) stay in the `AppEvent` bus but always carry the target `conversation_id` so the app can route them to the correct handle. ### 6. Observability @@ -97,7 +107,7 @@ 1. Extract `ShadowRecorder` and shared rendering helpers (refactor out of `ChatWidget` into a reusable module). 2. Implement `ShadowManager` with continuous recording, metrics tracking, and eviction. -3. Update orchestrator APIs and events to consume the new manager. -4. Rework TUI hydration, fallback messaging, `/status`, and delegate picker. +3. Update orchestrator APIs to expose per-session event receivers + metrics. +4. Rework the TUI to maintain `SessionHandle`s, hydrate from per-session shadows, and render strictly from session-scoped streams (no global history bus). 5. Expand configuration, documentation, and telemetry. 6. Add unit/integration tests, then roll out behind (optional) feature flag before removing old code paths. 
diff --git a/ai-temp/example-codex-home/AGENTS.md b/ai-temp/example-codex-home/AGENTS.md index 5aa826636da1..995e74cc6cc1 100644 --- a/ai-temp/example-codex-home/AGENTS.md +++ b/ai-temp/example-codex-home/AGENTS.md @@ -1,8 +1,12 @@ # Sample Primary Agent Instructions +HARD RULE +Don't ask the user any follow-up questions; just do as stated below. +HARD RULE + This directory demonstrates a multi-agent workflow. You are the coordinator that routes work through a fixed sequence: -1. **Log the user request (detached).** Immediately call `delegate_agent` so `request_summarizer` can write a markdown note to `/tmp/notes/`. Embed the *raw user message* in the prompt so the summarizer can actually summarize it. Example template (replace `` with the latest user turn, stripped of surrounding quotes): +1. **Log the user request (detached).** Immediately call `delegate_agent` so `request_summarizer` can write a markdown note to `/tmp/notes/`. Embed the _raw user message_ in the prompt so the summarizer can actually summarize it. Example template (replace `` with the latest user turn, stripped of surrounding quotes): ```json { @@ -13,21 +17,28 @@ This directory demonstrates a multi-agent workflow. You are the coordinator that ``` Continue with the remaining steps while this detached run completes. After you notice the “Detached run finished” banner (and optionally inspect the file), open `/agent`, highlight the summarizer entry, and choose “Dismiss detached run” so the list stays clean. -2. **Understand the request.** In your own words, restate the goal, list any constraints or missing information, and ask clarification questions. -3. **Invoke `ideas_provider` (batched delegates).** Use a single `delegate_agent` call with a `batch` array so both creative and conservative delegates run even if the model only allows one function call: + +2. 
**Invoke `ideas_provider` (batched delegates).** Use a single `delegate_agent` call with a `batch` array so both creative and conservative delegates run even if the model only allows one function call: ```json { "batch": [ - {"agent_id": "creative_ideas", "prompt": ""}, - {"agent_id": "conservative_ideas", "prompt": ""} + { + "agent_id": "creative_ideas", + "prompt": "" + }, + { + "agent_id": "conservative_ideas", + "prompt": "" + } ] } ``` Do not proceed until both sub-agents reply. If a response is missing or failed, rerun that delegate. -4. **Forward the winning approach to `critic`.** Summarize the chosen plan, note why it won, and call `delegate_agent` with that summary. Wait for the critic’s bullets before continuing. -5. **Synthesize the dialogue.** Deliver **exactly one paragraph** (≤75 words) combining the chosen idea, key mitigations, and next steps—no headings or bullets. + +3. **Forward the winning approach to `critic`.** Summarize the chosen plan, note why it won, and call `delegate_agent` with that summary. Wait for the critic’s bullets before continuing. +4. **Synthesize the dialogue.** Deliver **exactly one paragraph** (≤75 words) combining the chosen idea, key mitigations, and next steps—no headings or bullets. General rules: diff --git a/ai-temp/orchestration-integration.md b/ai-temp/orchestration-integration.md index e128ab84a3e4..8881ddeb1099 100644 --- a/ai-temp/orchestration-integration.md +++ b/ai-temp/orchestration-integration.md @@ -25,7 +25,7 @@ This document describes how to wire true sub-agent orchestration into the Codex Sub-agent sessions use the same `ConversationManager` entry points. The orchestrator calls `ConversationManager::new_conversation` with the agent-specific `Config` so all persistence automatically lands in `~/.codex/agents//` (per §2.2). - **Primary session** - Unchanged: `tui::App` (`codex-rs/tui/src/app.rs:78`) continues to own a `ConversationManager` for the main agent. 
The orchestrator is injected so it can spawn additional conversations on demand. + Unchanged: `tui::App` (`codex-rs/tui/src/app.rs`) continues to own a `ConversationManager` for the main agent. The orchestrator is injected so it can spawn additional conversations on demand while keeping each conversation’s event stream isolated. ### 1.2 Execution Flow @@ -45,10 +45,10 @@ This document describes how to wire true sub-agent orchestration into the Codex - The orchestrator forwards the translated prompt into the sub-agent conversation (`conversation.submit`). - Streamed `Event` values are intercepted before they reach the UI. For every event: - Persist to the sub-agent transcript as normal (handled by core). - - Convert to orchestrator messages (`DelegateProgress`, `DelegateOutput`), then forward to the primary session via a new `AppEvent::DelegateUpdate`. Nested runs simply push additional `Started` events with greater depth. + - Convert to orchestrator messages (`DelegateEvent::Started` / `Delta` / `Completed` / `Failed`) that always carry the owning conversation id. The TUI routes those updates to the matching session regardless of which tab the user is viewing. Nested runs emit additional `Started` events with increasing depth. 5. **Completion and summary** - - When `EventMsg::TaskComplete` fires, the orchestrator synthesizes a summary cell (e.g., `history_cell::AgentMessageCell`) and injects it into the primary transcript via `AppEvent::InsertHistoryCell`. + - When `EventMsg::TaskComplete` fires, the orchestrator emits `DelegateEvent::Completed` with the owning conversation id. The TUI enqueues the child’s summary for its parent session, then renders it when the parent becomes active so siblings never see each other’s output. - Store a compact record (duration, exit status) for `/status` display and optional audit logging (`~/.codex/log/multi-agent.log` per `ai-temp/persistence-design.md`). 6. 
**Cleanup** @@ -64,18 +64,18 @@ This document describes how to wire true sub-agent orchestration into the Codex | --- | --- | --- | | Orchestrator instantiation | `codex-rs/tui/src/app.rs:82` | Inject an `AgentOrchestrator` alongside the existing `ConversationManager`. | | Slash-command parsing | `codex-rs/tui/src/slash_command.rs` & `codex-rs/tui/src/chatwidget.rs:1126` | Add `/delegate` (or `/agent`) command to open a delegate picker or dispatch a delegate request. | -| App event handling | `codex-rs/tui/src/app.rs:247` (`while let Some(event)`) | Route new `AppEvent::DelegateRequest` to `AgentOrchestrator::handle_request`. | -| Event fan-in | `codex-rs/tui/src/app.rs:330` | Handle `AppEvent::DelegateUpdate` to mutate transcript/history cells. | +| App event handling | `codex-rs/tui/src/app.rs:600` (`while let Some(event)`) | Match on `AppEvent::DelegateUpdate` / `AppEvent::DelegateShadow*` and push updates into the session identified by `conversation_id`. | +| Event fan-in | `codex-rs/tui/src/app.rs:600` | Handle `AppEvent::CodexEvent` and `AppEvent::InsertHistoryCell` so each session only consumes its own stream. | | Status card | `codex-rs/tui/src/status/card.rs:68` | Pull orchestrator metrics (active agents, last run) to display in `/status`. | ### 2.2 Persistence - Sub-agent sessions reuse existing persistence automatically because `Config::codex_home` already points at `~/.codex/agents/` once we load through `AgentConfigLoader`. -- For the primary history: add summary inserts via `AppEvent::InsertHistoryCell` (`codex-rs/tui/src/app_event.rs:31`). No changes needed in core rollout recording. +- For the primary history: emit `DelegateEvent::Completed` / `Failed` with the owning conversation id and let the TUI enqueue summaries for the parent session. No changes needed in core rollout recording. ### 2.3 Error Handling -- Map orchestration errors to `AppEvent::InsertHistoryCell` with `history_cell::new_error_event` so failures surface in the main transcript. 
+- Map orchestration errors to `DelegateEvent::Failed`; the TUI turns those into error cells for the parent session. - Log details with `tracing::error!` inside the orchestrator, aligning with the `ai-temp/error-handling.md` guidance. --- @@ -95,7 +95,7 @@ This document describes how to wire true sub-agent orchestration into the Codex - Show a header `↳ rust_test_writer (success in 23s)` and embed the sub-agent's final answer. - Link to the sub-agent session path using the existing `SessionHeader` styling helpers (`codex-rs/tui/src/chatwidget/session_header.rs`). -- While the sub-agent runs, insert a “progress” cell (spinner) similar to exec command cells (`codex-rs/tui/src/exec_cell/render.rs:157`). Update via `DelegateProgress` events. +- While the sub-agent runs, insert a “progress” cell (spinner) similar to exec command cells (`codex-rs/tui/src/exec_cell/render.rs:157`). Update via `DelegateEvent::Delta` messages. ### 3.3 Status View @@ -118,9 +118,9 @@ This document describes how to wire true sub-agent orchestration into the Codex ```rust pub struct AgentOrchestrator { /* … */ } impl AgentOrchestrator { - pub async fn available_agents(&self) -> Result>; - pub async fn delegate(&self, request: DelegateRequest) -> Result; - pub fn subscribe(&self) -> mpsc::UnboundedReceiver; + pub async fn available_agents(&self) -> Result>; + pub async fn delegate(&self, request: DelegateRequest) -> Result; + pub fn subscribe(&self) -> mpsc::UnboundedReceiver; } ``` - This keeps the TUI glue thin and defers heavy logic to the crate that already knows how to load configs. 
diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 72dc6773f3df..0a1428f4223b 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -566,6 +566,15 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bit-set" version = "0.5.3" @@ -1283,14 +1292,17 @@ version = "0.0.0" dependencies = [ "anyhow", "async-trait", + "bincode", "codex-common", "codex-core", + "flate2", "serde", "serde_json", "tempfile", "thiserror 2.0.16", "tokio", "toml", + "tracing", "uuid", ] diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml index 00383bededa2..f2a76e0d466f 100644 --- a/codex-rs/Cargo.toml +++ b/codex-rs/Cargo.toml @@ -158,6 +158,8 @@ serde_json = "1" serde_with = "3.14" serial_test = "3.2.0" sha1 = "0.10.6" +flate2 = "1.0.35" +bincode = "1.3" sha2 = "0.10" shlex = "1.3.0" similar = "2.7.0" diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs index bc336852eb92..a6a747e641db 100644 --- a/codex-rs/core/src/config.rs +++ b/codex-rs/core/src/config.rs @@ -76,15 +76,25 @@ pub(crate) const CONFIG_TOML_FILE: &str = "config.toml"; pub struct MultiAgentConfig { pub agents: Vec, pub max_concurrent_delegates: usize, + pub enable_shadow_cache: bool, + pub max_shadow_sessions: Option, + pub max_shadow_memory_bytes: Option, + pub compress_shadows: bool, } pub const DEFAULT_MAX_CONCURRENT_DELEGATES: usize = 5; +pub const DEFAULT_MAX_SHADOW_SESSIONS: usize = 5; +pub const DEFAULT_MAX_SHADOW_MEMORY_BYTES: usize = 100 * 1024 * 1024; // 100 MiB impl Default for MultiAgentConfig { fn default() -> Self { Self { agents: Vec::new(), max_concurrent_delegates: DEFAULT_MAX_CONCURRENT_DELEGATES, + 
enable_shadow_cache: true, + max_shadow_sessions: Some(DEFAULT_MAX_SHADOW_SESSIONS), + max_shadow_memory_bytes: Some(DEFAULT_MAX_SHADOW_MEMORY_BYTES), + compress_shadows: false, } } } @@ -1026,6 +1036,14 @@ pub struct MultiAgentToml { pub agents: Vec, #[serde(default)] pub max_concurrent_delegates: Option, + #[serde(default)] + pub enable_shadow_cache: Option, + #[serde(default)] + pub max_shadow_sessions: Option, + #[serde(default)] + pub max_shadow_memory_bytes: Option, + #[serde(default)] + pub compress_shadows: Option, } impl ConfigToml { @@ -1268,6 +1286,32 @@ impl Config { .unwrap_or(DEFAULT_MAX_CONCURRENT_DELEGATES) .max(1); + let enable_shadow_cache = cfg + .multi_agent + .as_ref() + .and_then(|ma| ma.enable_shadow_cache) + .unwrap_or(true); + + let max_shadow_sessions = cfg + .multi_agent + .as_ref() + .and_then(|ma| ma.max_shadow_sessions) + .map(|value| if value == 0 { None } else { Some(value) }) + .unwrap_or(Some(DEFAULT_MAX_SHADOW_SESSIONS)); + + let max_shadow_memory_bytes = cfg + .multi_agent + .as_ref() + .and_then(|ma| ma.max_shadow_memory_bytes) + .map(|value| if value == 0 { None } else { Some(value) }) + .unwrap_or(Some(DEFAULT_MAX_SHADOW_MEMORY_BYTES)); + + let compress_shadows = cfg + .multi_agent + .as_ref() + .and_then(|ma| ma.compress_shadows) + .unwrap_or(false); + let multi_agent = MultiAgentConfig { agents: normalize_multi_agent_agents( cfg.multi_agent @@ -1276,6 +1320,10 @@ impl Config { .unwrap_or_default(), ), max_concurrent_delegates, + enable_shadow_cache, + max_shadow_sessions, + max_shadow_memory_bytes, + compress_shadows, }; let include_plan_tool_flag = features.enabled(Feature::PlanTool); diff --git a/codex-rs/core/src/delegate_tool.rs b/codex-rs/core/src/delegate_tool.rs index 313caba5174d..265207dcf877 100644 --- a/codex-rs/core/src/delegate_tool.rs +++ b/codex-rs/core/src/delegate_tool.rs @@ -85,6 +85,10 @@ pub enum DelegateToolEvent { agent_id: String, error: String, }, + Info { + agent_id: String, + message: String, + }, } 
/// Result returned when a delegate request is accepted. diff --git a/codex-rs/core/src/tools/handlers/delegate.rs b/codex-rs/core/src/tools/handlers/delegate.rs index 81e205ef7895..f4c0bc6cc2ac 100644 --- a/codex-rs/core/src/tools/handlers/delegate.rs +++ b/codex-rs/core/src/tools/handlers/delegate.rs @@ -344,28 +344,31 @@ async fn wait_for_completion( let mut collected = String::new(); while let Some(event) = events.recv().await { - if event_run_id(&event) != run.run_id { + if event_run_id(&event) + .map(|id| id != run.run_id.as_str()) + .unwrap_or(true) + { continue; } - match event { + match &event { DelegateToolEvent::Delta { chunk, .. } => { - collected.push_str(&chunk); + collected.push_str(chunk); } DelegateToolEvent::Completed { output, duration, .. } => { - let summary = output.or_else(|| { + let summary = output.clone().or_else(|| { if collected.trim().is_empty() { None } else { Some(collected.clone()) } }); - return Ok((summary, Some(duration))); + return Ok((summary, Some(*duration))); } DelegateToolEvent::Failed { error, .. } => { - return Err(error); + return Err(error.clone()); } _ => {} } @@ -374,12 +377,13 @@ async fn wait_for_completion( Err("delegate run ended unexpectedly".to_string()) } -fn event_run_id(event: &DelegateToolEvent) -> &str { +fn event_run_id(event: &DelegateToolEvent) -> Option<&str> { match event { DelegateToolEvent::Started { run_id, .. } | DelegateToolEvent::Delta { run_id, .. } | DelegateToolEvent::Completed { run_id, .. } - | DelegateToolEvent::Failed { run_id, .. } => run_id, + | DelegateToolEvent::Failed { run_id, .. } => Some(run_id), + DelegateToolEvent::Info { .. 
} => None, } } @@ -423,20 +427,27 @@ async fn handle_batch_entries( FunctionCallError::RespondToModel("delegate run ended unexpectedly".to_string()) })?; - let run_id = event_run_id(&event).to_string(); - if !interested.contains(&run_id) { + let Some(run_id) = event_run_id(&event) else { + continue; + }; + if !interested.contains(run_id) { continue; } - match event { + let run_id_owned = run_id.to_string(); + + match &event { DelegateToolEvent::Delta { chunk, .. } => { - collected.entry(run_id).or_default().push_str(&chunk); + collected + .entry(run_id_owned.clone()) + .or_default() + .push_str(chunk); } DelegateToolEvent::Completed { output, duration, .. } => { - let summary = output.or_else(|| { - collected.remove(&run_id).and_then(|text| { + let summary = output.clone().or_else(|| { + collected.remove(&run_id_owned).and_then(|text| { if text.trim().is_empty() { None } else { @@ -444,11 +455,11 @@ async fn handle_batch_entries( } }) }); - summaries.insert(run_id.clone(), (summary, Some(duration))); - interested.remove(&run_id); + summaries.insert(run_id_owned.clone(), (summary, Some(*duration))); + interested.remove(run_id); } DelegateToolEvent::Failed { error, .. } => { - return Err(FunctionCallError::RespondToModel(error)); + return Err(FunctionCallError::RespondToModel(error.clone())); } _ => {} } @@ -495,18 +506,21 @@ async fn monitor_detached_run( let mut collected = String::new(); while let Some(event) = events.recv().await { - if event_run_id(&event) != run_id.as_str() { + if event_run_id(&event) + .map(|id| id != run_id.as_str()) + .unwrap_or(true) + { continue; } - match event { + match &event { DelegateToolEvent::Delta { chunk, .. } => { - collected.push_str(&chunk); + collected.push_str(chunk); } DelegateToolEvent::Completed { output, duration, .. 
} => { - let summary = output.or_else(|| { + let summary = output.clone().or_else(|| { if collected.trim().is_empty() { None } else { @@ -539,7 +553,7 @@ async fn monitor_detached_run( conversation_id: Some(session.conversation_id().to_string()), summary, duration_ms: None, - error: Some(error), + error: Some(error.clone()), }); break; } diff --git a/codex-rs/multi-agent/Cargo.toml b/codex-rs/multi-agent/Cargo.toml index 2fec46a362dc..d8ebfd007590 100644 --- a/codex-rs/multi-agent/Cargo.toml +++ b/codex-rs/multi-agent/Cargo.toml @@ -13,6 +13,10 @@ toml.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["rt", "sync"] } uuid.workspace = true +serde_json.workspace = true +flate2 = { workspace = true } +tracing.workspace = true +bincode.workspace = true [dev-dependencies] serde_json.workspace = true diff --git a/codex-rs/multi-agent/src/lib.rs b/codex-rs/multi-agent/src/lib.rs index 7901e344cef0..eb2b8ed018b8 100644 --- a/codex-rs/multi-agent/src/lib.rs +++ b/codex-rs/multi-agent/src/lib.rs @@ -26,6 +26,8 @@ use serde::Deserialize; use serde::Serialize; use toml::Value as TomlValue; +pub mod shadow; + /// Identifier for a sub-agent directory under `~/.codex/agents`. 
/// /// The slug must be lowercase ASCII and may contain letters, numbers, @@ -516,4 +518,64 @@ agents = ["ideas_provider", "critic"] assert_eq!(nested.get("extra").unwrap().as_bool(), Some(true)); assert_eq!(base.get("new").unwrap().as_str(), Some("field")); } + + #[tokio::test] + async fn delegate_started_event_carries_owner() { + use crate::shadow::ShadowConfig; + use codex_core::delegate_tool::DelegateInvocationMode; + use codex_core::protocol::SessionSource; + use codex_core::{AuthManager, CodexAuth}; + use std::sync::Arc; + + let temp_home = tempdir().expect("tempdir"); + let global = temp_home.path().join("global"); + for dir in ["log", "sessions", "history", "mcp", "tmp"] { + fs::create_dir_all(global.join(dir)).expect("create dir"); + } + + let orchestrator = Arc::new(AgentOrchestrator::new( + &global, + AuthManager::from_auth_for_testing(CodexAuth::from_api_key("test")), + SessionSource::Cli, + CliConfigOverrides::default(), + ConfigOverrides::default(), + vec![AgentId::parse("critic").unwrap()], + 1, + ShadowConfig::disabled(), + )); + + let owner_id = "owner-conv".to_string(); + + let request = DelegateRequest { + agent_id: AgentId::parse("critic").unwrap(), + prompt: DelegatePrompt::new("hello"), + user_initial: Vec::new(), + parent_run_id: None, + mode: DelegateInvocationMode::Immediate, + caller_conversation_id: Some(owner_id.clone()), + }; + + let mut events = orchestrator.subscribe().await; + let run_id = orchestrator.delegate(request).await.unwrap(); + + while let Some(event) = events.recv().await { + if let DelegateEvent::Started { + run_id: started_run, + owner_conversation_id, + .. 
+ } = event + { + assert_eq!(started_run, run_id); + assert_eq!(owner_conversation_id, owner_id); + assert_eq!( + orchestrator + .owner_conversation_for_run(&run_id) + .await + .as_deref(), + Some(owner_conversation_id.as_str()) + ); + break; + } + } + } } diff --git a/codex-rs/multi-agent/src/orchestrator.rs b/codex-rs/multi-agent/src/orchestrator.rs index 59409b5d2e22..608dc3e74f6d 100644 --- a/codex-rs/multi-agent/src/orchestrator.rs +++ b/codex-rs/multi-agent/src/orchestrator.rs @@ -18,6 +18,7 @@ use codex_core::delegate_tool::DelegateToolError; use codex_core::delegate_tool::DelegateToolEvent as CoreDelegateToolEvent; use codex_core::delegate_tool::DelegateToolRequest; use codex_core::delegate_tool::DelegateToolRun; +use codex_core::protocol::Event; use codex_core::protocol::EventMsg; use codex_core::protocol::InputItem; use codex_core::protocol::Op; @@ -26,10 +27,20 @@ use codex_core::protocol::SessionSource; use std::time::Duration; use tokio::sync::Mutex; use tokio::sync::mpsc; +use tokio::sync::mpsc::UnboundedReceiver; +use tokio::sync::mpsc::UnboundedSender; +use tokio::task::JoinHandle; +use tracing::error; +use tracing::warn; use uuid::Uuid; use crate::AgentConfigLoader; use crate::AgentId; +use crate::shadow::ShadowConfig; +use crate::shadow::ShadowManager; +use crate::shadow::ShadowMetrics; +use crate::shadow::ShadowSessionSummary; +use crate::shadow::ShadowSnapshot; fn prompt_preview(text: &str) -> Option { let trimmed = text.trim(); @@ -52,6 +63,7 @@ pub struct DelegateRequest { pub user_initial: Vec, pub parent_run_id: Option, pub mode: DelegateInvocationMode, + pub caller_conversation_id: Option, } /// The prompt content forwarded to the sub-agent. 
@@ -66,12 +78,39 @@ impl DelegatePrompt { } } +struct SessionEventBroadcaster { + subscribers: Mutex>>, +} + +impl SessionEventBroadcaster { + fn new() -> Self { + Self { + subscribers: Mutex::new(Vec::new()), + } + } + + async fn subscribe(&self, initial: Option) -> UnboundedReceiver { + let (tx, rx) = mpsc::unbounded_channel(); + if let Some(event) = initial { + let _ = tx.send(event); + } + self.subscribers.lock().await.push(tx); + rx + } + + async fn broadcast(&self, event: &Event) { + let mut subscribers = self.subscribers.lock().await; + subscribers.retain(|tx| tx.send(event.clone()).is_ok()); + } +} + /// Progress and completion updates emitted by the orchestrator. #[derive(Debug, Clone)] pub enum DelegateEvent { Started { run_id: DelegateRunId, agent_id: AgentId, + owner_conversation_id: String, prompt: String, started_at: SystemTime, parent_run_id: Option, @@ -80,11 +119,13 @@ pub enum DelegateEvent { Delta { run_id: DelegateRunId, agent_id: AgentId, + owner_conversation_id: String, chunk: String, }, Completed { run_id: DelegateRunId, agent_id: AgentId, + owner_conversation_id: String, output: Option, duration: Duration, mode: DelegateSessionMode, @@ -92,9 +133,15 @@ pub enum DelegateEvent { Failed { run_id: DelegateRunId, agent_id: AgentId, + owner_conversation_id: String, error: String, mode: DelegateSessionMode, }, + Info { + agent_id: AgentId, + conversation_id: String, + message: String, + }, } /// Errors that can surface when orchestrating delegates. @@ -148,12 +195,14 @@ pub enum DetachedRunStatusSummary { } /// Payload returned when entering an existing delegate session. 
-#[derive(Clone)] pub struct ActiveDelegateSession { pub summary: DelegateSessionSummary, pub conversation: Arc, pub session_configured: Arc, pub config: Config, + pub event_rx: UnboundedReceiver, + pub shadow_snapshot: Option, + pub shadow_summary: Option, } /// Lightweight controller that spins up sub-agent conversations on demand and @@ -172,6 +221,8 @@ pub struct AgentOrchestrator { conversation_runs: Mutex>, detached_runs: Mutex>, max_concurrent_runs: usize, + shadow_manager: Arc, + run_owner_conversations: Mutex>, } impl AgentOrchestrator { @@ -183,6 +234,7 @@ impl AgentOrchestrator { config_overrides: ConfigOverrides, allowed_agents: Vec, max_concurrent_runs: usize, + shadow_config: ShadowConfig, ) -> Self { let loader = AgentConfigLoader::new(global_codex_home.into()); Self { @@ -199,6 +251,8 @@ impl AgentOrchestrator { conversation_runs: Mutex::new(HashMap::new()), detached_runs: Mutex::new(HashMap::new()), max_concurrent_runs: max_concurrent_runs.max(1), + shadow_manager: Arc::new(ShadowManager::new(shadow_config)), + run_owner_conversations: Mutex::new(HashMap::new()), } } @@ -224,6 +278,7 @@ impl AgentOrchestrator { if let Some(conversation_id) = self.run_conversations.lock().await.remove(run_id) { self.conversation_runs.lock().await.remove(&conversation_id); } + self.run_owner_conversations.lock().await.remove(run_id); } pub async fn parent_run_for_conversation( @@ -270,9 +325,29 @@ impl AgentOrchestrator { let parent_run_id = request.parent_run_id.clone(); let prompt_text = request.prompt.text.clone(); + let owner_conversation = if let Some(owner) = request.caller_conversation_id.clone() { + Some(owner) + } else if let Some(parent) = parent_run_id.as_ref() { + let guard = self.run_owner_conversations.lock().await; + guard.get(parent).cloned() + } else { + None + }; + if let Some(owner) = owner_conversation.clone() { + self.run_owner_conversations + .lock() + .await + .insert(run_id.clone(), owner); + } + let owner_conversation_id = 
owner_conversation.clone().unwrap_or_default(); + if owner_conversation_id.is_empty() { + tracing::warn!(run_id = %run_id, "delegate run missing owner conversation id"); + } + self.emit(DelegateEvent::Started { run_id: run_id.clone(), agent_id: request.agent_id.clone(), + owner_conversation_id: owner_conversation_id.clone(), prompt: prompt_text, started_at: SystemTime::now(), parent_run_id: parent_run_id.clone(), @@ -314,6 +389,7 @@ impl AgentOrchestrator { .emit(DelegateEvent::Completed { run_id: run_id_clone.clone(), agent_id, + owner_conversation_id: owner_conversation_id.clone(), output: message, duration, mode: output.mode, @@ -328,6 +404,7 @@ impl AgentOrchestrator { .emit(DelegateEvent::Failed { run_id: run_id_clone.clone(), agent_id: err.agent_id, + owner_conversation_id: owner_conversation_id.clone(), error: err.error, mode: err.mode, }) @@ -351,6 +428,68 @@ impl AgentOrchestrator { listeners.retain(|tx| tx.send(event.clone()).is_ok()); } + pub async fn owner_conversation_for_run(&self, run_id: &DelegateRunId) -> Option { + self.run_owner_conversations + .lock() + .await + .get(run_id) + .cloned() + } + + async fn record_shadow_user_inputs( + &self, + agent_id: Option<&AgentId>, + conversation_id: &str, + inputs: &[InputItem], + ) { + if inputs.is_empty() { + return; + } + let Some(agent_id) = agent_id else { return }; + if let Err(err) = self + .shadow_manager + .record_user_inputs(conversation_id, agent_id, inputs) + .await + { + error!(error = %err, conversation_id, "failed to record shadow user inputs"); + } + } + + async fn record_shadow_event( + &self, + agent_id: Option<&AgentId>, + conversation_id: &str, + event: &Event, + ) { + let Some(agent_id) = agent_id else { return }; + if let Err(err) = self + .shadow_manager + .record_event(conversation_id, agent_id, event) + .await + { + error!(error = %err, conversation_id, "failed to record shadow event"); + } + } + + async fn record_shadow_agent_outputs( + &self, + agent_id: Option<&AgentId>, + 
conversation_id: &str, + outputs: &[String], + ) { + if outputs.is_empty() { + return; + } + let Some(agent_id) = agent_id else { return }; + if let Err(err) = self + .shadow_manager + .record_agent_outputs(conversation_id, agent_id, outputs) + .await + { + error!(error = %err, conversation_id, "failed to record shadow output"); + } + } + /// Return the list of configured agent ids available for delegation. pub fn allowed_agents(&self) -> &[AgentId] { &self.allowed_agents @@ -367,6 +506,51 @@ impl AgentOrchestrator { summaries } + pub async fn shadow_snapshot(&self, conversation_id: &str) -> Option { + self.shadow_manager.snapshot(conversation_id).await + } + + pub async fn shadow_metrics(&self) -> ShadowMetrics { + self.shadow_manager.metrics().await + } + + pub async fn shadow_session_summary( + &self, + conversation_id: &str, + ) -> Option { + self.shadow_manager.session_summary(conversation_id).await + } + + pub async fn push_shadow_event( + &self, + agent_id: Option<&AgentId>, + conversation_id: &str, + event: &Event, + ) { + self.record_shadow_event(agent_id, conversation_id, event) + .await; + } + + pub async fn push_shadow_user_inputs( + &self, + agent_id: Option<&AgentId>, + conversation_id: &str, + inputs: &[InputItem], + ) { + self.record_shadow_user_inputs(agent_id, conversation_id, inputs) + .await; + } + + pub async fn push_shadow_outputs( + &self, + agent_id: Option<&AgentId>, + conversation_id: &str, + outputs: &[String], + ) { + self.record_shadow_agent_outputs(agent_id, conversation_id, outputs) + .await; + } + /// Return detached runs that are not yet ready to attach or have failed. 
pub async fn detached_runs(&self) -> Vec { let registry = self.detached_runs.lock().await; @@ -433,23 +617,50 @@ impl AgentOrchestrator { &self, conversation_id: &str, ) -> Result { - let mut sessions = self.sessions.lock().await; - let entry = sessions - .get_mut(conversation_id) - .ok_or_else(|| OrchestratorError::SessionNotFound(conversation_id.to_string()))?; - entry.summary.last_interacted_at = SystemTime::now(); + let (summary, conversation, session_configured, config, events) = { + let mut sessions = self.sessions.lock().await; + let entry = sessions + .get_mut(conversation_id) + .ok_or_else(|| OrchestratorError::SessionNotFound(conversation_id.to_string()))?; + entry.summary.last_interacted_at = SystemTime::now(); + ( + entry.summary.clone(), + entry.conversation.clone(), + entry.session_configured.clone(), + entry.config.clone(), + Arc::clone(&entry.events), + ) + }; + + let initial_event = Event { + id: String::new(), + msg: EventMsg::SessionConfigured((*session_configured).clone()), + }; + let event_rx = events.subscribe(Some(initial_event)).await; + let shadow_snapshot = self.shadow_manager.snapshot(conversation_id).await; + let shadow_summary = self.shadow_manager.session_summary(conversation_id).await; + Ok(ActiveDelegateSession { - summary: entry.summary.clone(), - conversation: entry.conversation.clone(), - session_configured: entry.session_configured.clone(), - config: entry.config.clone(), + summary, + conversation, + session_configured, + config, + event_rx, + shadow_snapshot, + shadow_summary, }) } /// Remove a delegate session – used when the conversation is closed or no longer usable. 
pub async fn remove_session(&self, conversation_id: &str) { let mut sessions = self.sessions.lock().await; - sessions.remove(conversation_id); + if let Some(session) = sessions.remove(conversation_id) + && let Some(task) = session.event_task + { + task.abort(); + } + drop(sessions); + self.shadow_manager.remove_session(conversation_id).await; } /// Refresh the session's last-interacted timestamp without opening it. @@ -458,10 +669,12 @@ impl AgentOrchestrator { if let Some(entry) = sessions.get_mut(conversation_id) { entry.summary.last_interacted_at = SystemTime::now(); } + drop(sessions); + self.shadow_manager.touch(conversation_id).await; } - async fn store_session(&self, success: &DelegateSuccess) { - let mut sessions = self.sessions.lock().await; + async fn store_session(self: &Arc, success: &DelegateSuccess) { + let events = Arc::new(SessionEventBroadcaster::new()); let summary = DelegateSessionSummary { conversation_id: success.conversation_id.clone(), agent_id: success.agent_id.clone(), @@ -469,15 +682,63 @@ impl AgentOrchestrator { cwd: success.cwd.clone(), mode: success.mode, }; - sessions.insert( + + let mut sessions = self.sessions.lock().await; + if let Some(previous) = sessions.insert( success.conversation_id.clone(), StoredDelegateSession { summary, conversation: success.conversation.clone(), session_configured: success.session_configured.clone(), config: success.config.clone(), + events: Arc::clone(&events), + event_task: None, }, - ); + ) && let Some(task) = previous.event_task + { + task.abort(); + } + drop(sessions); + + let orchestrator = Arc::clone(self); + let conversation = success.conversation.clone(); + let conversation_id = success.conversation_id.clone(); + let agent_id = success.agent_id.clone(); + let session_configured = success.session_configured.clone(); + let events_clone = Arc::clone(&events); + let event_task = tokio::spawn(async move { + let session_configured_event = Event { + id: String::new(), + msg: 
EventMsg::SessionConfigured((*session_configured).clone()), + }; + orchestrator + .record_shadow_event(Some(&agent_id), &conversation_id, &session_configured_event) + .await; + + loop { + match conversation.next_event().await { + Ok(event) => { + orchestrator + .record_shadow_event(Some(&agent_id), &conversation_id, &event) + .await; + events_clone.broadcast(&event).await; + } + Err(err) => { + warn!( + error = %err, + conversation_id, + "delegate conversation event stream ended" + ); + break; + } + } + } + }); + + let mut sessions = self.sessions.lock().await; + if let Some(entry) = sessions.get_mut(&success.conversation_id) { + entry.event_task = Some(event_task); + } } async fn mark_detached_ready(&self, run_id: &DelegateRunId, success: &DelegateSuccess) { @@ -553,6 +814,19 @@ impl AgentOrchestrator { let session_configured = Arc::new(conversation_bundle.session_configured); let conversation = conversation_bundle.conversation; + if let Err(err) = self + .shadow_manager + .register_session(&conversation_id, &agent_id) + .await + { + error!( + error = %err, + conversation_id, + agent = %agent_id.as_str(), + "failed to initialize shadow session" + ); + } + let mut items = Vec::new(); items.extend(request.user_initial.clone()); if !request.prompt.text.trim().is_empty() { @@ -569,6 +843,23 @@ impl AgentOrchestrator { mode: session_mode, })?; + self.record_shadow_user_inputs(Some(&agent_id), &conversation_id, &request.user_initial) + .await; + if !request.prompt.text.trim().is_empty() { + self.record_shadow_user_inputs( + Some(&agent_id), + &conversation_id, + &[InputItem::Text { + text: request.prompt.text.clone(), + }], + ) + .await; + } + + let owner_conversation_id = self + .owner_conversation_for_run(&run_id) + .await + .unwrap_or_default(); let mut aggregated = String::new(); loop { let event = conversation @@ -580,6 +871,9 @@ impl AgentOrchestrator { mode: session_mode, })?; + self.record_shadow_event(Some(&agent_id), &conversation_id, &event) + .await; + match 
event.msg { EventMsg::AgentMessage(msg) => { if aggregated.is_empty() { @@ -587,6 +881,7 @@ impl AgentOrchestrator { self.emit(DelegateEvent::Delta { run_id: run_id.clone(), agent_id: agent_id.clone(), + owner_conversation_id: owner_conversation_id.clone(), chunk: msg.message, }) .await; @@ -599,6 +894,7 @@ impl AgentOrchestrator { self.emit(DelegateEvent::Delta { run_id: run_id.clone(), agent_id: agent_id.clone(), + owner_conversation_id: owner_conversation_id.clone(), chunk: delta.delta, }) .await; @@ -609,6 +905,15 @@ impl AgentOrchestrator { .last_agent_message .or_else(|| (!aggregated.is_empty()).then_some(aggregated.clone())); + if let Some(output) = message.as_ref() { + self.record_shadow_agent_outputs( + Some(&agent_id), + &conversation_id, + &[output.clone()], + ) + .await; + } + return Ok(DelegateSuccess { agent_id, conversation_id, @@ -671,6 +976,8 @@ struct StoredDelegateSession { conversation: Arc, session_configured: Arc, config: Config, + events: Arc, + event_task: Option>, } struct DetachedRunRecord { @@ -709,6 +1016,7 @@ impl MultiAgentDelegateAdapter { DelegateEvent::Started { run_id, agent_id, + owner_conversation_id: _, prompt, started_at, parent_run_id, @@ -723,6 +1031,7 @@ impl MultiAgentDelegateAdapter { DelegateEvent::Delta { run_id, agent_id, + owner_conversation_id: _, chunk, } => CoreDelegateToolEvent::Delta { run_id, @@ -732,6 +1041,7 @@ impl MultiAgentDelegateAdapter { DelegateEvent::Completed { run_id, agent_id, + owner_conversation_id: _, output, duration, mode: _, @@ -744,6 +1054,7 @@ impl MultiAgentDelegateAdapter { DelegateEvent::Failed { run_id, agent_id, + owner_conversation_id: _, error, mode: _, } => CoreDelegateToolEvent::Failed { @@ -751,6 +1062,14 @@ impl MultiAgentDelegateAdapter { agent_id: agent_id.as_str().to_string(), error, }, + DelegateEvent::Info { + agent_id, + conversation_id: _, + message, + } => CoreDelegateToolEvent::Info { + agent_id: agent_id.as_str().to_string(), + message, + }, } } @@ -816,6 +1135,7 @@ impl 
DelegateToolAdapter for MultiAgentDelegateAdapter { user_initial: Vec::new(), parent_run_id, mode, + caller_conversation_id, }) .await .map_err(Self::map_error)?; diff --git a/codex-rs/multi-agent/src/orchestrator/tests.rs b/codex-rs/multi-agent/src/orchestrator/tests.rs new file mode 100644 index 000000000000..56c1846bfff7 --- /dev/null +++ b/codex-rs/multi-agent/src/orchestrator/tests.rs @@ -0,0 +1,67 @@ +use super::*; +use codex_core::config::ConfigOverrides; +use codex_core::protocol::SessionSource; +use codex_core::CodexAuth; +use tempfile::tempdir; + +#[tokio::test] +async fn delegate_started_event_carries_owner() { + let tmp = tempdir().expect("tempdir"); + let global = tmp.path().join("codex"); + std::fs::create_dir_all(global.join("log")).unwrap(); + std::fs::create_dir_all(global.join("sessions")).unwrap(); + std::fs::create_dir_all(global.join("history")).unwrap(); + std::fs::create_dir_all(global.join("mcp")).unwrap(); + std::fs::create_dir_all(global.join("tmp")).unwrap(); + + let orchestrator = Arc::new(AgentOrchestrator::new( + &global, + Arc::new(AuthManager::from_auth(CodexAuth::from_api_key("test"))), + SessionSource::Cli, + CliConfigOverrides::default(), + ConfigOverrides::default(), + vec![AgentId::parse("critic").unwrap()], + 1, + ShadowConfig::disabled(), + )); + + let owner_id = "owner-conv".to_string(); + let parent_run_id = "parent-run".to_string(); + orchestrator + .run_owner_conversations + .lock() + .await + .insert(parent_run_id.clone(), owner_id.clone()); + + let request = DelegateRequest { + agent_id: AgentId::parse("critic").unwrap(), + prompt: DelegatePrompt::new("hello"), + user_initial: Vec::new(), + parent_run_id: Some(parent_run_id), + mode: DelegateInvocationMode::Immediate, + caller_conversation_id: None, + }; + + let mut events = orchestrator.subscribe().await; + let run_id = orchestrator.delegate(request).await.unwrap(); + + while let Some(event) = events.recv().await { + if let DelegateEvent::Started { + run_id: 
started_run, + owner_conversation_id, + .. + } = event + { + assert_eq!(started_run, run_id); + assert_eq!(owner_conversation_id, owner_id); + assert_eq!( + orchestrator + .owner_conversation_for_run(&run_id) + .await + .as_deref(), + Some(owner_conversation_id.as_str()) + ); + break; + } + } +} diff --git a/codex-rs/multi-agent/src/shadow/mod.rs b/codex-rs/multi-agent/src/shadow/mod.rs new file mode 100644 index 000000000000..d3d357bc4535 --- /dev/null +++ b/codex-rs/multi-agent/src/shadow/mod.rs @@ -0,0 +1,461 @@ +mod recorder; + +use std::collections::VecDeque; +use std::io::Write; +use std::time::SystemTime; + +use anyhow::Context; +use anyhow::Result; +use bincode; +use codex_core::protocol::Event; +use codex_core::protocol::InputItem; +use flate2::Compression; +use flate2::read::GzDecoder; +use flate2::write::GzEncoder; +use serde::Deserialize; +use serde::Serialize; +use tokio::sync::Mutex; + +use recorder::ShadowRecorder; + +use crate::AgentId; + +pub use recorder::ShadowHistoryEntry; +pub use recorder::ShadowHistoryKind; +pub use recorder::ShadowSessionMetrics as ShadowMetrics; +use recorder::ShadowSessionMetrics; +pub use recorder::ShadowSnapshot; +pub use recorder::ShadowTranscriptCapture; + +#[derive(Debug, Clone, Copy)] +pub struct ShadowConfig { + pub enabled: bool, + pub max_sessions: Option, + pub max_memory_bytes: Option, + pub compress: bool, +} + +impl ShadowConfig { + pub const fn disabled() -> Self { + Self { + enabled: false, + max_sessions: None, + max_memory_bytes: None, + compress: false, + } + } + + pub fn apply_defaults( + enabled: bool, + max_sessions: Option, + max_memory_bytes: Option, + compress: bool, + ) -> Self { + Self { + enabled, + max_sessions, + max_memory_bytes, + compress, + } + } +} + +#[derive(Debug, Clone)] +pub struct ShadowSessionSummary { + pub metrics: ShadowSessionMetrics, + pub raw_bytes: usize, + pub compressed_bytes: Option, +} + +#[derive(Serialize, Deserialize)] +struct ShadowData { + recorder: ShadowRecorder, + 
events: Vec, +} + +impl ShadowData { + fn new(conversation_id: &str, agent_id: &AgentId) -> Self { + Self { + recorder: ShadowRecorder::new(conversation_id.to_string(), agent_id.clone()), + events: Vec::new(), + } + } + + fn record_event(&mut self, event: &Event) { + self.recorder.record_event(event); + self.events.push(event.clone()); + } + + fn record_user_inputs(&mut self, items: &[InputItem]) { + for item in items { + if let InputItem::Text { text } = item { + if text.trim().is_empty() { + continue; + } + let event = self.recorder.make_user_event(text.clone()); + self.recorder.record_event(&event); + self.events.push(event); + } + } + self.recorder.record_user_inputs(items); + } + + fn record_agent_outputs(&mut self, outputs: &[String]) { + self.recorder.record_agent_outputs(outputs); + } + + fn snapshot(&self) -> ShadowSnapshot { + self.recorder.snapshot(&self.events) + } + + fn metrics(&self) -> ShadowSessionMetrics { + self.recorder.metrics() + } + + fn raw_bytes(&self) -> usize { + self.recorder.raw_bytes() + } +} + +enum ShadowStorage { + Uncompressed(ShadowData), + Compressed { + bytes: Vec, + raw_bytes: usize, + metrics: ShadowSessionMetrics, + }, +} + +impl ShadowStorage { + fn new(conversation_id: &str, agent_id: &AgentId) -> Self { + Self::Uncompressed(ShadowData::new(conversation_id, agent_id)) + } + + fn ensure_uncompressed( + &mut self, + conversation_id: &str, + agent_id: &AgentId, + ) -> Result<&mut ShadowData> { + if let ShadowStorage::Compressed { bytes, .. } = self { + let mut decoder = GzDecoder::new(bytes.as_slice()); + let restored: ShadowData = + bincode::deserialize_from(&mut decoder).context("decompress shadow data")?; + *self = ShadowStorage::Uncompressed(restored); + } + Ok(match self { + ShadowStorage::Uncompressed(data) => data, + ShadowStorage::Compressed { .. 
} => { + *self = ShadowStorage::Uncompressed(ShadowData::new(conversation_id, agent_id)); + match self { + ShadowStorage::Uncompressed(data) => data, + _ => unreachable!(), + } + } + }) + } + + fn compress(&mut self) -> Result<()> { + if let ShadowStorage::Uncompressed(data) = self { + let mut buf = Vec::new(); + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + bincode::serialize_into(&mut buf, data).context("serialize shadow data")?; + encoder + .write_all(&buf) + .context("compress serialized shadow data")?; + let compressed = encoder.finish().context("finish compression")?; + let metrics = data.metrics(); + let raw_bytes = data.raw_bytes().max(buf.len()); + *self = ShadowStorage::Compressed { + bytes: compressed, + raw_bytes, + metrics, + }; + } + Ok(()) + } + + fn snapshot(&self) -> Result { + match self { + ShadowStorage::Uncompressed(data) => Ok(data.snapshot()), + ShadowStorage::Compressed { bytes, .. } => { + let mut decoder = GzDecoder::new(bytes.as_slice()); + let restored: ShadowData = + bincode::deserialize_from(&mut decoder).context("decompress shadow data")?; + Ok(restored.snapshot()) + } + } + } + + fn raw_bytes(&self) -> usize { + match self { + ShadowStorage::Uncompressed(data) => data.raw_bytes(), + ShadowStorage::Compressed { raw_bytes, .. } => *raw_bytes, + } + } + + fn compressed_bytes(&self) -> Option { + match self { + ShadowStorage::Compressed { bytes, .. } => Some(bytes.len()), + _ => None, + } + } + + fn metrics(&self) -> ShadowSessionMetrics { + match self { + ShadowStorage::Uncompressed(data) => data.metrics(), + ShadowStorage::Compressed { metrics, .. 
} => *metrics, + } + } +}

/// Book-keeping for one cached conversation.
///
/// Pairs the agent that owns the conversation with its [`ShadowStorage`] and
/// the wall-clock time of the most recent recorded activity.
struct ShadowSession {
    agent_id: AgentId,
    storage: ShadowStorage,
    // Refreshed by `touch`. NOTE(review): nothing in this block reads the
    // value back; eviction in `enforce_limits` follows insertion order, not
    // recency. Confirm whether least-recently-used eviction was intended.
    last_updated: SystemTime,
}

impl ShadowSession {
    /// Creates a session with fresh storage, stamped with the current time.
    fn new(conversation_id: &str, agent_id: AgentId) -> Self {
        Self {
            storage: ShadowStorage::new(conversation_id, &agent_id),
            agent_id,
            last_updated: SystemTime::now(),
        }
    }

    /// Marks the session as active right now.
    fn touch(&mut self) {
        self.last_updated = SystemTime::now();
    }
}

/// Cache of shadow sessions keyed by conversation id.
///
/// Sessions live in a queue in registration order behind an async mutex.
/// Every public method is a no-op (returning `Ok(())`, `None`, or default
/// metrics) when `config.enabled` is false.
pub struct ShadowManager {
    config: ShadowConfig,
    sessions: Mutex<VecDeque<(String, ShadowSession)>>,
}

impl ShadowManager {
    /// Creates an empty manager governed by `config`.
    pub fn new(config: ShadowConfig) -> Self {
        Self {
            config,
            sessions: Mutex::new(VecDeque::new()),
        }
    }

    /// Registers `conversation_id` if it is not cached yet, then applies the
    /// configured limits. Registering an already-known id changes nothing.
    pub async fn register_session(&self, conversation_id: &str, agent_id: &AgentId) -> Result<()> {
        if !self.config.enabled {
            return Ok(());
        }
        {
            let mut sessions = self.sessions.lock().await;
            if sessions
                .iter()
                .any(|(id, _)| id.as_str() == conversation_id)
            {
                return Ok(());
            }
            sessions.push_back((
                conversation_id.to_string(),
                ShadowSession::new(conversation_id, agent_id.clone()),
            ));
            // Guard dropped here: `enforce_limits` takes the lock itself.
        }
        self.enforce_limits().await
    }

    /// Records `event` for `conversation_id`, creating the session on the fly
    /// when it has not been registered, then applies the configured limits.
    ///
    /// # Errors
    /// Propagates the failure of `ShadowStorage::ensure_uncompressed`; in that
    /// case limits are not re-evaluated.
    pub async fn record_event(
        &self,
        conversation_id: &str,
        agent_id: &AgentId,
        event: &Event,
    ) -> Result<()> {
        if !self.config.enabled {
            return Ok(());
        }
        let mut sessions = self.sessions.lock().await;
        // Resolve the slot once; an unknown conversation is appended so the
        // same recording path serves both cases.
        let index = match sessions
            .iter()
            .position(|(id, _)| id.as_str() == conversation_id)
        {
            Some(index) => index,
            None => {
                sessions.push_back((
                    conversation_id.to_string(),
                    ShadowSession::new(conversation_id, agent_id.clone()),
                ));
                sessions.len() - 1
            }
        };
        let (_, session) = &mut sessions[index];
        let data = session
            .storage
            .ensure_uncompressed(conversation_id, agent_id)?;
        data.record_event(event);
        session.touch();
        if self.config.compress {
            // Best effort: the result of compression is deliberately ignored.
            let _ = session.storage.compress();
        }
        drop(sessions);
        self.enforce_limits().await
    }

    /// Records user `inputs` for an already-cached conversation.
    ///
    /// Unlike [`Self::record_event`], an unknown `conversation_id` is ignored
    /// and limits are not re-evaluated.
    ///
    /// # Errors
    /// Propagates the failure of `ShadowStorage::ensure_uncompressed`.
    pub async fn record_user_inputs(
        &self,
        conversation_id: &str,
        agent_id: &AgentId,
        inputs: &[InputItem],
    ) -> Result<()> {
        if !self.config.enabled {
            return Ok(());
        }
        let mut sessions = self.sessions.lock().await;
        if let Some((_, session)) = sessions
            .iter_mut()
            .find(|(id, _)| id.as_str() == conversation_id)
        {
            let data = session
                .storage
                .ensure_uncompressed(conversation_id, agent_id)?;
            data.record_user_inputs(inputs);
            session.touch();
            if self.config.compress {
                // Best effort: the result of compression is deliberately ignored.
                let _ = session.storage.compress();
            }
        }
        Ok(())
    }

    /// Records agent `outputs` for an already-cached conversation.
    ///
    /// An unknown `conversation_id` is ignored and limits are not
    /// re-evaluated.
    ///
    /// # Errors
    /// Propagates the failure of `ShadowStorage::ensure_uncompressed`.
    pub async fn record_agent_outputs(
        &self,
        conversation_id: &str,
        agent_id: &AgentId,
        outputs: &[String],
    ) -> Result<()> {
        if !self.config.enabled {
            return Ok(());
        }
        let mut sessions = self.sessions.lock().await;
        if let Some((_, session)) = sessions
            .iter_mut()
            .find(|(id, _)| id.as_str() == conversation_id)
        {
            let data = session
                .storage
                .ensure_uncompressed(conversation_id, agent_id)?;
            data.record_agent_outputs(outputs);
            session.touch();
            if self.config.compress {
                // Best effort: the result of compression is deliberately ignored.
                let _ = session.storage.compress();
            }
        }
        Ok(())
    }

    /// Returns a snapshot of the cached conversation, or `None` when the cache
    /// is disabled, the id is unknown, or the storage fails to produce one.
    pub async fn snapshot(&self, conversation_id: &str) -> Option<ShadowSnapshot> {
        if !self.config.enabled {
            return None;
        }
        let sessions = self.sessions.lock().await;
        let (_, session) = sessions
            .iter()
            .find(|(id, _)| id.as_str() == conversation_id)?;
        session.storage.snapshot().ok()
    }

    /// Sums the per-session counters over every cached session.
    ///
    /// `session_count` is the number of cached sessions; compressed bytes are
    /// only added for sessions whose storage reports a compressed size.
    pub async fn metrics(&self) -> ShadowSessionMetrics {
        if !self.config.enabled {
            return ShadowSessionMetrics::default();
        }
        let sessions = self.sessions.lock().await;
        let mut totals = ShadowSessionMetrics {
            session_count: sessions.len(),
            ..ShadowSessionMetrics::default()
        };
        for (_, session) in sessions.iter() {
            let per_session = session.storage.metrics();
            totals.events += per_session.events;
            totals.user_inputs += per_session.user_inputs;
            totals.agent_outputs += per_session.agent_outputs;
            totals.turns += per_session.turns;
            totals.total_bytes += session.storage.raw_bytes();
            if let Some(bytes) = session.storage.compressed_bytes() {
                totals.total_compressed_bytes += bytes;
            }
        }
        totals
    }

    /// Returns the counters and byte sizes of one cached conversation, or
    /// `None` when the cache is disabled or the id is unknown.
    pub async fn session_summary(&self, conversation_id: &str) -> Option<ShadowSessionSummary> {
        if !self.config.enabled {
            return None;
        }
        let sessions = self.sessions.lock().await;
        let (_, session) = sessions
            .iter()
            .find(|(id, _)| id.as_str() == conversation_id)?;
        Some(ShadowSessionSummary {
            metrics: session.storage.metrics(),
            raw_bytes: session.storage.raw_bytes(),
            compressed_bytes: session.storage.compressed_bytes(),
        })
    }

    /// Drops every cached entry stored under `conversation_id`.
    pub async fn remove_session(&self, conversation_id: &str) {
        if !self.config.enabled {
            return;
        }
        let mut sessions = self.sessions.lock().await;
        sessions.retain(|(id, _)| id.as_str() != conversation_id);
    }

    /// Refreshes the activity timestamp of a cached conversation, if present.
    pub async fn touch(&self, conversation_id: &str) {
        if !self.config.enabled {
            return;
        }
        let mut sessions = self.sessions.lock().await;
        if let Some((_, session)) = sessions
            .iter_mut()
            .find(|(id, _)| id.as_str() == conversation_id)
        {
            session.touch();
        }
    }

    /// Evicts sessions from the front of the queue (oldest registration first)
    /// until both `max_sessions` and `max_memory_bytes` are satisfied or the
    /// queue is empty. A limit set to `None` is never exceeded.
    async fn enforce_limits(&self) -> Result<()> {
        if !self.config.enabled {
            return Ok(());
        }
        // One lock acquisition for the whole pass, so concurrent writers
        // cannot interleave between two evictions.
        let mut sessions = self.sessions.lock().await;
        // Running total, adjusted per eviction instead of re-summing the
        // whole queue every round.
        let mut total_bytes = sessions
            .iter()
            .map(|(_, session)| session.storage.raw_bytes())
            .sum::<usize>();
        loop {
            let over_session_limit = self
                .config
                .max_sessions
                .is_some_and(|max| sessions.len() > max);
            let over_memory_limit = self
                .config
                .max_memory_bytes
                .is_some_and(|limit| total_bytes > limit);
            if !over_session_limit && !over_memory_limit {
                break;
            }
            let Some((_, evicted)) = sessions.pop_front() else {
                break;
            };
            total_bytes = total_bytes.saturating_sub(evicted.storage.raw_bytes());
            tracing::info!(
                agent = %evicted.agent_id.as_str(),
                "Shadow cache evicted for #{}",
                evicted.agent_id.as_str()
            );
        }
        Ok(())
    }
}
diff --git a/codex-rs/multi-agent/src/shadow/recorder.rs
b/codex-rs/multi-agent/src/shadow/recorder.rs new file mode 100644 index 000000000000..4418bca0b8f9 --- /dev/null +++ b/codex-rs/multi-agent/src/shadow/recorder.rs @@ -0,0 +1,332 @@
use std::time::SystemTime;

use codex_core::protocol::AgentMessageDeltaEvent;
use codex_core::protocol::AgentMessageEvent;
use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::InputItem;
use codex_core::protocol::InputMessageKind;
use codex_core::protocol::UserMessageEvent;
use serde::Deserialize;
use serde::Serialize;

use crate::AgentId;

/// Category attached to each recorded history entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum ShadowHistoryKind {
    #[default]
    Agent,
    User,
    Info,
    Warning,
    Error,
    System,
}

/// One block of text lines in the recorded history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShadowHistoryEntry {
    pub kind: ShadowHistoryKind,
    pub lines: Vec<String>,
    /// True only for entries produced by flushing buffered stream deltas.
    pub is_stream_continuation: bool,
}

/// Raw user inputs and agent outputs captured alongside the history.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ShadowTranscriptCapture {
    pub user_inputs: Vec<InputItem>,
    pub agent_outputs: Vec<String>,
}

/// Counters describing what has been recorded.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
pub struct ShadowSessionMetrics {
    pub session_count: usize,
    pub events: usize,
    pub user_inputs: usize,
    pub agent_outputs: usize,
    pub turns: usize,
    pub total_bytes: usize,
    pub total_compressed_bytes: usize,
}

/// Point-in-time copy of a recorder's state plus a caller-supplied event list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShadowSnapshot {
    pub conversation_id: String,
    pub agent_id: AgentId,
    pub history: Vec<ShadowHistoryEntry>,
    pub capture: ShadowTranscriptCapture,
    pub metrics: ShadowSessionMetrics,
    pub events: Vec<Event>,
    pub recorded_at: SystemTime,
}

/// Accumulates a conversation's events into history entries, a transcript
/// capture, and running metrics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct ShadowRecorder {
    conversation_id: String,
    agent_id: AgentId,
    history: Vec<ShadowHistoryEntry>,
    capture: ShadowTranscriptCapture,
    metrics: ShadowSessionMetrics,
    /// Text buffered from delta events until the next non-delta event.
    current_stream: Option<String>,
    last_updated: SystemTime,
    synthetic_event_counter: usize,
}

impl ShadowRecorder {
    /// Creates an empty recorder for the given conversation and agent.
    pub fn new(conversation_id: String, agent_id: AgentId) -> Self {
        Self {
            conversation_id,
            agent_id,
            history: Vec::new(),
            capture: ShadowTranscriptCapture::default(),
            metrics: ShadowSessionMetrics::default(),
            current_stream: None,
            last_updated: SystemTime::now(),
            synthetic_event_counter: 0,
        }
    }

    /// Returns `shadow-<agent>-<counter>` and advances the counter, wrapping
    /// on overflow.
    fn next_synthetic_event_id(&mut self) -> String {
        let id = format!(
            "shadow-{}-{}",
            self.agent_id.as_str(),
            self.synthetic_event_counter
        );
        self.synthetic_event_counter = self.synthetic_event_counter.wrapping_add(1);
        id
    }

    /// Builds a plain user-message event carrying `message` under a fresh
    /// synthetic id. The event is returned, not recorded.
    pub fn make_user_event(&mut self, message: String) -> Event {
        Event {
            id: self.next_synthetic_event_id(),
            msg: EventMsg::UserMessage(UserMessageEvent {
                message,
                kind: Some(InputMessageKind::Plain),
                images: None,
            }),
        }
    }

    /// Folds `event` into the history and metrics.
    ///
    /// Every event bumps `metrics.events`, adds its approximate serialized
    /// size to `metrics.total_bytes`, and refreshes `last_updated`. Delta
    /// events (agent message deltas and command output chunks) are appended
    /// to one shared stream buffer; every other event first flushes that
    /// buffer as an `Agent` continuation entry and then appends its own
    /// entry or entries.
    pub fn record_event(&mut self, event: &Event) {
        self.metrics.events += 1;
        self.metrics.total_bytes += approximate_event_size(event);
        self.last_updated = SystemTime::now();

        match &event.msg {
            EventMsg::AgentMessage(AgentMessageEvent { message }) => {
                // NOTE(review): if the same text already arrived as deltas,
                // the flushed stream and this entry may both contain it --
                // confirm against the producer's event contract.
                self.finish_stream();
                self.push_entry(ShadowHistoryKind::Agent, split_lines(message));
            }
            EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }) => {
                self.current_stream
                    .get_or_insert_with(String::new)
                    .push_str(delta);
            }
            EventMsg::UserMessage(user) => {
                self.finish_stream();
                let mut lines = split_lines(&user.message);
                if lines.is_empty() {
                    // An empty message still yields one (blank) line.
                    lines.push(String::new());
                }
                self.push_entry(ShadowHistoryKind::User, lines);
            }
            EventMsg::TaskComplete(complete) => {
                self.finish_stream();
                if let Some(last) = &complete.last_agent_message {
                    self.push_entry(ShadowHistoryKind::Agent, split_lines(last));
                }
                self.metrics.turns += 1;
            }
            EventMsg::Error(err) => {
                self.finish_stream();
                self.push_entry(ShadowHistoryKind::Error, vec![err.message.clone()]);
            }
            EventMsg::StreamError(err) => {
                self.finish_stream();
                self.push_entry(ShadowHistoryKind::Warning, vec![err.message.clone()]);
            }
            EventMsg::BackgroundEvent(ev) => {
                self.finish_stream();
                self.push_entry(ShadowHistoryKind::Info, vec![ev.message.clone()]);
            }
            EventMsg::PlanUpdate(update) => {
                self.finish_stream();
                let mut lines: Vec<String> = update
                    .plan
                    .iter()
                    .map(|item| format!("{} [{:?}]", item.step, item.status))
                    .collect();
                if let Some(explanation) = update.explanation.as_ref()
                    && !explanation.is_empty()
                {
                    lines.push(format!("Explanation: {explanation}"));
                }
                if lines.is_empty() {
                    lines.push("Plan updated.".to_string());
                }
                self.push_entry(ShadowHistoryKind::Info, lines);
            }
            EventMsg::ExecCommandBegin(ev) => {
                self.finish_stream();
                self.push_entry(
                    ShadowHistoryKind::Info,
                    vec![format!("Command started: {}", ev.command.join(" "))],
                );
            }
            EventMsg::ExecCommandOutputDelta(delta) => {
                // Invalid UTF-8 in the chunk is replaced, not rejected.
                let text = String::from_utf8_lossy(&delta.chunk);
                self.current_stream
                    .get_or_insert_with(String::new)
                    .push_str(&text);
            }
            EventMsg::ExecCommandEnd(ev) => {
                self.finish_stream();
                self.push_entry(
                    ShadowHistoryKind::Info,
                    vec![format!("Command exited with code {}", ev.exit_code)],
                );
                if !ev.stdout.is_empty() {
                    self.push_entry(ShadowHistoryKind::Agent, split_lines(&ev.stdout));
                }
                if !ev.stderr.is_empty() {
                    self.push_entry(ShadowHistoryKind::Warning, split_lines(&ev.stderr));
                }
            }
            EventMsg::McpToolCallBegin(ev) => {
                self.finish_stream();
                self.push_entry(
                    ShadowHistoryKind::Info,
                    vec![format!("MCP tool call started: {:?}", ev.invocation)],
                );
            }
            EventMsg::McpToolCallEnd(ev) => {
                self.finish_stream();
                self.push_entry(
                    ShadowHistoryKind::Info,
                    vec![
                        format!("Invocation: {:?}", ev.invocation),
                        format!("Duration: {:?}", ev.duration),
                        format!("Result: {:?}", ev.result),
                    ],
                );
            }
            EventMsg::ShutdownComplete => {
                self.finish_stream();
            }
            _ => {
                // Anything without a dedicated arm is kept as its debug dump.
                self.finish_stream();
                self.push_entry(ShadowHistoryKind::System, vec![format!("{:?}", event.msg)]);
            }
        }
    }

    /// Appends `items` to the captured user inputs and bumps the counter.
    pub fn record_user_inputs(&mut self, items: &[InputItem]) {
        if items.is_empty() {
            return;
        }
        self.capture.user_inputs.extend_from_slice(items);
        self.metrics.user_inputs += items.len();
    }

    /// Appends `outputs` to the captured agent outputs and bumps the counter.
    pub fn record_agent_outputs(&mut self, outputs: &[String]) {
        if outputs.is_empty() {
            return;
        }
        // Clones element-wise straight into the target; no temporary Vec.
        self.capture.agent_outputs.extend_from_slice(outputs);
        self.metrics.agent_outputs += outputs.len();
    }

    /// Copies the recorder's state into a [`ShadowSnapshot`] that carries
    /// `events`. The snapshot's metrics report `session_count` as 1 and
    /// `total_compressed_bytes` as 0; text still buffered in the stream is
    /// not included in `history`.
    pub fn snapshot(&self, events: &[Event]) -> ShadowSnapshot {
        let metrics = ShadowSessionMetrics {
            session_count: 1,
            total_compressed_bytes: 0,
            ..self.metrics
        };
        ShadowSnapshot {
            conversation_id: self.conversation_id.clone(),
            agent_id: self.agent_id.clone(),
            history: self.history.clone(),
            capture: self.capture.clone(),
            metrics,
            events: events.to_vec(),
            recorded_at: self.last_updated,
        }
    }

    /// Returns a copy of the running counters.
    pub fn metrics(&self) -> ShadowSessionMetrics {
        self.metrics
    }

    /// Returns the accumulated approximate size of all recorded events.
    pub fn raw_bytes(&self) -> usize {
        self.metrics.total_bytes
    }

    /// Appends a non-continuation history entry.
    fn push_entry(&mut self, kind: ShadowHistoryKind, lines: Vec<String>) {
        self.history.push(ShadowHistoryEntry {
            kind,
            lines,
            is_stream_continuation: false,
        });
    }

    /// Flushes buffered stream text, if any and non-empty, as an `Agent`
    /// entry flagged as a stream continuation, and clears the buffer.
    fn finish_stream(&mut self) {
        if let Some(stream) = self.current_stream.take()
            && !stream.is_empty()
        {
            self.history.push(ShadowHistoryEntry {
                kind: ShadowHistoryKind::Agent,
                lines: split_lines(&stream),
                is_stream_continuation: true,
            });
        }
    }
}

/// Splits `text` on line boundaries into owned strings (empty for `""`).
fn split_lines(text: &str) -> Vec<String> {
    text.lines().map(std::string::ToString::to_string).collect()
}

/// Length of the event's JSON serialization, or 0 if serialization fails.
fn approximate_event_size(event: &Event) -> usize {
    serde_json::to_string(event).map(|s| s.len()).unwrap_or(0)
}
diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index 243055c6b54e..975b7751cbfa 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -5,14 +5,17 @@ use crate::app_event_sender::AppEventSender; use crate::bottom_pane::ApprovalRequest; use crate::chatwidget::ChatWidget; use crate::chatwidget::ChatWidgetInit; +use crate::chatwidget::ChatWidgetSession; use crate::chatwidget::DelegateDisplayLabel; use crate::diff_render::DiffSummary; use crate::exec_command::strip_bash_lc_and_escape; use crate::file_search::FileSearchManager; use crate::history_cell::HistoryCell; +use crate::history_cell::UserHistoryCell; use crate::pager_overlay::Overlay; use crate::render::highlight::highlight_bash_to_lines; use crate::resume_picker::ResumeSelection; +use crate::status::StatusShadowData; use crate::tui; use crate::tui::TuiEvent; use codex_ansi_escape::ansi_escape_line; @@ -22,6 +25,7 @@ use codex_core::config::Config; use codex_core::config::persist_model_selection; use codex_core::config::set_hide_full_access_warning; use codex_core::model_family::find_family_for_model; +use codex_core::protocol::Event; use
codex_core::protocol::SessionConfiguredEvent; use codex_core::protocol::SessionSource; use codex_core::protocol::TokenUsage; @@ -33,6 +37,9 @@ use codex_multi_agent::DelegateSessionMode; use codex_multi_agent::DelegateSessionSummary; use codex_multi_agent::DetachedRunSummary; use codex_multi_agent::delegate_tool_adapter; +#[cfg(test)] +use codex_multi_agent::shadow::ShadowConfig; +use codex_multi_agent::shadow::ShadowSessionSummary; use codex_protocol::ConversationId; use color_eyre::eyre::Result; use color_eyre::eyre::WrapErr; @@ -41,7 +48,7 @@ use crossterm::event::KeyEvent; use crossterm::event::KeyEventKind; use ratatui::style::Stylize; use ratatui::text::Line; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet, VecDeque, hash_map::Entry}; use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::AtomicBool; @@ -50,6 +57,7 @@ use std::thread; use std::time::Duration; use std::time::SystemTime; use tokio::select; +use tokio::sync::mpsc::UnboundedReceiver; use tokio::sync::mpsc::unbounded_channel; // use uuid::Uuid; @@ -60,10 +68,28 @@ pub struct AppExitInfo { pub update_action: Option, } +fn spawn_event_forwarder( + app_event_tx: AppEventSender, + conversation_id: ConversationId, + mut event_rx: UnboundedReceiver, +) { + tokio::spawn(async move { + let conversation_id = conversation_id.to_string(); + while let Some(event) = event_rx.recv().await { + app_event_tx.send(AppEvent::CodexEvent { + conversation_id: conversation_id.clone(), + event, + }); + } + }); +} + pub(crate) struct App { pub(crate) server: Arc, pub(crate) app_event_tx: AppEventSender, - pub(crate) chat_widget: ChatWidget, + pub(crate) sessions: HashMap, + pub(crate) active_session_id: String, + pub(crate) primary_session_id: String, pub(crate) auth_manager: Arc, pub(crate) delegate_orchestrator: Arc, @@ -88,14 +114,9 @@ pub(crate) struct App { // Esc-backtracking state grouped pub(crate) backtrack: crate::app_backtrack::BacktrackState, pub(crate) feedback: 
codex_feedback::CodexFeedback, - delegate_sessions: HashMap, - active_delegate: Option, - active_delegate_summary: Option, - primary_chat_backup: Option, /// Set when the user confirms an update; propagated on exit. pub(crate) pending_update_action: Option, - delegate_tree: DelegateTree, - delegate_status_owner: Option, + run_parent_map: HashMap, } #[derive(Default)] @@ -116,6 +137,30 @@ struct DelegateDisplay { label: DelegateDisplayLabel, } +#[derive(Clone)] +struct ChildCompletionSummary { + #[allow(dead_code)] + child_conversation_id: String, + label: DelegateDisplayLabel, + hint: Option, + output: Option, + #[allow(dead_code)] + mode: DelegateSessionMode, +} + +#[derive(Clone)] +enum ChildSummary { + Completion(ChildCompletionSummary), + Failure { + #[allow(dead_code)] + child_conversation_id: String, + label: DelegateDisplayLabel, + error: String, + #[allow(dead_code)] + mode: DelegateSessionMode, + }, +} + impl DelegateTree { fn insert( &mut self, @@ -231,19 +276,23 @@ impl App { let enhanced_keys_supported = tui.enhanced_keys_supported(); - let chat_widget = match resume_selection { + let ChatWidgetSession { + widget: mut chat_widget, + conversation_id: primary_conversation_id, + event_rx: primary_event_rx, + } = match resume_selection { ResumeSelection::StartFresh | ResumeSelection::Exit => { let init = crate::chatwidget::ChatWidgetInit { config: config.clone(), frame_requester: tui.frame_requester(), - app_event_tx: app_event_tx.clone(), + app_event_tx: app_event_tx.scoped(), initial_prompt: initial_prompt.clone(), initial_images: initial_images.clone(), enhanced_keys_supported, auth_manager: auth_manager.clone(), feedback: feedback.clone(), }; - ChatWidget::new(init, conversation_manager.clone()) + ChatWidget::new_session(init, conversation_manager.clone()).await? 
} ResumeSelection::Resume(path) => { let resumed = conversation_manager @@ -259,14 +308,14 @@ impl App { let init = crate::chatwidget::ChatWidgetInit { config: config.clone(), frame_requester: tui.frame_requester(), - app_event_tx: app_event_tx.clone(), + app_event_tx: app_event_tx.scoped(), initial_prompt: initial_prompt.clone(), initial_images: initial_images.clone(), enhanced_keys_supported, auth_manager: auth_manager.clone(), feedback: feedback.clone(), }; - ChatWidget::new_from_existing( + ChatWidget::new_session_from_existing( init, resumed.conversation, resumed.session_configured, @@ -274,12 +323,29 @@ impl App { } }; + let primary_session_id = primary_conversation_id.to_string(); + chat_widget.ensure_conversation_id(&primary_session_id); + + spawn_event_forwarder( + app_event_tx.clone(), + primary_conversation_id, + primary_event_rx, + ); + + let mut sessions = HashMap::new(); + sessions.insert( + primary_session_id.clone(), + SessionHandle::new(chat_widget, None, DelegateSessionMode::Standard, None), + ); + let file_search = FileSearchManager::new(config.cwd.clone(), app_event_tx.clone()); let mut app = Self { server: conversation_manager, app_event_tx, - chat_widget, + sessions, + active_session_id: primary_session_id.clone(), + primary_session_id, auth_manager: auth_manager.clone(), delegate_orchestrator, config, @@ -293,13 +359,8 @@ impl App { commit_anim_running: Arc::new(AtomicBool::new(false)), backtrack: BacktrackState::default(), feedback: feedback.clone(), - delegate_sessions: HashMap::new(), - active_delegate: None, - active_delegate_summary: None, - primary_chat_backup: None, pending_update_action: None, - delegate_tree: DelegateTree::default(), - delegate_status_owner: None, + run_parent_map: HashMap::new(), }; let tui_events = tui.event_stream(); @@ -318,7 +379,7 @@ impl App { tui.terminal.clear()?; Ok(AppExitInfo { token_usage: app.token_usage(), - conversation_id: app.chat_widget.conversation_id(), + conversation_id: 
app.active_widget().and_then(ChatWidget::conversation_id), update_action: app.pending_update_action, }) } @@ -341,21 +402,25 @@ impl App { // [tui-textarea]: https://github.com/rhysd/tui-textarea/blob/4d18622eeac13b309e0ff6a55a46ac6706da68cf/src/textarea.rs#L782-L783 // [iTerm2]: https://github.com/gnachman/iTerm2/blob/5d0c0d9f68523cbd0494dad5422998964a2ecd8d/sources/iTermPasteHelper.m#L206-L216 let pasted = pasted.replace("\r", "\n"); - self.chat_widget.handle_paste(pasted); + self.active_widget_mut().handle_paste(pasted); } TuiEvent::Draw => { - self.chat_widget.maybe_post_pending_notification(tui); + self.active_widget_mut() + .maybe_post_pending_notification(tui); if self - .chat_widget + .active_widget_mut() .handle_paste_burst_tick(tui.frame_requester()) { return Ok(true); } tui.draw( - self.chat_widget.desired_height(tui.terminal.size()?.width), + self.active_widget() + .expect("active widget") + .desired_height(tui.terminal.size()?.width), |frame| { - frame.render_widget_ref(&self.chat_widget, frame.area()); - if let Some((x, y)) = self.chat_widget.cursor_pos(frame.area()) { + let widget = self.active_widget().expect("active widget"); + frame.render_widget_ref(widget, frame.area()); + if let Some((x, y)) = widget.cursor_pos(frame.area()) { frame.set_cursor_position((x, y)); } }, @@ -366,49 +431,284 @@ impl App { Ok(true) } + pub(crate) fn active_widget(&self) -> Option<&ChatWidget> { + self.sessions + .get(&self.active_session_id) + .map(SessionHandle::widget) + } + + pub(crate) fn active_widget_mut(&mut self) -> &mut ChatWidget { + self.sessions + .get_mut(&self.active_session_id) + .expect("active session handle") + .widget_mut() + } + + fn render_history_cell(&mut self, cell: Arc, tui: &mut tui::Tui) { + if let Some(Overlay::Transcript(t)) = &mut self.overlay { + t.insert_cell(cell.clone()); + tui.frame_requester().schedule_frame(); + } + self.transcript_cells.push(cell.clone()); + let mut display = 
cell.display_lines(tui.terminal.last_known_screen_size.width); + if !display.is_empty() { + if !cell.is_stream_continuation() { + if self.has_emitted_history_lines { + display.insert(0, Line::from("")); + } else { + self.has_emitted_history_lines = true; + } + } + if self.overlay.is_some() { + self.deferred_history_lines.extend(display); + } else { + tui.insert_history_lines(display); + } + } + } + + pub(crate) fn apply_active_history_from_handle(&mut self) { + if let Some(handle) = self.sessions.get(&self.active_session_id) { + self.transcript_cells = handle.history().to_vec(); + self.has_emitted_history_lines = !self.transcript_cells.is_empty(); + } else { + self.transcript_cells.clear(); + self.has_emitted_history_lines = false; + } + self.deferred_history_lines.clear(); + } + + pub(crate) fn sync_active_handle_history(&mut self) { + if let Some(handle) = self.sessions.get_mut(&self.active_session_id) { + handle.set_history(self.transcript_cells.clone()); + } + } + + fn replay_active_session_from_last_user(&mut self, tui: &mut tui::Tui) { + let session_id = self.active_session_id.clone(); + let width = tui.terminal.last_known_screen_size.width; + + { + let Some(handle) = self.sessions.get_mut(&session_id) else { + return; + }; + let header_label = handle + .summary + .as_ref() + .map(|summary| format!("#{}", summary.agent_id.as_str())) + .unwrap_or_else(|| "#main".to_string()); + + let header = format!("Attached to {header_label} (shadow snapshot)"); + handle.widget_mut().add_info_message(header, None); + self.transcript_cells = handle.history().to_vec(); + self.has_emitted_history_lines = !self.transcript_cells.is_empty(); + } + + let Some(handle) = self.sessions.get(&session_id) else { + return; + }; + let history = handle.history(); + if history.is_empty() { + return; + } + + let replay_end = history.len().saturating_sub(1); + if replay_end == 0 { + return; + } + + let mut start_idx = 0; + for idx in (0..replay_end).rev() { + if history[idx] + .as_any() + 
.downcast_ref::() + .is_some() + { + start_idx = idx; + break; + } + } + + for cell in &history[start_idx..replay_end] { + let mut display = cell.display_lines(width); + if display.is_empty() { + continue; + } + if !cell.is_stream_continuation() { + if self.has_emitted_history_lines { + display.insert(0, Line::from("")); + } else { + self.has_emitted_history_lines = true; + } + } + tui.insert_history_lines(display); + } + + self.flush_pending_child_summaries(&session_id); + } + + fn enqueue_child_summary(&mut self, parent_id: &str, summary: ChildSummary) { + if let Some(parent) = self.sessions.get_mut(parent_id) { + if parent_id == self.active_session_id { + Self::render_child_summary_on_widget(parent.widget_mut(), summary); + } else { + parent.push_child_summary(summary); + } + } else { + tracing::warn!( + parent = %parent_id, + "unable to route delegate summary to parent conversation" + ); + } + } + + fn flush_pending_child_summaries(&mut self, session_id: &str) { + if let Some(handle) = self.sessions.get_mut(session_id) { + let summaries = handle.drain_child_summaries(); + for summary in summaries { + Self::render_child_summary_on_widget(handle.widget_mut(), summary); + } + } + } + + fn render_child_summary_on_widget(widget: &mut ChatWidget, summary: ChildSummary) { + match summary { + ChildSummary::Completion(data) => { + let ChildCompletionSummary { + child_conversation_id: _, + label, + hint, + output, + mode: _, + } = data; + widget.add_delegate_completion(output.as_deref(), hint, &label); + } + ChildSummary::Failure { + child_conversation_id: _, + label, + error, + mode: _, + } => { + widget.add_error_message(format!("{} failed: {error}", label.base_label)); + } + } + } + async fn handle_event(&mut self, tui: &mut tui::Tui, event: AppEvent) -> Result { match event { AppEvent::NewSession => { let init = crate::chatwidget::ChatWidgetInit { config: self.config.clone(), frame_requester: tui.frame_requester(), - app_event_tx: self.app_event_tx.clone(), + 
app_event_tx: self.app_event_tx.scoped(), initial_prompt: None, initial_images: Vec::new(), enhanced_keys_supported: self.enhanced_keys_supported, auth_manager: self.auth_manager.clone(), feedback: self.feedback.clone(), }; - self.chat_widget = ChatWidget::new(init, self.server.clone()); + let mut session = ChatWidget::new_session(init, self.server.clone()).await?; + let session_conversation_id = session.conversation_id; + session + .widget + .ensure_conversation_id(&session_conversation_id.to_string()); + spawn_event_forwarder( + self.app_event_tx.clone(), + session_conversation_id, + session.event_rx, + ); + self.sessions.insert( + self.primary_session_id.clone(), + SessionHandle::new(session.widget, None, DelegateSessionMode::Standard, None), + ); + self.active_session_id = self.primary_session_id.clone(); + self.apply_active_history_from_handle(); + self.replay_active_session_from_last_user(tui); + self.sync_active_handle_history(); tui.frame_requester().schedule_frame(); } AppEvent::DelegateUpdate(update) => { - self.handle_delegate_update(update); + self.handle_delegate_update(update).await; + } + AppEvent::DelegateShadowEvent { + conversation_id, + event, + } => { + let agent_id = self.agent_id_for_conversation(&conversation_id); + self.delegate_orchestrator + .push_shadow_event(agent_id, &conversation_id, &event) + .await; + } + AppEvent::DelegateShadowUserInput { + conversation_id, + inputs, + } => { + let agent_id = self.agent_id_for_conversation(&conversation_id); + self.delegate_orchestrator + .push_shadow_user_inputs(agent_id, &conversation_id, &inputs) + .await; + } + AppEvent::DelegateShadowAgentOutput { + conversation_id, + outputs, + } => { + let agent_id = self.agent_id_for_conversation(&conversation_id); + self.delegate_orchestrator + .push_shadow_outputs(agent_id, &conversation_id, &outputs) + .await; } - AppEvent::InsertHistoryCell(cell) => { + AppEvent::ShowStatus => { + let metrics = self.delegate_orchestrator.shadow_metrics().await; + let ma 
= &self.config.multi_agent; + let shadow_data = if ma.enable_shadow_cache { + Some(StatusShadowData { + enabled: true, + cached_sessions: metrics.session_count, + max_sessions: ma.max_shadow_sessions, + total_events: metrics.events, + total_user_inputs: metrics.user_inputs, + total_agent_outputs: metrics.agent_outputs, + total_raw_bytes: metrics.total_bytes, + total_compressed_bytes: metrics.total_compressed_bytes, + memory_limit_bytes: ma.max_shadow_memory_bytes, + compression_enabled: ma.compress_shadows, + }) + } else { + Some(StatusShadowData { + enabled: false, + cached_sessions: metrics.session_count, + max_sessions: ma.max_shadow_sessions, + total_events: metrics.events, + total_user_inputs: metrics.user_inputs, + total_agent_outputs: metrics.agent_outputs, + total_raw_bytes: metrics.total_bytes, + total_compressed_bytes: metrics.total_compressed_bytes, + memory_limit_bytes: ma.max_shadow_memory_bytes, + compression_enabled: ma.compress_shadows, + }) + }; + self.active_widget_mut().add_status_output(shadow_data); + } + AppEvent::InsertHistoryCell { + conversation_id, + cell, + } => { + let Some(target_id) = conversation_id else { + tracing::error!("received history cell without conversation id; dropping"); + return Ok(true); + }; + let cell: Arc = cell.into(); - if let Some(Overlay::Transcript(t)) = &mut self.overlay { - t.insert_cell(cell.clone()); - tui.frame_requester().schedule_frame(); - } - self.transcript_cells.push(cell.clone()); - let mut display = cell.display_lines(tui.terminal.last_known_screen_size.width); - if !display.is_empty() { - // Only insert a separating blank line for new cells that are not - // part of an ongoing stream. Streaming continuations should not - // accrue extra blank lines between chunks. 
- if !cell.is_stream_continuation() { - if self.has_emitted_history_lines { - display.insert(0, Line::from("")); - } else { - self.has_emitted_history_lines = true; - } - } - if self.overlay.is_some() { - self.deferred_history_lines.extend(display); - } else { - tui.insert_history_lines(display); + if let Some(handle) = self.sessions.get_mut(&target_id) { + handle.push_history(cell.clone()); + if target_id == self.active_session_id { + self.render_history_cell(cell, tui); } + } else { + tracing::warn!( + conversation = %target_id, + "received history cell for unknown conversation" + ); } } AppEvent::StartCommitAnimation => { @@ -431,10 +731,13 @@ impl App { self.commit_anim_running.store(false, Ordering::Release); } AppEvent::CommitTick => { - self.chat_widget.on_commit_tick(); + self.active_widget_mut().on_commit_tick(); } - AppEvent::CodexEvent(event) => { - self.chat_widget.handle_codex_event(event); + AppEvent::CodexEvent { + conversation_id, + event, + } => { + self.handle_codex_event(&conversation_id, event); } AppEvent::ConversationHistory(ev) => { self.on_conversation_history_for_backtrack(tui, ev).await?; @@ -442,10 +745,10 @@ impl App { AppEvent::ExitRequest => { return Ok(false); } - AppEvent::CodexOp(op) => self.chat_widget.submit_op(op), + AppEvent::CodexOp(op) => self.active_widget_mut().submit_op(op), AppEvent::DiffResult(text) => { // Clear the in-progress state in the bottom pane - self.chat_widget.on_diff_complete(); + self.active_widget_mut().on_diff_complete(); // Enter alternate screen using TUI helper and build pager lines let _ = tui.enter_alt_screen(); let pager_lines: Vec> = if text.trim().is_empty() { @@ -465,71 +768,74 @@ impl App { } } AppEvent::FileSearchResult { query, matches } => { - self.chat_widget.apply_file_search_result(query, matches); + self.active_widget_mut() + .apply_file_search_result(query, matches); } AppEvent::UpdateReasoningEffort(effort) => { self.on_update_reasoning_effort(effort); } AppEvent::UpdateModel(model) => { 
- self.chat_widget.set_model(&model); + self.active_widget_mut().set_model(&model); self.config.model = model.clone(); if let Some(family) = find_family_for_model(&model) { self.config.model_family = family; } } AppEvent::OpenReasoningPopup { model, presets } => { - self.chat_widget.open_reasoning_popup(model, presets); + self.active_widget_mut() + .open_reasoning_popup(model, presets); } AppEvent::OpenFullAccessConfirmation { preset } => { - self.chat_widget.open_full_access_confirmation(preset); + self.active_widget_mut() + .open_full_access_confirmation(preset); } AppEvent::PersistModelSelection { model, effort } => { - let profile = self.active_profile.as_deref(); - match persist_model_selection(&self.config.codex_home, profile, &model, effort) - .await - { + let profile = self.active_profile.clone(); + let result = persist_model_selection( + &self.config.codex_home, + profile.as_deref(), + &model, + effort, + ) + .await; + + match result { Ok(()) => { let effort_label = effort .map(|eff| format!(" with {eff} reasoning")) .unwrap_or_else(|| " with default reasoning".to_string()); - if let Some(profile) = profile { - self.chat_widget.add_info_message( - format!( - "Model changed to {model}{effort_label} for {profile} profile" - ), - None, - ); - } else { - self.chat_widget.add_info_message( - format!("Model changed to {model}{effort_label}"), - None, - ); - } + let message = match profile.as_deref() { + Some(profile) => format!( + "Model changed to {model}{effort_label} for {profile} profile" + ), + None => format!("Model changed to {model}{effort_label}"), + }; + self.active_widget_mut().add_info_message(message, None); } Err(err) => { tracing::error!( error = %err, "failed to persist model selection" ); - if let Some(profile) = profile { - self.chat_widget.add_error_message(format!( - "Failed to save model for profile `{profile}`: {err}" - )); - } else { - self.chat_widget - .add_error_message(format!("Failed to save default model: {err}")); - } + let message 
= match profile.as_deref() { + Some(profile) => { + format!("Failed to save model for profile `{profile}`: {err}") + } + None => format!("Failed to save default model: {err}"), + }; + self.active_widget_mut().add_error_message(message); } } } AppEvent::UpdateAskForApprovalPolicy(policy) => { - self.chat_widget.set_approval_policy(policy); + self.active_widget_mut().set_approval_policy(policy); } AppEvent::UpdateSandboxPolicy(policy) => { - self.chat_widget.set_sandbox_policy(policy); + self.active_widget_mut().set_sandbox_policy(policy); } AppEvent::UpdateFullAccessWarningAcknowledged(ack) => { - self.chat_widget.set_full_access_warning_acknowledged(ack); + self.active_widget_mut() + .set_full_access_warning_acknowledged(ack); } AppEvent::PersistFullAccessWarningAcknowledged => { if let Err(err) = set_hide_full_access_warning(&self.config.codex_home, true) { @@ -537,13 +843,13 @@ impl App { error = %err, "failed to persist full access warning acknowledgement" ); - self.chat_widget.add_error_message(format!( + self.active_widget_mut().add_error_message(format!( "Failed to save full access confirmation preference: {err}" )); } } AppEvent::OpenApprovalsPopup => { - self.chat_widget.open_approvals_popup(); + self.active_widget_mut().open_approvals_popup(); } AppEvent::OpenDelegatePicker => { let sessions = self.delegate_orchestrator.active_sessions().await; @@ -558,26 +864,39 @@ impl App { } else { None }; - picker_sessions - .push(crate::chatwidget::DelegatePickerSession { summary, run_id }); + let shadow = self + .delegate_orchestrator + .shadow_session_summary(summary.conversation_id.as_str()) + .await; + picker_sessions.push(crate::chatwidget::DelegatePickerSession { + summary, + run_id, + shadow, + }); } - self.chat_widget.open_delegate_picker( + let active_delegate_id = if self.active_session_id != self.primary_session_id { + Some(self.active_session_id.clone()) + } else { + None + }; + let active_delegate = active_delegate_id.as_deref(); + 
self.active_widget_mut().open_delegate_picker( picker_sessions, detached_runs, - self.active_delegate.as_deref(), + active_delegate, ); } AppEvent::EnterDelegateSession(conversation_id) => { if let Err(err) = self.activate_delegate_session(tui, conversation_id).await { tracing::error!("failed to enter delegate session: {err}"); - self.chat_widget + self.active_widget_mut() .add_error_message(format!("Failed to open delegate: {err}")); } } AppEvent::ExitDelegateSession => { if let Err(err) = self.return_to_primary(tui).await { tracing::error!("failed to return to primary agent: {err}"); - self.chat_widget + self.active_widget_mut() .add_error_message(format!("Failed to return to main agent: {err}")); } } @@ -588,22 +907,26 @@ impl App { .await { Ok(()) => self - .chat_widget + .active_widget_mut() .add_info_message(format!("Dismissed detached run {run_id}"), None), - Err(err) => self.chat_widget.add_error_message(err), + Err(err) => self.active_widget_mut().add_error_message(err), } } AppEvent::InsertUserTextMessage(text) => { - self.chat_widget.submit_text_message(text); + self.active_widget_mut().submit_text_message(text); } AppEvent::OpenReviewBranchPicker(cwd) => { - self.chat_widget.show_review_branch_picker(&cwd).await; + self.active_widget_mut() + .show_review_branch_picker(&cwd) + .await; } AppEvent::OpenReviewCommitPicker(cwd) => { - self.chat_widget.show_review_commit_picker(&cwd).await; + self.active_widget_mut() + .show_review_commit_picker(&cwd) + .await; } AppEvent::OpenReviewCustomPrompt => { - self.chat_widget.show_review_custom_prompt(); + self.active_widget_mut().show_review_custom_prompt(); } AppEvent::FullScreenApprovalRequest(request) => match request { ApprovalRequest::ApplyPatch { cwd, changes, .. 
} => { @@ -628,106 +951,242 @@ impl App { Ok(true) } - fn handle_delegate_update(&mut self, event: DelegateEvent) { + async fn handle_delegate_update(&mut self, event: DelegateEvent) { match event { - DelegateEvent::Started { - run_id, - agent_id, - prompt, - parent_run_id, - mode, - .. - } => { - let display = self.delegate_tree.insert( - run_id.clone(), - agent_id.clone(), - parent_run_id.clone(), - ); - let claim_status = parent_run_id.is_none() && self.delegate_status_owner.is_none(); - if claim_status { - self.delegate_status_owner = Some(run_id.clone()); - self.chat_widget - .set_delegate_status_owner(&run_id, &agent_id); - } - self.chat_widget.on_delegate_started( - &run_id, - &agent_id, - &prompt, - display.label, - claim_status, - mode, - ); - } - DelegateEvent::Delta { run_id, chunk, .. } => { - self.chat_widget.on_delegate_delta(&run_id, &chunk); - } - DelegateEvent::Completed { - run_id, + DelegateEvent::Info { agent_id, - output, - duration, - mode, + conversation_id, + message, } => { - let display = self.delegate_tree.display_for(&run_id, &agent_id); - self.delegate_tree.remove(&run_id); - if self.delegate_status_owner.as_deref() == Some(run_id.as_str()) { - self.delegate_status_owner = None; - if let Some((next_run_id, next_agent)) = self.delegate_tree.first_active_root() - { - self.delegate_status_owner = Some(next_run_id.clone()); - self.chat_widget - .set_delegate_status_owner(&next_run_id, &next_agent); - } else { - self.chat_widget.clear_delegate_status_owner(); - } - } - let streamed = self - .chat_widget - .on_delegate_completed(&run_id, &display.label); - let hint = Some(format!( - "finished in {}", - Self::format_delegate_duration(duration) - )); - let response = if display.depth == 0 { - output.as_deref().filter(|_| !streamed) + if let Some(handle) = self.sessions.get_mut(&conversation_id) { + let label = format!("#{}", agent_id.as_str()); + handle + .widget_mut() + .add_info_message(format!("{label}: {message}"), None); } else { - None - 
}; - self.chat_widget - .add_delegate_completion(response, hint, &display.label); - if mode == DelegateSessionMode::Detached { - self.chat_widget.notify_detached_completion(&display.label); - self.chat_widget.show_detached_completion_actions( - &agent_id, - &run_id, - output.as_deref(), + tracing::warn!( + agent = %agent_id.as_str(), + conversation = %conversation_id, + "received delegate info for unknown conversation" ); } } - DelegateEvent::Failed { - run_id, - agent_id, - error, - mode, - } => { - let display = self.delegate_tree.display_for(&run_id, &agent_id); - self.delegate_tree.remove(&run_id); - if self.delegate_status_owner.as_deref() == Some(run_id.as_str()) { - self.delegate_status_owner = None; - if let Some((next_run_id, next_agent)) = self.delegate_tree.first_active_root() - { - self.delegate_status_owner = Some(next_run_id.clone()); - self.chat_widget - .set_delegate_status_owner(&next_run_id, &next_agent); - } else { - self.chat_widget.clear_delegate_status_owner(); + other => { + let (run_id, owner_conversation_id) = match &other { + DelegateEvent::Started { + run_id, + owner_conversation_id, + .. + } + | DelegateEvent::Delta { + run_id, + owner_conversation_id, + .. + } + | DelegateEvent::Completed { + run_id, + owner_conversation_id, + .. + } + | DelegateEvent::Failed { + run_id, + owner_conversation_id, + .. + } => (run_id.clone(), owner_conversation_id.clone()), + DelegateEvent::Info { .. } => unreachable!(), + }; + + let mut parent_forward: Option<(String, ChildSummary)> = None; + + if let Some(handle) = self.sessions.get_mut(&owner_conversation_id) { + match other { + DelegateEvent::Started { + run_id, + agent_id, + owner_conversation_id: _, + prompt, + parent_run_id, + mode, + .. 
+ } => { + if let Some(parent_run_id) = parent_run_id.as_ref() { + if let Some(parent_conversation) = self + .delegate_orchestrator + .owner_conversation_for_run(parent_run_id) + .await + { + self.run_parent_map + .insert(run_id.clone(), parent_conversation.clone()); + handle.set_parent_id(Some(parent_conversation)); + } + } else { + self.run_parent_map.remove(&run_id); + handle.set_parent_id(None); + } + + let display = handle.delegate_tree.insert( + run_id.clone(), + agent_id.clone(), + parent_run_id.clone(), + ); + let claim_status = + parent_run_id.is_none() && handle.delegate_status_owner.is_none(); + if claim_status { + handle.delegate_status_owner = Some(run_id.clone()); + handle + .widget_mut() + .set_delegate_status_owner(&run_id, &agent_id); + } + handle.widget_mut().on_delegate_started( + &run_id, + &agent_id, + &prompt, + display.label, + claim_status, + mode, + ); + } + DelegateEvent::Delta { + run_id, + owner_conversation_id: _, + chunk, + .. + } => { + handle.widget_mut().on_delegate_delta(&run_id, &chunk); + } + DelegateEvent::Completed { + run_id, + agent_id, + owner_conversation_id: _, + output, + duration, + mode, + .. 
+ } => { + let display = handle.delegate_tree.display_for(&run_id, &agent_id); + handle.delegate_tree.remove(&run_id); + if handle.delegate_status_owner.as_deref() == Some(run_id.as_str()) { + handle.delegate_status_owner = None; + if let Some((next_run_id, next_agent)) = + handle.delegate_tree.first_active_root() + { + handle.delegate_status_owner = Some(next_run_id.clone()); + handle + .widget_mut() + .set_delegate_status_owner(&next_run_id, &next_agent); + } else { + handle.widget_mut().clear_delegate_status_owner(); + } + } + let streamed = handle + .widget_mut() + .on_delegate_completed(&run_id, &display.label); + let hint = Some(format!( + "finished in {}", + Self::format_delegate_duration(duration) + )); + let forwarded_output = if display.depth == 0 && !streamed { + output.clone() + } else { + None + }; + let hint_for_widget = hint.clone(); + handle.widget_mut().add_delegate_completion( + forwarded_output.as_deref(), + hint_for_widget, + &display.label, + ); + if mode == DelegateSessionMode::Detached { + handle + .widget_mut() + .notify_detached_completion(&display.label); + handle.widget_mut().show_detached_completion_actions( + &agent_id, + &run_id, + output.as_deref(), + ); + } + let parent_id = self + .run_parent_map + .get(&run_id) + .cloned() + .or_else(|| handle.parent_id().cloned()); + if let Some(parent_id) = parent_id { + parent_forward = Some(( + parent_id, + ChildSummary::Completion(ChildCompletionSummary { + child_conversation_id: owner_conversation_id.clone(), + label: display.label.clone(), + hint, + output: forwarded_output, + mode, + }), + )); + } + self.run_parent_map.remove(&run_id); + } + DelegateEvent::Failed { + run_id, + agent_id, + owner_conversation_id: _, + error, + mode, + .. 
+ } => { + let display = handle.delegate_tree.display_for(&run_id, &agent_id); + handle.delegate_tree.remove(&run_id); + if handle.delegate_status_owner.as_deref() == Some(run_id.as_str()) { + handle.delegate_status_owner = None; + if let Some((next_run_id, next_agent)) = + handle.delegate_tree.first_active_root() + { + handle.delegate_status_owner = Some(next_run_id.clone()); + handle + .widget_mut() + .set_delegate_status_owner(&next_run_id, &next_agent); + } else { + handle.widget_mut().clear_delegate_status_owner(); + } + } + handle + .widget_mut() + .on_delegate_failed(&run_id, &display.label, &error); + if mode == DelegateSessionMode::Detached { + handle + .widget_mut() + .notify_detached_failure(&display.label, &error); + } + let parent_id = self + .run_parent_map + .get(&run_id) + .cloned() + .or_else(|| handle.parent_id().cloned()); + if let Some(parent_id) = parent_id { + parent_forward = Some(( + parent_id, + ChildSummary::Failure { + child_conversation_id: owner_conversation_id.clone(), + label: display.label.clone(), + error: error.clone(), + mode, + }, + )); + } + self.run_parent_map.remove(&run_id); + } + DelegateEvent::Info { .. 
} => unreachable!(), } + } else { + tracing::warn!( + run_id = %run_id, + conversation = %owner_conversation_id, + "received delegate event for unknown conversation" + ); + return; } - self.chat_widget - .on_delegate_failed(&run_id, &display.label, &error); - if mode == DelegateSessionMode::Detached { - self.chat_widget - .notify_detached_failure(&display.label, &error); + + if let Some((parent_id, summary)) = parent_forward { + self.enqueue_child_summary(&parent_id, summary); } } } @@ -738,53 +1197,118 @@ impl App { tui: &mut tui::Tui, conversation_id: String, ) -> Result<(), String> { - if self.active_delegate.as_deref() == Some(conversation_id.as_str()) { + if self.active_session_id == conversation_id { return Ok(()); } - if self.active_delegate.is_some() { - self.stash_active_delegate(); + self.sync_active_handle_history(); + self.active_widget_mut().set_delegate_context(None); + + let active = self + .delegate_orchestrator + .enter_session(&conversation_id) + .await + .map_err(|err| format!("{err}"))?; + + use Entry::*; + match self.sessions.entry(conversation_id.clone()) { + Occupied(mut occ) => { + let handle = occ.get_mut(); + handle.widget.ensure_conversation_id(&conversation_id); + handle.set_summary(Some(active.summary.clone())); + handle.set_mode(active.summary.mode); + handle.set_shadow_summary(active.shadow_summary.clone()); + handle + .widget + .set_delegate_context(Some(active.summary.clone())); + if handle.history().is_empty() { + if let Some(snapshot) = active.shadow_snapshot.as_ref() { + handle.widget.hydrate_from_shadow(snapshot); + } else { + handle.widget.clear_shadow_capture(); + handle.widget.add_info_message( + "Shadow cache unavailable; replaying from rollout.".to_string(), + None, + ); + } + } + drop(occ); + drop(active.event_rx); + } + Vacant(vacant) => { + let init = ChatWidgetInit { + config: active.config.clone(), + frame_requester: tui.frame_requester(), + app_event_tx: self.app_event_tx.scoped(), + initial_prompt: None, + 
initial_images: Vec::new(), + enhanced_keys_supported: self.enhanced_keys_supported, + auth_manager: self.auth_manager.clone(), + feedback: self.feedback.clone(), + }; + let mut session = ChatWidget::new_session_from_existing_with_events( + init, + active.conversation.clone(), + active.session_configured.clone(), + active.event_rx, + ); + session.widget.ensure_conversation_id(&conversation_id); + session + .widget + .set_delegate_context(Some(active.summary.clone())); + if let Some(snapshot) = active.shadow_snapshot.as_ref() { + session.widget.hydrate_from_shadow(snapshot); + } else { + session.widget.clear_shadow_capture(); + session.widget.add_info_message( + "Shadow cache unavailable; replaying from rollout.".to_string(), + None, + ); + } + spawn_event_forwarder( + self.app_event_tx.clone(), + session.conversation_id, + session.event_rx, + ); + vacant.insert(SessionHandle::new( + session.widget, + Some(active.summary.clone()), + active.summary.mode, + active.shadow_summary.clone(), + )); + } } - let state = if let Some(state) = self.delegate_sessions.remove(&conversation_id) { - state - } else { - let session = self - .delegate_orchestrator - .enter_session(&conversation_id) + let parent_conversation_id = if let Some(parent_run) = self + .delegate_orchestrator + .parent_run_for_conversation(&conversation_id) + .await + { + self.delegate_orchestrator + .owner_conversation_for_run(&parent_run) .await - .map_err(|err| format!("{err}"))?; - let init = ChatWidgetInit { - config: session.config.clone(), - frame_requester: tui.frame_requester(), - app_event_tx: self.app_event_tx.clone(), - initial_prompt: None, - initial_images: Vec::new(), - enhanced_keys_supported: self.enhanced_keys_supported, - auth_manager: self.auth_manager.clone(), - feedback: self.feedback.clone(), - }; - let session_configured = expect_unique_session_configured(session.session_configured); - let mut chat_widget = - ChatWidget::new_from_existing(init, session.conversation, session_configured); 
- chat_widget.set_delegate_context(Some(session.summary.clone())); - DelegateSessionState { - summary: session.summary, - chat_widget, - } + } else { + None }; - let DelegateSessionState { - summary, - mut chat_widget, - } = state; - chat_widget.set_delegate_context(Some(summary.clone())); - let mut previous = std::mem::replace(&mut self.chat_widget, chat_widget); - previous.set_delegate_context(None); - self.primary_chat_backup = Some(previous); - self.active_delegate = Some(conversation_id.clone()); - self.active_delegate_summary = Some(summary.clone()); - self.chat_widget.set_delegate_context(Some(summary.clone())); + if let Some(handle) = self.sessions.get_mut(&conversation_id) { + handle.set_parent_id(parent_conversation_id.clone()); + } + if let Some(parent_id) = parent_conversation_id.as_ref() { + if let Some(parent) = self.sessions.get_mut(parent_id) { + parent.add_child(conversation_id.clone()); + } + } + + self.active_session_id = conversation_id.clone(); + if let Some(handle) = self.sessions.get_mut(&self.active_session_id) + && let Some(summary) = handle.summary.clone() + { + handle.widget.set_delegate_context(Some(summary)); + } + self.apply_active_history_from_handle(); + self.replay_active_session_from_last_user(tui); + self.sync_active_handle_history(); self.delegate_orchestrator .touch_session(&conversation_id) .await; @@ -792,59 +1316,57 @@ impl App { Ok(()) } - fn stash_active_delegate(&mut self) { - if let Some(active_id) = self.active_delegate.take() { - let mut summary = match self.active_delegate_summary.take() { - Some(summary) => summary, - None => return, - }; - let Some(main_chat) = self.primary_chat_backup.take() else { - self.active_delegate_summary = Some(summary); - return; - }; - summary.last_interacted_at = SystemTime::now(); - let mut delegate_chat = std::mem::replace(&mut self.chat_widget, main_chat); - delegate_chat.set_delegate_context(Some(summary.clone())); - self.chat_widget.set_delegate_context(None); - 
self.delegate_sessions.insert( - active_id, - DelegateSessionState { - summary, - chat_widget: delegate_chat, - }, - ); + fn agent_id_for_conversation(&self, conversation_id: &str) -> Option<&AgentId> { + self.sessions + .get(conversation_id) + .and_then(|handle| handle.summary.as_ref().map(|summary| &summary.agent_id)) + } + + fn handle_codex_event(&mut self, conversation_id: &str, event: Event) { + if let Some(handle) = self.sessions.get_mut(conversation_id) { + handle.widget.ensure_conversation_id(conversation_id); + handle.widget.handle_codex_event(event); } } async fn return_to_primary(&mut self, tui: &mut tui::Tui) -> Result<(), String> { - if let Some(active_id) = self.active_delegate.take() { - let Some(mut summary) = self.active_delegate_summary.take() else { - return Err("delegate summary missing".to_string()); - }; - let capture = self.chat_widget.take_delegate_capture(); - let main_chat = self - .primary_chat_backup - .take() - .ok_or_else(|| "primary conversation unavailable".to_string())?; - summary.last_interacted_at = SystemTime::now(); - let mut delegate_chat = std::mem::replace(&mut self.chat_widget, main_chat); - delegate_chat.set_delegate_context(Some(summary.clone())); - self.chat_widget.set_delegate_context(None); - self.delegate_sessions.insert( - active_id.clone(), - DelegateSessionState { - summary: summary.clone(), - chat_widget: delegate_chat, - }, - ); - self.delegate_orchestrator.touch_session(&active_id).await; - self.primary_chat_backup = None; - self.active_delegate_summary = None; - if let Some(capture) = capture { - self.chat_widget.apply_delegate_summary(&summary, capture); + if self.active_session_id == self.primary_session_id { + return Ok(()); + } + + self.sync_active_handle_history(); + + let active_id = self.active_session_id.clone(); + let capture = if let Some(handle) = self.sessions.get_mut(&active_id) { + if let Some(summary) = handle.summary_mut() { + summary.last_interacted_at = SystemTime::now(); } - 
tui.frame_requester().schedule_frame(); + handle.widget.take_delegate_capture() + } else { + None + }; + + self.active_session_id = self.primary_session_id.clone(); + self.apply_active_history_from_handle(); + self.replay_active_session_from_last_user(tui); + if let Some(primary) = self.sessions.get_mut(&self.primary_session_id) { + primary.widget.set_delegate_context(None); } + + if let Some(handle) = self.sessions.get_mut(&active_id) + && let Some(summary) = handle.summary.clone() + { + handle.widget.set_delegate_context(Some(summary.clone())); + if let Some(capture) = capture + && let Some(primary) = self.sessions.get_mut(&self.primary_session_id) + { + primary.widget.apply_delegate_summary(&summary, capture); + } + } + + self.delegate_orchestrator.touch_session(&active_id).await; + self.sync_active_handle_history(); + tui.frame_requester().schedule_frame(); Ok(()) } @@ -861,11 +1383,13 @@ impl App { } pub(crate) fn token_usage(&self) -> codex_core::protocol::TokenUsage { - self.chat_widget.token_usage() + self.active_widget() + .map(ChatWidget::token_usage) + .unwrap_or_default() } fn on_update_reasoning_effort(&mut self, effort: Option) { - self.chat_widget.set_reasoning_effort(effort); + self.active_widget_mut().set_reasoning_effort(effort); self.config.model_reasoning_effort = effort; } @@ -891,12 +1415,13 @@ impl App { kind: KeyEventKind::Press | KeyEventKind::Repeat, .. } => { - if self.chat_widget.is_normal_backtrack_mode() - && self.chat_widget.composer_is_empty() + if self + .active_widget() + .is_some_and(|w| w.is_normal_backtrack_mode() && w.composer_is_empty()) { self.handle_backtrack_esc_key(tui); } else { - self.chat_widget.handle_key_event(key_event); + self.active_widget_mut().handle_key_event(key_event); } } // Enter confirms backtrack when primed + count > 0. Otherwise pass to widget. @@ -906,7 +1431,9 @@ impl App { .. 
} if self.backtrack.primed && self.backtrack.nth_user_message != usize::MAX - && self.chat_widget.composer_is_empty() => + && self + .active_widget() + .is_some_and(super::chatwidget::ChatWidget::composer_is_empty) => { // Delegate to helper for clarity; preserves behavior. self.confirm_backtrack_from_main(); @@ -921,7 +1448,7 @@ impl App { if key_event.code != KeyCode::Esc && self.backtrack.primed { self.reset_backtrack_state(); } - self.chat_widget.handle_key_event(key_event); + self.active_widget_mut().handle_key_event(key_event); } _ => { // Ignore Release key events. @@ -930,11 +1457,127 @@ impl App { } } -struct DelegateSessionState { - summary: DelegateSessionSummary, - chat_widget: ChatWidget, +pub(crate) struct SessionHandle { + widget: ChatWidget, + summary: Option, + mode: DelegateSessionMode, + history: Vec>, + shadow: Option, + delegate_tree: DelegateTree, + delegate_status_owner: Option, + parent_conversation_id: Option, + #[allow(dead_code)] + child_conversations: HashSet, + pending_child_summaries: VecDeque, +} + +impl SessionHandle { + fn new( + widget: ChatWidget, + summary: Option, + mode: DelegateSessionMode, + shadow: Option, + ) -> Self { + Self { + widget, + summary, + mode, + history: Vec::new(), + shadow, + delegate_tree: DelegateTree::default(), + delegate_status_owner: None, + parent_conversation_id: None, + child_conversations: HashSet::new(), + pending_child_summaries: VecDeque::new(), + } + } + + fn summary_mut(&mut self) -> Option<&mut DelegateSessionSummary> { + self.summary.as_mut() + } + + pub(crate) fn replace( + &mut self, + widget: ChatWidget, + summary: Option, + mode: DelegateSessionMode, + history: Option>>, + shadow: Option, + ) { + self.widget = widget; + self.summary = summary; + self.mode = mode; + if let Some(history) = history { + self.history = history; + } + self.shadow = shadow; + } + + pub(crate) fn widget(&self) -> &ChatWidget { + &self.widget + } + + pub(crate) fn widget_mut(&mut self) -> &mut ChatWidget { + &mut 
self.widget + } + + pub(crate) fn push_history(&mut self, cell: Arc) { + self.history.push(cell); + } + + pub(crate) fn set_history(&mut self, history: Vec>) { + self.history = history; + } + + pub(crate) fn history(&self) -> &[Arc] { + &self.history + } + + pub(crate) fn set_summary(&mut self, summary: Option) { + self.summary = summary; + } + + pub(crate) fn set_mode(&mut self, mode: DelegateSessionMode) { + self.mode = mode; + } + + pub(crate) fn set_shadow_summary(&mut self, shadow: Option) { + self.shadow = shadow; + } + + #[allow(dead_code)] + pub(crate) fn parent_id(&self) -> Option<&String> { + self.parent_conversation_id.as_ref() + } + + pub(crate) fn set_parent_id(&mut self, parent: Option) { + self.parent_conversation_id = parent; + } + + pub(crate) fn add_child(&mut self, conversation_id: String) { + self.child_conversations.insert(conversation_id); + } + + #[allow(dead_code)] + pub(crate) fn remove_child(&mut self, conversation_id: &str) { + self.child_conversations.remove(conversation_id); + } + + #[allow(dead_code)] + pub(crate) fn child_conversations(&self) -> impl Iterator { + self.child_conversations.iter() + } + + fn push_child_summary(&mut self, summary: ChildSummary) { + self.pending_child_summaries.push_back(summary); + } + + fn drain_child_summaries(&mut self) -> Vec { + self.pending_child_summaries.drain(..).collect() + } } +#[cfg_attr(not(test), allow(dead_code))] fn expect_unique_session_configured( session_configured: Arc, ) -> SessionConfiguredEvent { @@ -969,8 +1612,10 @@ mod tests { use std::sync::atomic::AtomicBool; fn make_test_app() -> App { - let (chat_widget, app_event_tx, _rx, _op_rx) = make_chatwidget_manual_with_sender(); + let (mut chat_widget, app_event_tx, _rx, _op_rx) = make_chatwidget_manual_with_sender(); let config = chat_widget.config_ref().clone(); + let session_id = ConversationId::new().to_string(); + chat_widget.ensure_conversation_id(&session_id); let server = 
Arc::new(ConversationManager::with_auth(CodexAuth::from_api_key( "Test API Key", @@ -1002,12 +1647,21 @@ mod tests { }, Vec::new(), config.multi_agent.max_concurrent_delegates, + ShadowConfig::disabled(), )); + let mut sessions = HashMap::new(); + sessions.insert( + session_id.clone(), + SessionHandle::new(chat_widget, None, DelegateSessionMode::Standard, None), + ); + App { server, app_event_tx, - chat_widget, + sessions, + active_session_id: session_id.clone(), + primary_session_id: session_id, auth_manager, delegate_orchestrator, config, @@ -1021,13 +1675,8 @@ mod tests { commit_anim_running: Arc::new(AtomicBool::new(false)), backtrack: BacktrackState::default(), feedback: codex_feedback::CodexFeedback::new(), - delegate_sessions: HashMap::new(), - active_delegate: None, - active_delegate_summary: None, - primary_chat_backup: None, pending_update_action: None, - delegate_tree: DelegateTree::default(), - delegate_status_owner: None, + run_parent_map: HashMap::new(), } } @@ -1035,7 +1684,10 @@ mod tests { fn update_reasoning_effort_updates_config() { let mut app = make_test_app(); app.config.model_reasoning_effort = Some(ReasoningEffortConfig::Medium); - app.chat_widget + app.sessions + .get_mut(&app.active_session_id) + .unwrap() + .widget .set_reasoning_effort(Some(ReasoningEffortConfig::Medium)); app.on_update_reasoning_effort(Some(ReasoningEffortConfig::High)); @@ -1045,11 +1697,108 @@ mod tests { Some(ReasoningEffortConfig::High) ); assert_eq!( - app.chat_widget.config_ref().model_reasoning_effort, + app.sessions + .get(&app.active_session_id) + .unwrap() + .widget + .config_ref() + .model_reasoning_effort, Some(ReasoningEffortConfig::High) ); } + #[tokio::test] + async fn delegate_events_route_to_owner_only() { + let mut app = make_test_app(); + + let child_conversation_id = "child".to_string(); + let parent_conversation_id = app.active_session_id.clone(); + + let (mut child_widget, _, _, _) = make_chatwidget_manual_with_sender(); + 
child_widget.ensure_conversation_id(&child_conversation_id); + let child_handle = + SessionHandle::new(child_widget, None, DelegateSessionMode::Standard, None); + app.sessions + .insert(child_conversation_id.clone(), child_handle); + + let started = DelegateEvent::Started { + run_id: "run-1".to_string(), + agent_id: AgentId::parse("critic").unwrap(), + owner_conversation_id: child_conversation_id.clone(), + prompt: "prompt".to_string(), + started_at: SystemTime::now(), + parent_run_id: None, + mode: DelegateSessionMode::Standard, + }; + + app.handle_delegate_update(started).await; + + assert!( + app.sessions + .get(&parent_conversation_id) + .unwrap() + .pending_child_summaries + .is_empty() + ); + assert!( + app.sessions + .get(&child_conversation_id) + .unwrap() + .pending_child_summaries + .is_empty() + ); + assert!(app.run_parent_map.is_empty()); + } + + #[tokio::test] + async fn child_completion_bubbles_to_parent() { + let mut app = make_test_app(); + + let child_conversation_id = "child".to_string(); + let parent_conversation_id = app.active_session_id.clone(); + + let (mut child_widget, _, _, _) = make_chatwidget_manual_with_sender(); + child_widget.ensure_conversation_id(&child_conversation_id); + app.sessions.insert( + child_conversation_id.clone(), + SessionHandle::new(child_widget, None, DelegateSessionMode::Standard, None), + ); + + app.active_session_id = child_conversation_id.clone(); + + app.run_parent_map + .insert("run-1".to_string(), parent_conversation_id.clone()); + + let completed = DelegateEvent::Completed { + run_id: "run-1".to_string(), + agent_id: AgentId::parse("critic").unwrap(), + owner_conversation_id: child_conversation_id.clone(), + output: Some("Child output".to_string()), + duration: Duration::from_secs(2), + mode: DelegateSessionMode::Standard, + }; + + app.handle_delegate_update(completed).await; + + let parent_handle = app + .sessions + .get(&parent_conversation_id) + .unwrap() + .pending_child_summaries + .clone(); + 
assert_eq!(parent_handle.len(), 1); + matches!(parent_handle[0], ChildSummary::Completion(_)); + + assert!( + app.sessions + .get(&child_conversation_id) + .unwrap() + .pending_child_summaries + .is_empty() + ); + assert!(app.run_parent_map.is_empty()); + } + #[test] fn backtrack_selection_with_duplicate_history_targets_unique_turn() { let mut app = make_test_app(); @@ -1077,7 +1826,11 @@ mod tests { rollout_path: PathBuf::new(), }; Arc::new(new_session_info( - app.chat_widget.config_ref(), + app.sessions + .get(&app.active_session_id) + .unwrap() + .widget + .config_ref(), event, is_first, )) as Arc diff --git a/codex-rs/tui/src/app_backtrack.rs b/codex-rs/tui/src/app_backtrack.rs index b3e948ecb38e..06f3c9c09fe1 100644 --- a/codex-rs/tui/src/app_backtrack.rs +++ b/codex-rs/tui/src/app_backtrack.rs @@ -3,12 +3,14 @@ use std::path::PathBuf; use std::sync::Arc; use crate::app::App; +use crate::app_event::AppEvent; use crate::history_cell::CompositeHistoryCell; use crate::history_cell::UserHistoryCell; use crate::pager_overlay::Overlay; use crate::tui; use crate::tui::TuiEvent; use codex_core::protocol::ConversationPathResponseEvent; +use codex_multi_agent::DelegateSessionMode; use codex_protocol::ConversationId; use color_eyre::eyre::Result; use crossterm::event::KeyCode; @@ -82,7 +84,7 @@ impl App { /// Handle global Esc presses for backtracking when no overlay is present. 
pub(crate) fn handle_backtrack_esc_key(&mut self, tui: &mut tui::Tui) { - if !self.chat_widget.composer_is_empty() { + if self.active_widget().is_some_and(|w| !w.composer_is_empty()) { return; } @@ -146,8 +148,10 @@ impl App { fn prime_backtrack(&mut self) { self.backtrack.primed = true; self.backtrack.nth_user_message = usize::MAX; - self.backtrack.base_id = self.chat_widget.conversation_id(); - self.chat_widget.show_esc_backtrack_hint(); + self.backtrack.base_id = self + .active_widget() + .and_then(super::chatwidget::ChatWidget::conversation_id); + self.active_widget_mut().show_esc_backtrack_hint(); } /// Open overlay and begin backtrack preview flow (first step + highlight). @@ -155,14 +159,16 @@ impl App { self.open_transcript_overlay(tui); self.backtrack.overlay_preview_active = true; // Composer is hidden by overlay; clear its hint. - self.chat_widget.clear_esc_backtrack_hint(); + self.active_widget_mut().clear_esc_backtrack_hint(); self.step_backtrack_and_highlight(tui); } /// When overlay is already open, begin preview mode and select latest user message. fn begin_overlay_backtrack_preview(&mut self, tui: &mut tui::Tui) { self.backtrack.primed = true; - self.backtrack.base_id = self.chat_widget.conversation_id(); + self.backtrack.base_id = self + .active_widget() + .and_then(super::chatwidget::ChatWidget::conversation_id); self.backtrack.overlay_preview_active = true; let count = user_count(&self.transcript_cells); if let Some(last) = count.checked_sub(1) { @@ -267,7 +273,7 @@ impl App { self.backtrack.base_id = None; self.backtrack.nth_user_message = usize::MAX; // In case a hint is somehow still visible (e.g., race with overlay open/close). - self.chat_widget.clear_esc_backtrack_hint(); + self.active_widget_mut().clear_esc_backtrack_hint(); } /// Handle a ConversationHistory response while a backtrack is pending. 
@@ -295,7 +301,11 @@ impl App { nth_user_message: usize, prefill: String, ) { - let cfg = self.chat_widget.config_ref().clone(); + let cfg = self + .active_widget() + .expect("active session") + .config_ref() + .clone(); // Perform the fork via a thin wrapper for clarity/testability. let result = self .perform_fork(ev.path.clone(), nth_user_message, cfg.clone()) @@ -334,27 +344,59 @@ impl App { let init = crate::chatwidget::ChatWidgetInit { config: cfg, frame_requester: tui.frame_requester(), - app_event_tx: self.app_event_tx.clone(), + app_event_tx: self.app_event_tx.scoped(), initial_prompt: None, initial_images: Vec::new(), enhanced_keys_supported: self.enhanced_keys_supported, auth_manager: self.auth_manager.clone(), feedback: self.feedback.clone(), }; - self.chat_widget = - crate::chatwidget::ChatWidget::new_from_existing(init, conv, session_configured); + let bootstrap = crate::chatwidget::ChatWidget::new_session_from_existing( + init, + conv, + session_configured, + ); + let conversation_id_str = bootstrap.conversation_id.to_string(); + let forward_id = conversation_id_str.clone(); + let mut event_rx = bootstrap.event_rx; + let tx = self.app_event_tx.clone(); + tokio::spawn(async move { + while let Some(event) = event_rx.recv().await { + tx.send(AppEvent::CodexEvent { + conversation_id: forward_id.clone(), + event, + }); + } + }); + + let mut handle = self + .sessions + .remove(&self.primary_session_id) + .expect("primary session handle"); + let mut widget = bootstrap.widget; + widget.ensure_conversation_id(&conversation_id_str); + handle.replace(widget, None, DelegateSessionMode::Standard, None, None); + + self.primary_session_id = conversation_id_str.clone(); + self.active_session_id = conversation_id_str.clone(); + self.sessions.insert(conversation_id_str, handle); + self.apply_active_history_from_handle(); + // Trim transcript up to the selected user message and re-render it. 
self.trim_transcript_for_backtrack(nth_user_message); self.render_transcript_once(tui); if !prefill.is_empty() { - self.chat_widget.set_composer_text(prefill.to_string()); + self.active_widget_mut() + .set_composer_text(prefill.to_string()); } + self.sync_active_handle_history(); tui.frame_requester().schedule_frame(); } /// Trim transcript_cells to preserve only content up to the selected user message. fn trim_transcript_for_backtrack(&mut self, nth_user_message: usize) { trim_transcript_cells_to_nth_user(&mut self.transcript_cells, nth_user_message); + self.sync_active_handle_history(); } } diff --git a/codex-rs/tui/src/app_event.rs b/codex-rs/tui/src/app_event.rs index 8912cf7c4ea0..e2b901732f76 100644 --- a/codex-rs/tui/src/app_event.rs +++ b/codex-rs/tui/src/app_event.rs @@ -4,6 +4,7 @@ use codex_common::approval_presets::ApprovalPreset; use codex_common::model_presets::ModelPreset; use codex_core::protocol::ConversationPathResponseEvent; use codex_core::protocol::Event; +use codex_core::protocol::InputItem; use codex_file_search::FileMatch; use codex_multi_agent::DelegateEvent; @@ -17,7 +18,10 @@ use codex_core::protocol_config_types::ReasoningEffort; #[allow(clippy::large_enum_variant)] #[derive(Debug)] pub(crate) enum AppEvent { - CodexEvent(Event), + CodexEvent { + conversation_id: String, + event: Event, + }, /// Start a new session. NewSession, @@ -32,6 +36,27 @@ pub(crate) enum AppEvent { /// Update emitted from the orchestrator about delegate progress/completion. DelegateUpdate(DelegateEvent), + /// Forward delegate conversation events back to the orchestrator for shadow caching. + DelegateShadowEvent { + conversation_id: String, + event: Event, + }, + + /// Record user inputs submitted while inside a delegate session. + DelegateShadowUserInput { + conversation_id: String, + inputs: Vec, + }, + + /// Record agent outputs captured from the delegate session (e.g., summary blocks). 
+ DelegateShadowAgentOutput { + conversation_id: String, + outputs: Vec, + }, + + /// Request the app to gather and render `/status` information. + ShowStatus, + /// Kick off an asynchronous file search for the given query (text after /// the `@`). Previous searches may be cancelled by the app layer so there /// is at most one in-flight search. @@ -47,7 +72,10 @@ pub(crate) enum AppEvent { /// Result of computing a `/diff` command. DiffResult(String), - InsertHistoryCell(Box), + InsertHistoryCell { + conversation_id: Option, + cell: Box, + }, StartCommitAnimation, StopCommitAnimation, diff --git a/codex-rs/tui/src/app_event_sender.rs b/codex-rs/tui/src/app_event_sender.rs index c1427b3ff02f..bf22f2ed10d9 100644 --- a/codex-rs/tui/src/app_event_sender.rs +++ b/codex-rs/tui/src/app_event_sender.rs @@ -1,16 +1,41 @@ +use std::sync::Arc; +use std::sync::Mutex; + use tokio::sync::mpsc::UnboundedSender; use crate::app_event::AppEvent; +use crate::history_cell::HistoryCell; use crate::session_log; #[derive(Clone, Debug)] pub(crate) struct AppEventSender { pub app_event_tx: UnboundedSender, + conversation_id: Arc>>, } impl AppEventSender { pub(crate) fn new(app_event_tx: UnboundedSender) -> Self { - Self { app_event_tx } + Self { + app_event_tx, + conversation_id: Arc::new(Mutex::new(None)), + } + } + + /// Create a scoped sender that shares the same channel but tracks a + /// conversation-specific context independently. + pub(crate) fn scoped(&self) -> Self { + Self { + app_event_tx: self.app_event_tx.clone(), + conversation_id: Arc::new(Mutex::new(None)), + } + } + + pub(crate) fn set_conversation_id>(&self, conversation_id: S) { + let mut guard = self + .conversation_id + .lock() + .expect("conversation_id mutex poisoned"); + *guard = Some(conversation_id.into()); } /// Send an event to the app event channel. 
If it fails, we swallow the @@ -25,4 +50,19 @@ impl AppEventSender { tracing::error!("failed to send event: {e}"); } } + + pub(crate) fn send_history_cell(&self, cell: Box) { + let conversation_id = self + .conversation_id + .lock() + .expect("conversation_id mutex poisoned") + .clone(); + if conversation_id.is_none() { + tracing::error!("history cell emitted without conversation context"); + } + self.send(AppEvent::InsertHistoryCell { + conversation_id, + cell, + }); + } } diff --git a/codex-rs/tui/src/bottom_pane/approval_overlay.rs b/codex-rs/tui/src/bottom_pane/approval_overlay.rs index ce0e3c9c951e..73bbbddc4dc8 100644 --- a/codex-rs/tui/src/bottom_pane/approval_overlay.rs +++ b/codex-rs/tui/src/bottom_pane/approval_overlay.rs @@ -160,7 +160,7 @@ impl ApprovalOverlay { fn handle_exec_decision(&self, id: &str, command: &[String], decision: ReviewDecision) { let cell = history_cell::new_approval_decision_cell(command.to_vec(), decision); - self.app_event_tx.send(AppEvent::InsertHistoryCell(cell)); + self.app_event_tx.send_history_cell(cell); self.app_event_tx.send(AppEvent::CodexOp(Op::ExecApproval { id: id.to_string(), decision, diff --git a/codex-rs/tui/src/bottom_pane/chat_composer.rs b/codex-rs/tui/src/bottom_pane/chat_composer.rs index 951c08d4d3dc..3dec678f08be 100644 --- a/codex-rs/tui/src/bottom_pane/chat_composer.rs +++ b/codex-rs/tui/src/bottom_pane/chat_composer.rs @@ -1009,9 +1009,9 @@ impl ChatComposer { let message = format!( r#"Unrecognized command '/{name}'. 
Type "/" for a list of supported commands."# ); - self.app_event_tx.send(AppEvent::InsertHistoryCell(Box::new( + self.app_event_tx.send_history_cell(Box::new( history_cell::new_info_event(message, None), - ))); + )); self.textarea.set_text(&original_input); self.textarea.set_cursor(original_input.len()); return (InputResult::None, true); @@ -1022,9 +1022,9 @@ impl ChatComposer { let expanded_prompt = match expand_custom_prompt(&text, &self.custom_prompts) { Ok(expanded) => expanded, Err(err) => { - self.app_event_tx.send(AppEvent::InsertHistoryCell(Box::new( + self.app_event_tx.send_history_cell(Box::new( history_cell::new_error_event(err.user_message()), - ))); + )); self.textarea.set_text(&original_input); self.textarea.set_cursor(original_input.len()); return (InputResult::None, true); @@ -3064,7 +3064,7 @@ mod tests { let mut found_error = false; while let Ok(event) = rx.try_recv() { - if let AppEvent::InsertHistoryCell(cell) = event { + if let AppEvent::InsertHistoryCell { cell, .. } = event { let message = cell .display_lines(80) .into_iter() @@ -3110,7 +3110,7 @@ mod tests { let mut found_error = false; while let Ok(event) = rx.try_recv() { - if let AppEvent::InsertHistoryCell(cell) = event { + if let AppEvent::InsertHistoryCell { cell, .. 
} = event { let message = cell .display_lines(80) .into_iter() diff --git a/codex-rs/tui/src/bottom_pane/feedback_view.rs b/codex-rs/tui/src/bottom_pane/feedback_view.rs index 9c75e32bdfee..dd0fc128703a 100644 --- a/codex-rs/tui/src/bottom_pane/feedback_view.rs +++ b/codex-rs/tui/src/bottom_pane/feedback_view.rs @@ -1,4 +1,3 @@ -use crate::app_event::AppEvent; use crate::app_event_sender::AppEventSender; use crate::history_cell; use crate::history_cell::PlainHistoryCell; @@ -36,29 +35,34 @@ impl FeedbackView { let thread_id = snapshot.thread_id.clone(); let upload_action_tread_id = thread_id.clone(); - let upload_action: SelectionAction = Box::new(move |tx: &AppEventSender| { - match snapshot.upload_to_sentry() { - Ok(()) => { - let issue_url = format!( - "{BASE_ISSUE_URL}&steps=Uploaded%20thread:%20{upload_action_tread_id}", - ); - tx.send(AppEvent::InsertHistoryCell(Box::new(PlainHistoryCell::new(vec![ + let upload_action: SelectionAction = + Box::new( + move |tx: &AppEventSender| match snapshot.upload_to_sentry() { + Ok(()) => { + let issue_url = format!( + "{BASE_ISSUE_URL}&steps=Uploaded%20thread:%20{upload_action_tread_id}", + ); + tx.send_history_cell(Box::new(PlainHistoryCell::new(vec![ Line::from( "• Codex logs uploaded. 
Please open an issue using the following URL:", ), "".into(), Line::from(vec![" ".into(), issue_url.cyan().underlined()]), "".into(), - Line::from(vec![" Or mention your thread ID ".into(), upload_action_tread_id.clone().bold(), " in an existing issue.".into()]) - ])))); - } - Err(e) => { - tx.send(AppEvent::InsertHistoryCell(Box::new( - history_cell::new_error_event(format!("Failed to upload logs: {e}")), - ))); - } - } - }); + Line::from(vec![ + " Or mention your thread ID ".into(), + upload_action_tread_id.clone().bold(), + " in an existing issue.".into(), + ]), + ]))); + } + Err(e) => { + tx.send_history_cell(Box::new(history_cell::new_error_event(format!( + "Failed to upload logs: {e}" + )))); + } + }, + ); let upload_item = SelectionItem { name: "Yes".to_string(), @@ -74,19 +78,17 @@ impl FeedbackView { let no_action: SelectionAction = Box::new(move |tx: &AppEventSender| { let issue_url = format!("{BASE_ISSUE_URL}&steps=Thread%20ID:%20{thread_id}",); - tx.send(AppEvent::InsertHistoryCell(Box::new( - PlainHistoryCell::new(vec![ - Line::from("• Please open an issue using the following URL:"), - "".into(), - Line::from(vec![" ".into(), issue_url.cyan().underlined()]), - "".into(), - Line::from(vec![ - " Or mention your thread ID ".into(), - thread_id.clone().bold(), - " in an existing issue.".into(), - ]), + tx.send_history_cell(Box::new(PlainHistoryCell::new(vec![ + Line::from("• Please open an issue using the following URL:"), + "".into(), + Line::from(vec![" ".into(), issue_url.cyan().underlined()]), + "".into(), + Line::from(vec![ + " Or mention your thread ID ".into(), + thread_id.clone().bold(), + " in an existing issue.".into(), ]), - ))); + ]))); }); let no_item = SelectionItem { diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index b1439efc6fbe..a2af963aa92e 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -4,6 +4,7 @@ use std::collections::VecDeque; use std::path::PathBuf; use 
std::sync::Arc; +use codex_core::CodexConversation; use codex_core::config::Config; use codex_core::config_types::Notifications; use codex_core::git_info::current_branch_name; @@ -34,16 +35,21 @@ use codex_core::protocol::Op; use codex_core::protocol::PatchApplyBeginEvent; use codex_core::protocol::RateLimitSnapshot; use codex_core::protocol::ReviewRequest; +use codex_core::protocol::SessionConfiguredEvent; use codex_core::protocol::StreamErrorEvent; use codex_core::protocol::TaskCompleteEvent; use codex_core::protocol::TokenUsage; use codex_core::protocol::TokenUsageInfo; use codex_core::protocol::TurnAbortReason; use codex_core::protocol::TurnDiffEvent; +use codex_core::protocol::USER_INSTRUCTIONS_CLOSE_TAG; +use codex_core::protocol::USER_INSTRUCTIONS_OPEN_TAG; use codex_core::protocol::UserMessageEvent; use codex_core::protocol::ViewImageToolCallEvent; use codex_core::protocol::WebSearchBeginEvent; use codex_core::protocol::WebSearchEndEvent; +use codex_multi_agent::shadow::ShadowSessionSummary; +use codex_multi_agent::shadow::ShadowSnapshot; use codex_protocol::ConversationId; use codex_protocol::parse_command::ParsedCommand; use crossterm::event::KeyCode; @@ -62,6 +68,7 @@ use ratatui::widgets::Paragraph; use ratatui::widgets::Widget; use ratatui::widgets::WidgetRef; use ratatui::widgets::Wrap; +use tokio::sync::mpsc::UnboundedReceiver; use tokio::sync::mpsc::UnboundedSender; use tracing::debug; @@ -92,11 +99,14 @@ use crate::render::renderable::ColumnRenderable; use crate::render::renderable::Renderable; use crate::slash_command::SlashCommand; use crate::status::RateLimitSnapshotDisplay; +use crate::status::StatusShadowData; +use crate::status::format_bytes_compact; use crate::text_formatting::truncate_text; use crate::tui::FrameRequester; mod interrupts; use self::interrupts::InterruptManager; mod agent; +use self::agent::AgentHandles; use self::agent::spawn_agent; use self::agent::spawn_agent_from_existing; mod session_header; @@ -247,6 +257,7 @@ pub 
struct DelegateDisplayLabel { pub struct DelegatePickerSession { pub summary: DelegateSessionSummary, pub run_id: Option, + pub shadow: Option, } pub(crate) struct ChatWidget { @@ -302,12 +313,19 @@ pub(crate) struct ChatWidget { delegate_user_frames: Vec, delegate_agent_frames: Vec, pending_delegate_context: Vec, + shadow_updates_suppressed: bool, last_rendered_width: std::cell::Cell>, // Feedback sink for /feedback feedback: codex_feedback::CodexFeedback, } +pub(crate) struct ChatWidgetSession { + pub widget: ChatWidget, + pub conversation_id: ConversationId, + pub event_rx: UnboundedReceiver, +} + struct UserMessage { text: String, image_paths: Vec, @@ -353,6 +371,23 @@ impl ChatWidget { } } + pub(crate) fn ensure_conversation_id(&mut self, conversation_id: &str) { + if let Some(current) = self.conversation_id.as_ref() + && current.to_string() == conversation_id + { + return; + } + if let Ok(parsed) = ConversationId::from_string(conversation_id) { + self.conversation_id = Some(parsed); + self.app_event_tx + .set_conversation_id(conversation_id.to_string()); + } + } + + fn emit_history_cell(&self, cell: Box) { + self.app_event_tx.send_history_cell(cell); + } + fn flush_answer_stream_with_separator(&mut self) { if let Some(mut controller) = self.stream_controller.take() && let Some(cell) = controller.finalize() @@ -403,6 +438,50 @@ impl ChatWidget { self.delegate_context = summary; self.delegate_user_frames.clear(); self.delegate_agent_frames.clear(); + self.shadow_updates_suppressed = false; + } + + fn delegate_conversation_id(&self) -> Option<&str> { + self.delegate_context + .as_ref() + .map(|summary| summary.conversation_id.as_str()) + } + + fn should_forward_shadow(&self) -> bool { + self.delegate_conversation_id().is_some() && !self.shadow_updates_suppressed + } + + fn send_shadow_event(&self, event: Event) { + if let Some(conversation_id) = self.delegate_conversation_id() { + self.app_event_tx.send(AppEvent::DelegateShadowEvent { + conversation_id: 
conversation_id.to_string(), + event, + }); + } + } + + fn send_shadow_user_inputs(&self, inputs: Vec) { + if inputs.is_empty() { + return; + } + if let Some(conversation_id) = self.delegate_conversation_id() { + self.app_event_tx.send(AppEvent::DelegateShadowUserInput { + conversation_id: conversation_id.to_string(), + inputs, + }); + } + } + + fn send_shadow_outputs(&self, outputs: Vec) { + if outputs.is_empty() { + return; + } + if let Some(conversation_id) = self.delegate_conversation_id() { + self.app_event_tx.send(AppEvent::DelegateShadowAgentOutput { + conversation_id: conversation_id.to_string(), + outputs, + }); + } } pub(crate) fn take_delegate_capture(&mut self) -> Option { @@ -415,6 +494,29 @@ impl ChatWidget { }) } + pub(crate) fn hydrate_from_shadow(&mut self, snapshot: &ShadowSnapshot) { + self.shadow_updates_suppressed = true; + self.show_welcome_banner = false; + self.add_info_message( + format!( + "Attached to #{} (shadow snapshot)", + snapshot.agent_id.as_str() + ), + None, + ); + for event in &snapshot.events { + self.dispatch_event_msg(Some(event.id.clone()), event.msg.clone(), true); + } + self.delegate_user_frames = snapshot.capture.user_inputs.clone(); + self.delegate_agent_frames = snapshot.capture.agent_outputs.clone(); + self.shadow_updates_suppressed = false; + } + + pub(crate) fn clear_shadow_capture(&mut self) { + self.delegate_user_frames.clear(); + self.delegate_agent_frames.clear(); + } + pub(crate) fn apply_delegate_summary( &mut self, summary: &DelegateSessionSummary, @@ -545,6 +647,9 @@ impl ChatWidget { && !message.trim().is_empty() { self.delegate_agent_frames.push(message.clone()); + if self.should_forward_shadow() { + self.send_shadow_outputs(vec![message.clone()]); + } } let notification_response = last_agent_message.unwrap_or_default(); @@ -1032,83 +1137,40 @@ impl ChatWidget { .areas(area) } - pub(crate) fn new( + pub(crate) async fn new_session( common: ChatWidgetInit, conversation_manager: Arc, - ) -> Self { - let 
ChatWidgetInit { - config, - frame_requester, - app_event_tx, - initial_prompt, - initial_images, - enhanced_keys_supported, - auth_manager, - feedback, - } = common; - let mut rng = rand::rng(); - let placeholder = EXAMPLE_PROMPTS[rng.random_range(0..EXAMPLE_PROMPTS.len())].to_string(); - let codex_op_tx = spawn_agent(config.clone(), app_event_tx.clone(), conversation_manager); - - Self { - app_event_tx: app_event_tx.clone(), - frame_requester: frame_requester.clone(), - codex_op_tx, - bottom_pane: BottomPane::new(BottomPaneParams { - frame_requester, - app_event_tx, - has_input_focus: true, - enhanced_keys_supported, - placeholder_text: placeholder, - disable_paste_burst: config.disable_paste_burst, - }), - active_cell: None, - config: config.clone(), - auth_manager, - session_header: SessionHeader::new(config.model), - initial_user_message: create_initial_user_message( - initial_prompt.unwrap_or_default(), - initial_images, - ), - token_info: None, - rate_limit_snapshot: None, - rate_limit_warnings: RateLimitWarningState::default(), - stream_controller: None, - running_commands: HashMap::new(), - task_complete_pending: false, - interrupts: InterruptManager::new(), - reasoning_buffer: String::new(), - full_reasoning_buffer: String::new(), - current_status_header: String::from("Working"), - retry_status_header: None, - conversation_id: None, - queued_user_messages: VecDeque::new(), - show_welcome_banner: true, - suppress_session_configured_redraw: false, - pending_notification: None, - is_review_mode: false, - ghost_snapshots: Vec::new(), - ghost_snapshots_disabled: true, - needs_final_message_separator: false, - delegate_run: None, - delegate_runs_with_stream: HashSet::new(), - delegate_status_owner: None, - delegate_previous_status_header: None, - delegate_context: None, - delegate_user_frames: Vec::new(), - delegate_agent_frames: Vec::new(), - pending_delegate_context: Vec::new(), - last_rendered_width: std::cell::Cell::new(None), - feedback, - } + ) -> 
color_eyre::Result { + let handles = spawn_agent(common.config.clone(), conversation_manager).await?; + Ok(Self::from_agent_handles(common, handles, false)) } /// Create a ChatWidget attached to an existing conversation (e.g., a fork). - pub(crate) fn new_from_existing( + pub(crate) fn new_session_from_existing( + common: ChatWidgetInit, + conversation: Arc, + session_configured: SessionConfiguredEvent, + ) -> ChatWidgetSession { + let handles = spawn_agent_from_existing(conversation, session_configured); + Self::from_agent_handles(common, handles, true) + } + + pub(crate) fn new_session_from_existing_with_events( common: ChatWidgetInit, - conversation: std::sync::Arc, - session_configured: codex_core::protocol::SessionConfiguredEvent, - ) -> Self { + conversation: Arc, + session_configured: Arc, + event_rx: UnboundedReceiver, + ) -> ChatWidgetSession { + let handles = + agent::handles_from_existing_with_events(conversation, session_configured, event_rx); + Self::from_agent_handles(common, handles, true) + } + + fn from_agent_handles( + common: ChatWidgetInit, + handles: AgentHandles, + suppress_session_configured_redraw: bool, + ) -> ChatWidgetSession { let ChatWidgetInit { config, frame_requester, @@ -1122,13 +1184,18 @@ impl ChatWidget { let mut rng = rand::rng(); let placeholder = EXAMPLE_PROMPTS[rng.random_range(0..EXAMPLE_PROMPTS.len())].to_string(); - let codex_op_tx = - spawn_agent_from_existing(conversation, session_configured, app_event_tx.clone()); + let AgentHandles { + conversation_id, + op_tx, + event_rx, + } = handles; - Self { + app_event_tx.set_conversation_id(conversation_id.to_string()); + + let widget = Self { app_event_tx: app_event_tx.clone(), frame_requester: frame_requester.clone(), - codex_op_tx, + codex_op_tx: op_tx, bottom_pane: BottomPane::new(BottomPaneParams { frame_requester, app_event_tx, @@ -1156,10 +1223,10 @@ impl ChatWidget { full_reasoning_buffer: String::new(), current_status_header: String::from("Working"), 
retry_status_header: None, - conversation_id: None, + conversation_id: Some(conversation_id), queued_user_messages: VecDeque::new(), show_welcome_banner: true, - suppress_session_configured_redraw: true, + suppress_session_configured_redraw, pending_notification: None, is_review_mode: false, ghost_snapshots: Vec::new(), @@ -1173,8 +1240,15 @@ impl ChatWidget { delegate_user_frames: Vec::new(), delegate_agent_frames: Vec::new(), pending_delegate_context: Vec::new(), + shadow_updates_suppressed: false, last_rendered_width: std::cell::Cell::new(None), feedback, + }; + + ChatWidgetSession { + widget, + conversation_id, + event_rx, } } @@ -1361,7 +1435,7 @@ impl ChatWidget { self.app_event_tx.send(AppEvent::OpenDelegatePicker); } SlashCommand::Status => { - self.add_status_output(); + self.app_event_tx.send(AppEvent::ShowStatus); } SlashCommand::Mcp => { self.add_mcp_output(); @@ -1374,35 +1448,42 @@ impl ChatWidget { use codex_core::protocol::ApplyPatchApprovalRequestEvent; use codex_core::protocol::FileChange; - self.app_event_tx.send(AppEvent::CodexEvent(Event { - id: "1".to_string(), - // msg: EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent { - // call_id: "1".to_string(), - // command: vec!["git".into(), "apply".into()], - // cwd: self.config.cwd.clone(), - // reason: Some("test".to_string()), - // }), - msg: EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent { - call_id: "1".to_string(), - changes: HashMap::from([ - ( - PathBuf::from("/tmp/test.txt"), - FileChange::Add { - content: "test".to_string(), - }, - ), - ( - PathBuf::from("/tmp/test2.txt"), - FileChange::Update { - unified_diff: "+test\n-test2".to_string(), - move_path: None, - }, - ), - ]), - reason: None, - grant_root: Some(PathBuf::from("/tmp")), - }), - })); + let conversation_id = self + .conversation_id() + .map(|id| id.to_string()) + .unwrap_or_default(); + self.app_event_tx.send(AppEvent::CodexEvent { + conversation_id, + event: Event { + id: "1".to_string(), + // msg: 
EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent { + // call_id: "1".to_string(), + // command: vec!["git".into(), "apply".into()], + // cwd: self.config.cwd.clone(), + // reason: Some("test".to_string()), + // }), + msg: EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent { + call_id: "1".to_string(), + changes: HashMap::from([ + ( + PathBuf::from("/tmp/test.txt"), + FileChange::Add { + content: "test".to_string(), + }, + ), + ( + PathBuf::from("/tmp/test2.txt"), + FileChange::Update { + unified_diff: "+test\n-test2".to_string(), + move_path: None, + }, + ), + ]), + reason: None, + grant_root: Some(PathBuf::from("/tmp")), + }), + }, + }); } } } @@ -1432,7 +1513,7 @@ impl ChatWidget { fn flush_active_cell(&mut self) { if let Some(active) = self.active_cell.take() { self.needs_final_message_separator = true; - self.app_event_tx.send(AppEvent::InsertHistoryCell(active)); + self.emit_history_cell(active); } } @@ -1446,7 +1527,7 @@ impl ChatWidget { self.flush_active_cell(); self.needs_final_message_separator = true; } - self.app_event_tx.send(AppEvent::InsertHistoryCell(cell)); + self.emit_history_cell(cell); } fn submit_user_message(&mut self, user_message: UserMessage) { @@ -1492,6 +1573,8 @@ impl ChatWidget { text = prefix; } + let forward_shadow = self.should_forward_shadow(); + let mut items: Vec = Vec::new(); if !text.is_empty() { @@ -1502,6 +1585,10 @@ impl ChatWidget { items.push(InputItem::LocalImage { path }); } + if forward_shadow { + self.send_shadow_user_inputs(items.clone()); + } + if let Err(e) = self.codex_op_tx.send(Op::UserInput { items }) { tracing::error!("failed to send message: {e}"); } @@ -1587,8 +1674,16 @@ impl ChatWidget { } pub(crate) fn handle_codex_event(&mut self, event: Event) { + let forward = if self.should_forward_shadow() { + Some(event.clone()) + } else { + None + }; let Event { id, msg } = event; self.dispatch_event_msg(Some(id), msg, false); + if let Some(event) = forward { + self.send_shadow_event(event); + } 
} /// Dispatch a protocol `EventMsg` to the appropriate handler. @@ -1712,8 +1807,7 @@ impl ChatWidget { let mut rendered: Vec> = vec!["".into()]; append_markdown(&explanation, None, &mut rendered, &self.config); let body_cell = AgentMessageCell::new(rendered, false); - self.app_event_tx - .send(AppEvent::InsertHistoryCell(Box::new(body_cell))); + self.app_event_tx.send_history_cell(Box::new(body_cell)); } } else { let message_text = @@ -1721,8 +1815,7 @@ impl ChatWidget { let mut message_lines: Vec> = Vec::new(); append_markdown(&message_text, None, &mut message_lines, &self.config); let body_cell = AgentMessageCell::new(message_lines, true); - self.app_event_tx - .send(AppEvent::InsertHistoryCell(Box::new(body_cell))); + self.app_event_tx.send_history_cell(Box::new(body_cell)); } } @@ -1736,9 +1829,20 @@ impl ChatWidget { fn on_user_message_event(&mut self, event: UserMessageEvent) { match event.kind { - Some(InputMessageKind::EnvironmentContext) - | Some(InputMessageKind::UserInstructions) => { - // Skip XML‑wrapped context blocks in the transcript. + Some(InputMessageKind::EnvironmentContext) => { + // Environment context is primarily for the model; omit from transcript. 
+ } + Some(InputMessageKind::UserInstructions) => { + let message = event.message.trim(); + let cleaned = message + .strip_prefix(USER_INSTRUCTIONS_OPEN_TAG) + .and_then(|rest| rest.strip_suffix(USER_INSTRUCTIONS_CLOSE_TAG)) + .map(str::trim) + .filter(|text| !text.is_empty()) + .unwrap_or(message); + if !cleaned.is_empty() { + self.add_to_history(history_cell::new_user_prompt(cleaned.to_string())); + } } Some(InputMessageKind::Plain) | None => { let message = event.message.trim(); @@ -1810,7 +1914,7 @@ impl ChatWidget { self.request_redraw(); } - pub(crate) fn add_status_output(&mut self) { + pub(crate) fn add_status_output(&mut self, shadow: Option) { let default_usage = TokenUsage::default(); let (total_usage, context_usage) = if let Some(ti) = &self.token_info { (&ti.total_token_usage, Some(&ti.last_token_usage)) @@ -1823,6 +1927,7 @@ impl ChatWidget { context_usage, &self.conversation_id, self.rate_limit_snapshot.as_ref(), + shadow, )); } @@ -2058,12 +2163,43 @@ impl ChatWidget { } else { "" }; + let mut description_lines = vec![summary.cwd.display().to_string()]; + let mut needs_replay = false; + + if let Some(shadow) = entry.shadow { + let mut line = format!( + "Shadow cache: {} raw", + format_bytes_compact(shadow.raw_bytes) + ); + if let Some(compressed) = shadow.compressed_bytes { + line.push_str(&format!( + " ({} compressed)", + format_bytes_compact(compressed) + )); + } + description_lines.push(line); + description_lines.push(format!( + "Shadow events: {} · inputs {} · outputs {}", + shadow.metrics.events, shadow.metrics.user_inputs, shadow.metrics.agent_outputs + )); + } else if self.config.multi_agent.enable_shadow_cache { + description_lines.push("Shadow cache unavailable; replay required".to_string()); + needs_replay = true; + } else { + description_lines.push("Shadow cache disabled".to_string()); + } + + let replay_suffix = if needs_replay { + " · replay required" + } else { + "" + }; let label = format!( - "{prefix}#{} · {}", + "{prefix}#{} · 
{}{replay_suffix}", summary.agent_id.as_str(), Self::format_delegate_timestamp(summary.last_interacted_at) ); - let description = Some(summary.cwd.display().to_string()); + let description = Some(description_lines.join("\n")); let is_current = active_delegate == Some(conversation_id.as_str()); let conversation_id_for_action = conversation_id.clone(); let actions: Vec = vec![Box::new(move |tx| { diff --git a/codex-rs/tui/src/chatwidget/agent.rs b/codex-rs/tui/src/chatwidget/agent.rs index bf15b6c4ade8..95462a270312 100644 --- a/codex-rs/tui/src/chatwidget/agent.rs +++ b/codex-rs/tui/src/chatwidget/agent.rs @@ -4,96 +4,118 @@ use codex_core::CodexConversation; use codex_core::ConversationManager; use codex_core::NewConversation; use codex_core::config::Config; +use codex_core::protocol::Event; +use codex_core::protocol::EventMsg; use codex_core::protocol::Op; +use codex_core::protocol::SessionConfiguredEvent; +use codex_protocol::ConversationId; +use color_eyre::eyre::Result; +use color_eyre::eyre::WrapErr; +use tokio::sync::mpsc::UnboundedReceiver; use tokio::sync::mpsc::UnboundedSender; use tokio::sync::mpsc::unbounded_channel; -use crate::app_event::AppEvent; -use crate::app_event_sender::AppEventSender; +pub(crate) struct AgentHandles { + pub conversation_id: ConversationId, + pub op_tx: UnboundedSender, + pub event_rx: UnboundedReceiver, +} -/// Spawn the agent bootstrapper and op forwarding loop, returning the -/// `UnboundedSender` used by the UI to submit operations. 
-pub(crate) fn spawn_agent( - config: Config, - app_event_tx: AppEventSender, - server: Arc, -) -> UnboundedSender { +fn spawn_op_forwarder(conversation: Arc) -> UnboundedSender { let (codex_op_tx, mut codex_op_rx) = unbounded_channel::(); - - let app_event_tx_clone = app_event_tx; tokio::spawn(async move { - let NewConversation { - conversation_id: _, - conversation, - session_configured, - } = match server.new_conversation(config).await { - Ok(v) => v, - Err(e) => { - // TODO: surface this error to the user. - tracing::error!("failed to initialize codex: {e}"); - return; + while let Some(op) = codex_op_rx.recv().await { + if let Err(err) = conversation.submit(op).await { + tracing::error!("failed to submit op: {err}"); } - }; + } + }); + codex_op_tx +} - // Forward the captured `SessionConfigured` event so it can be rendered in the UI. - let ev = codex_core::protocol::Event { - // The `id` does not matter for rendering, so we can use a fake value. - id: "".to_string(), - msg: codex_core::protocol::EventMsg::SessionConfigured(session_configured), +fn spawn_event_forwarder( + conversation: Arc, + session_configured: Arc, +) -> UnboundedReceiver { + let (event_tx, event_rx) = unbounded_channel::(); + tokio::spawn(async move { + let initial = Event { + id: String::new(), + msg: EventMsg::SessionConfigured((*session_configured).clone()), }; - app_event_tx_clone.send(AppEvent::CodexEvent(ev)); + if event_tx.send(initial).is_err() { + return; + } - let conversation_clone = conversation.clone(); - tokio::spawn(async move { - while let Some(op) = codex_op_rx.recv().await { - let id = conversation_clone.submit(op).await; - if let Err(e) = id { - tracing::error!("failed to submit op: {e}"); + loop { + match conversation.next_event().await { + Ok(event) => { + if event_tx.send(event).is_err() { + break; + } + } + Err(err) => { + tracing::error!("failed to receive conversation event: {err}"); + break; } } - }); - - while let Ok(event) = conversation.next_event().await { - 
app_event_tx_clone.send(AppEvent::CodexEvent(event)); } }); + event_rx +} - codex_op_tx +/// Spawn the agent bootstrapper and op forwarding loop. +pub(crate) async fn spawn_agent( + config: Config, + server: Arc, +) -> Result { + let NewConversation { + conversation_id, + conversation, + session_configured, + } = server + .new_conversation(config) + .await + .wrap_err("failed to start Codex conversation")?; + + let session_configured = Arc::new(session_configured); + let op_tx = spawn_op_forwarder(conversation.clone()); + let event_rx = spawn_event_forwarder(conversation, session_configured); + + Ok(AgentHandles { + conversation_id, + op_tx, + event_rx, + }) } /// Spawn agent loops for an existing conversation (e.g., a forked conversation). -/// Sends the provided `SessionConfiguredEvent` immediately, then forwards subsequent -/// events and accepts Ops for submission. pub(crate) fn spawn_agent_from_existing( - conversation: std::sync::Arc, - session_configured: codex_core::protocol::SessionConfiguredEvent, - app_event_tx: AppEventSender, -) -> UnboundedSender { - let (codex_op_tx, mut codex_op_rx) = unbounded_channel::(); - - let app_event_tx_clone = app_event_tx; - tokio::spawn(async move { - // Forward the captured `SessionConfigured` event so it can be rendered in the UI. 
- let ev = codex_core::protocol::Event { - id: "".to_string(), - msg: codex_core::protocol::EventMsg::SessionConfigured(session_configured), - }; - app_event_tx_clone.send(AppEvent::CodexEvent(ev)); + conversation: Arc, + session_configured: SessionConfiguredEvent, +) -> AgentHandles { + let conversation_id = session_configured.session_id; + let session_configured = Arc::new(session_configured); + let op_tx = spawn_op_forwarder(conversation.clone()); + let event_rx = spawn_event_forwarder(conversation, session_configured); - let conversation_clone = conversation.clone(); - tokio::spawn(async move { - while let Some(op) = codex_op_rx.recv().await { - let id = conversation_clone.submit(op).await; - if let Err(e) = id { - tracing::error!("failed to submit op: {e}"); - } - } - }); - - while let Ok(event) = conversation.next_event().await { - app_event_tx_clone.send(AppEvent::CodexEvent(event)); - } - }); + AgentHandles { + conversation_id, + op_tx, + event_rx, + } +} - codex_op_tx +pub(crate) fn handles_from_existing_with_events( + conversation: Arc, + session_configured: Arc, + event_rx: UnboundedReceiver, +) -> AgentHandles { + let conversation_id = session_configured.session_id; + let op_tx = spawn_op_forwarder(conversation); + AgentHandles { + conversation_id, + op_tx, + event_rx, + } } diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index 96cd60566c08..bd619e8ba928 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -240,9 +240,11 @@ async fn helpers_are_available_and_do_not_panic() { auth_manager, feedback: codex_feedback::CodexFeedback::new(), }; - let mut w = ChatWidget::new(init, conversation_manager); + let ChatWidgetSession { mut widget, .. } = ChatWidget::new_session(init, conversation_manager) + .await + .expect("chat widget bootstrap"); // Basic construction sanity. 
- let _ = &mut w; + let _ = &mut widget; } // --- Helpers for tests that need direct construction and event draining --- @@ -302,6 +304,7 @@ fn make_chatwidget_manual() -> ( delegate_user_frames: Vec::new(), delegate_agent_frames: Vec::new(), pending_delegate_context: Vec::new(), + shadow_updates_suppressed: false, last_rendered_width: std::cell::Cell::new(None), feedback: codex_feedback::CodexFeedback::new(), }; @@ -324,7 +327,7 @@ fn drain_insert_history( ) -> Vec>> { let mut out = Vec::new(); while let Ok(ev) = rx.try_recv() { - if let AppEvent::InsertHistoryCell(cell) = ev { + if let AppEvent::InsertHistoryCell { cell, .. } = ev { let mut lines = cell.display_lines(80); if !cell.is_stream_continuation() && !out.is_empty() && !lines.is_empty() { lines.insert(0, "".into()); @@ -433,7 +436,7 @@ fn delegate_stream_deltas_and_restore_status() { chat.on_commit_tick(); let mut saw_history_line = false; while let Ok(event) = rx.try_recv() { - if let AppEvent::InsertHistoryCell(cell) = event { + if let AppEvent::InsertHistoryCell { cell, .. } = event { let text = lines_to_single_string(&cell.display_lines(80)); if text.contains("First idea") { saw_history_line = true; @@ -501,7 +504,7 @@ fn nested_delegate_info_events_are_indented() { let mut messages = Vec::new(); while let Ok(event) = rx.try_recv() { - if let AppEvent::InsertHistoryCell(cell) = event { + if let AppEvent::InsertHistoryCell { cell, .. } = event { messages.push(lines_to_single_string(&cell.display_lines(120))); } } @@ -1412,7 +1415,7 @@ async fn binary_size_transcript_snapshot() { }; chat.handle_codex_event(ev); while let Ok(app_ev) = rx.try_recv() { - if let AppEvent::InsertHistoryCell(cell) = app_ev { + if let AppEvent::InsertHistoryCell { cell, .. 
} = app_ev { let mut lines = cell.display_lines(width); if has_emitted_history && !cell.is_stream_continuation() @@ -1433,7 +1436,7 @@ async fn binary_size_transcript_snapshot() { { chat.on_commit_tick(); while let Ok(app_ev) = rx.try_recv() { - if let AppEvent::InsertHistoryCell(cell) = app_ev { + if let AppEvent::InsertHistoryCell { cell, .. } = app_ev { let mut lines = cell.display_lines(width); if has_emitted_history && !cell.is_stream_continuation() @@ -2565,7 +2568,7 @@ printf 'fenced within fenced\n' chat.on_commit_tick(); let mut inserted_any = false; while let Ok(app_ev) = rx.try_recv() { - if let AppEvent::InsertHistoryCell(cell) = app_ev { + if let AppEvent::InsertHistoryCell { cell, .. } = app_ev { let lines = cell.display_lines(width); crate::insert_history::insert_history_lines(&mut term, lines); inserted_any = true; diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index 3e7414f7addf..eb557d64ce51 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -20,6 +20,7 @@ use codex_core::protocol::SessionSource; use codex_multi_agent::AgentContext; use codex_multi_agent::AgentId; use codex_multi_agent::AgentOrchestrator; +use codex_multi_agent::shadow::ShadowConfig; use codex_ollama::DEFAULT_OSS_MODEL; use codex_protocol::config_types::SandboxMode; use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; @@ -532,6 +533,12 @@ async fn run_ratatui_app( delegate_config_overrides, allowed_agents, config.multi_agent.max_concurrent_delegates, + ShadowConfig::apply_defaults( + config.multi_agent.enable_shadow_cache, + config.multi_agent.max_shadow_sessions, + config.multi_agent.max_shadow_memory_bytes, + config.multi_agent.compress_shadows, + ), )); // Determine resume behavior: explicit id, then resume last, then picker. 
diff --git a/codex-rs/tui/src/session_log.rs b/codex-rs/tui/src/session_log.rs index b2858e8f2103..ee1a40ed3555 100644 --- a/codex-rs/tui/src/session_log.rs +++ b/codex-rs/tui/src/session_log.rs @@ -125,7 +125,7 @@ pub(crate) fn log_inbound_app_event(event: &AppEvent) { } match event { - AppEvent::CodexEvent(ev) => { + AppEvent::CodexEvent { event: ev, .. } => { write_record("to_tui", "codex_event", ev); } AppEvent::NewSession => { @@ -136,7 +136,7 @@ pub(crate) fn log_inbound_app_event(event: &AppEvent) { }); LOGGER.write_json_line(value); } - AppEvent::InsertHistoryCell(cell) => { + AppEvent::InsertHistoryCell { cell, .. } => { let value = json!({ "ts": now_ts(), "dir": "to_tui", diff --git a/codex-rs/tui/src/status/card.rs b/codex-rs/tui/src/status/card.rs index 4832713bf905..e76eb65d2b9f 100644 --- a/codex-rs/tui/src/status/card.rs +++ b/codex-rs/tui/src/status/card.rs @@ -21,6 +21,7 @@ use super::format::truncate_line_to_width; use super::helpers::compose_account_display; use super::helpers::compose_agents_summary; use super::helpers::compose_model_display; +use super::helpers::format_bytes_compact; use super::helpers::format_directory_display; use super::helpers::format_tokens_compact; use super::rate_limits::RateLimitSnapshotDisplay; @@ -44,6 +45,20 @@ pub(crate) struct StatusTokenUsageData { context_window: Option, } +#[derive(Debug, Clone, Default)] +pub(crate) struct StatusShadowData { + pub enabled: bool, + pub cached_sessions: usize, + pub max_sessions: Option, + pub total_events: usize, + pub total_user_inputs: usize, + pub total_agent_outputs: usize, + pub total_raw_bytes: usize, + pub total_compressed_bytes: usize, + pub memory_limit_bytes: Option, + pub compression_enabled: bool, +} + #[derive(Debug)] struct StatusHistoryCell { model_name: String, @@ -56,6 +71,7 @@ struct StatusHistoryCell { session_id: Option, token_usage: StatusTokenUsageData, rate_limits: StatusRateLimitData, + shadow: Option, } pub(crate) fn new_status_output( @@ -64,9 +80,17 @@ 
pub(crate) fn new_status_output( context_usage: Option<&TokenUsage>, session_id: &Option, rate_limits: Option<&RateLimitSnapshotDisplay>, + shadow: Option, ) -> CompositeHistoryCell { let command = PlainHistoryCell::new(vec!["/status".magenta().into()]); - let card = StatusHistoryCell::new(config, total_usage, context_usage, session_id, rate_limits); + let card = StatusHistoryCell::new( + config, + total_usage, + context_usage, + session_id, + rate_limits, + shadow, + ); CompositeHistoryCell::new(vec![Box::new(command), Box::new(card)]) } @@ -78,6 +102,7 @@ impl StatusHistoryCell { context_usage: Option<&TokenUsage>, session_id: &Option, rate_limits: Option<&RateLimitSnapshotDisplay>, + shadow: Option, ) -> Self { let config_entries = create_config_summary_entries(config); let (model_name, model_details) = compose_model_display(config, &config_entries); @@ -121,6 +146,7 @@ impl StatusHistoryCell { session_id, token_usage, rate_limits, + shadow, } } @@ -273,6 +299,13 @@ impl HistoryCell for StatusHistoryCell { if self.token_usage.context_window.is_some() { push_label(&mut labels, &mut seen, "Context window"); } + if let Some(shadow) = self.shadow.as_ref() { + push_label(&mut labels, &mut seen, "Delegates"); + if shadow.enabled { + push_label(&mut labels, &mut seen, "Delegate memory"); + push_label(&mut labels, &mut seen, "Delegate events"); + } + } self.collect_rate_limit_labels(&mut seen, &mut labels); let formatter = FieldFormatter::from_labels(labels.iter().map(String::as_str)); @@ -311,6 +344,51 @@ impl HistoryCell for StatusHistoryCell { lines.push(formatter.line("Context window", spans)); } + if let Some(shadow) = self.shadow.as_ref() { + lines.push(Line::from(Vec::>::new())); + if !shadow.enabled { + lines.push( + formatter.line("Delegates", vec![Span::from("shadow cache disabled").dim()]), + ); + } else { + let delegate_label = if let Some(limit) = shadow.max_sessions { + format!("{} cached (limit {limit})", shadow.cached_sessions) + } else { + format!("{} 
cached", shadow.cached_sessions) + }; + lines.push(formatter.line("Delegates", vec![Span::from(delegate_label)])); + + let mut memory_spans = + vec![Span::from(format_bytes_compact(shadow.total_raw_bytes))]; + memory_spans.push(Span::from(" raw").dim()); + if shadow.compression_enabled && shadow.total_compressed_bytes > 0 { + memory_spans.push(Span::from(" (").dim()); + memory_spans.push( + Span::from(format!( + "{} compressed", + format_bytes_compact(shadow.total_compressed_bytes) + )) + .dim(), + ); + memory_spans.push(Span::from(")").dim()); + } + if let Some(limit) = shadow.memory_limit_bytes { + memory_spans.push(Span::from(" / ").dim()); + memory_spans.push(Span::from(format_bytes_compact(limit))); + memory_spans.push(Span::from(" limit").dim()); + } else { + memory_spans.push(Span::from(" (no limit)").dim()); + } + lines.push(formatter.line("Delegate memory", memory_spans)); + + let event_summary = format!( + "{} events · {} inputs · {} outputs", + shadow.total_events, shadow.total_user_inputs, shadow.total_agent_outputs + ); + lines.push(formatter.line("Delegate events", vec![Span::from(event_summary)])); + } + } + lines.extend(self.rate_limit_lines(available_inner_width, &formatter)); let content_width = lines.iter().map(line_display_width).max().unwrap_or(0); diff --git a/codex-rs/tui/src/status/helpers.rs b/codex-rs/tui/src/status/helpers.rs index 1889d7f4f40c..8fa4124d2885 100644 --- a/codex-rs/tui/src/status/helpers.rs +++ b/codex-rs/tui/src/status/helpers.rs @@ -186,3 +186,26 @@ pub(crate) fn title_case(s: &str) -> String { let rest: String = chars.as_str().to_ascii_lowercase(); first.to_uppercase().collect::() + &rest } + +pub(crate) fn format_bytes_compact(value: usize) -> String { + if value == 0 { + return "0 B".to_string(); + } + + const UNITS: [&str; 5] = ["B", "KiB", "MiB", "GiB", "TiB"]; + let mut size = value as f64; + let mut unit_index = 0usize; + + while size >= 1024.0 && unit_index < UNITS.len() - 1 { + size /= 1024.0; + unit_index += 1; 
+ } + + if size < 10.0 { + format!("{size:.2} {}", UNITS[unit_index]) + } else if size < 100.0 { + format!("{size:.1} {}", UNITS[unit_index]) + } else { + format!("{size:.0} {}", UNITS[unit_index]) + } +} diff --git a/codex-rs/tui/src/status/mod.rs b/codex-rs/tui/src/status/mod.rs index eccb6b72b5a7..84c318b7f042 100644 --- a/codex-rs/tui/src/status/mod.rs +++ b/codex-rs/tui/src/status/mod.rs @@ -4,7 +4,9 @@ mod format; mod helpers; mod rate_limits; +pub(crate) use card::StatusShadowData; pub(crate) use card::new_status_output; +pub(crate) use helpers::format_bytes_compact; pub(crate) use rate_limits::RateLimitSnapshotDisplay; pub(crate) use rate_limits::rate_limit_snapshot_display; diff --git a/codex-rs/tui/src/status/tests.rs b/codex-rs/tui/src/status/tests.rs index 395d952f413b..618bc2251e65 100644 --- a/codex-rs/tui/src/status/tests.rs +++ b/codex-rs/tui/src/status/tests.rs @@ -103,7 +103,14 @@ fn status_snapshot_includes_reasoning_details() { .expect("timestamp"); let rate_display = rate_limit_snapshot_display(&snapshot, captured_at); - let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display)); + let composite = new_status_output( + &config, + &usage, + Some(&usage), + &None, + Some(&rate_display), + None, + ); let mut rendered_lines = render_lines(&composite.display_lines(80)); if cfg!(windows) { for line in &mut rendered_lines { @@ -144,7 +151,14 @@ fn status_snapshot_includes_monthly_limit() { .expect("timestamp"); let rate_display = rate_limit_snapshot_display(&snapshot, captured_at); - let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display)); + let composite = new_status_output( + &config, + &usage, + Some(&usage), + &None, + Some(&rate_display), + None, + ); let mut rendered_lines = render_lines(&composite.display_lines(80)); if cfg!(windows) { for line in &mut rendered_lines { @@ -170,7 +184,7 @@ fn status_card_token_usage_excludes_cached_tokens() { total_tokens: 2_100, }; - let 
composite = new_status_output(&config, &usage, Some(&usage), &None, None); + let composite = new_status_output(&config, &usage, Some(&usage), &None, None, None); let rendered = render_lines(&composite.display_lines(120)); assert!( @@ -211,7 +225,14 @@ fn status_snapshot_truncates_in_narrow_terminal() { .expect("timestamp"); let rate_display = rate_limit_snapshot_display(&snapshot, captured_at); - let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display)); + let composite = new_status_output( + &config, + &usage, + Some(&usage), + &None, + Some(&rate_display), + None, + ); let mut rendered_lines = render_lines(&composite.display_lines(46)); if cfg!(windows) { for line in &mut rendered_lines { @@ -238,7 +259,7 @@ fn status_snapshot_shows_missing_limits_message() { total_tokens: 750, }; - let composite = new_status_output(&config, &usage, Some(&usage), &None, None); + let composite = new_status_output(&config, &usage, Some(&usage), &None, None, None); let mut rendered_lines = render_lines(&composite.display_lines(80)); if cfg!(windows) { for line in &mut rendered_lines { @@ -274,7 +295,14 @@ fn status_snapshot_shows_empty_limits_message() { .expect("timestamp"); let rate_display = rate_limit_snapshot_display(&snapshot, captured_at); - let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display)); + let composite = new_status_output( + &config, + &usage, + Some(&usage), + &None, + Some(&rate_display), + None, + ); let mut rendered_lines = render_lines(&composite.display_lines(80)); if cfg!(windows) { for line in &mut rendered_lines { @@ -306,7 +334,7 @@ fn status_context_window_uses_last_usage() { total_tokens: 13_679, }; - let composite = new_status_output(&config, &total_usage, Some(&last_usage), &None, None); + let composite = new_status_output(&config, &total_usage, Some(&last_usage), &None, None, None); let rendered_lines = render_lines(&composite.display_lines(80)); let context_line = rendered_lines 
.into_iter() From 864461c67360be429d2bc6282c0c81c4eaf7d9f9 Mon Sep 17 00:00:00 2001 From: George Nesterenok Date: Mon, 27 Oct 2025 19:51:32 +0000 Subject: [PATCH 17/17] Added follow-up delegation support --- ai-temp/AGENTS.md | 100 ++- ai-temp/CHANGELOG.md | 9 + ai-temp/TODO.md | 25 + ai-temp/agent-follow-up.md | 281 ++++++++ ai-temp/agent-switching.md | 89 +-- ai-temp/config-design.md | 2 +- ai-temp/example-codex-home/AGENTS.md | 1 + ai-temp/instruction-design.md | 10 +- ai-temp/orchestration-integration.md | 201 ++---- ai-temp/parallel-delegation.md | 6 +- ai-temp/ui-ux-delegation.md | 15 +- codex-rs/Cargo.lock | 1 + codex-rs/core/src/delegate_tool.rs | 80 ++- codex-rs/core/src/tools/handlers/delegate.rs | 113 +++- .../src/tools/handlers/delegate_sessions.rs | 282 ++++++++ codex-rs/core/src/tools/handlers/mod.rs | 3 + codex-rs/core/src/tools/spec.rs | 5 + codex-rs/core/tests/common/test_codex.rs | 40 +- codex-rs/multi-agent/Cargo.toml | 1 + codex-rs/multi-agent/src/lib.rs | 4 +- codex-rs/multi-agent/src/orchestrator.rs | 638 +++++++++++++++++- .../multi-agent/src/orchestrator/tests.rs | 183 ++++- codex-rs/tui/src/app.rs | 401 +++++++++-- codex-rs/tui/src/app_event.rs | 6 + codex-rs/tui/src/chatwidget.rs | 61 ++ codex-rs/tui/src/history_cell.rs | 103 +++ 26 files changed, 2286 insertions(+), 374 deletions(-) create mode 100644 ai-temp/TODO.md create mode 100644 ai-temp/agent-follow-up.md create mode 100644 codex-rs/core/src/tools/handlers/delegate_sessions.rs diff --git a/ai-temp/AGENTS.md b/ai-temp/AGENTS.md index 2fa2dda7c041..7d7ecf8c55b9 100644 --- a/ai-temp/AGENTS.md +++ b/ai-temp/AGENTS.md @@ -1,64 +1,40 @@ # Multi-Agent Orchestrator Notes -## Feature Goal -- Allow the primary Codex CLI agent to delegate work to named sub-agents that live inside `~/.codex/agents//`. Here `` is a human-friendly slug like `rust_test_writer` that doubles as the directory name. 
-- Each sub-agent should behave like an isolated Codex installation with its own `AGENTS.md`, `config.toml`, `log/`, `sessions/`, and related state directories. -- The orchestrator must load, run, and switch between agents without duplicating the existing configuration, logging, history, and persistence logic. - -## Existing Implementation Survey - -- `codex-rs/core/src/config.rs` owns the `Config` struct, the `find_codex_home` helper, and `Config::load_from_base_config_with_overrides`, which lets us inject a custom `codex_home` path when constructing a configuration. `Config::log_dir` and related helpers derive paths by appending to `codex_home`, so moving to a per-agent directory is automatically supported. -- `codex-rs/core/src/config_loader/mod.rs` implements layered config loading (`config.toml`, managed overrides, CLI overrides). It already accepts an arbitrary base directory, so we can reuse it for sub-agent trees by pointing it at `~/.codex/agents/`. -- `codex-rs/common/src/config_override.rs` parses `-c key=value` overrides. Those overrides can continue to target agent-specific settings as long as we resolve them against the sub-agent config before the run starts. -- The `multi_agent.agents = ["…"]` list in each `config.toml` now controls delegate availability. When the list is non-empty the delegate tool auto-enables; when empty it stays hidden, eliminating the need for manual `include_delegate_tool` flags. - -### Project instructions (`AGENTS.md`) -- `Config::load_instructions` in `codex-rs/core/src/config.rs` reads `AGENTS.md` at the root of `codex_home`. That gives us a place to put per-agent doctrine without touching repo-level instructions. -- Repository and cwd instructions are merged by `codex-rs/core/src/project_doc.rs`, which walks the filesystem to collect `AGENTS.md` files. This logic happens after `Config` is loaded, so sub-agent instructions will cascade naturally once the agent-specific `Config` sets its own cwd and codex_home. 
-- The TUI status widget (`codex-rs/tui/src/status/helpers.rs`) already summarises discovered instructions. It will display sub-agent docs correctly as long as the orchestrator updates the `Config` before rendering. -### Session persistence and logging -- Each agent writes rollouts, streaming history, and logs under its own `codex_home`. See `ai-temp/persistence-design.md` for the isolation rules and orchestrator responsibilities. - -### Auth and CLI entry points -- Authentication helpers in `codex-rs/core/src/auth.rs` read and write `auth.json` beneath `codex_home`. For the first iteration, all agents share the primary `~/.codex/auth.json`; isolation hooks can be added later if needed. -- CLI bootstrapping happens in `codex-rs/cli/src/main.rs`, which constructs `Config` via the shared loader and then launches the TUI or other subcommands. The orchestrator will need to hook here (or inside the TUI) to select an agent before the config load so that downstream crates operate against the correct directory tree. -- Documentation for the current configuration surface is in `docs/config.md`, ensuring any new flags or environment variables we introduce are documented alongside existing options. - -## Design Principles -- Treat each sub-agent as an isolated `Config` + state bundle so existing code paths stay unchanged. -- Keep the orchestration layer thin: it should select the right `codex_home`, prepare overrides, and then call into unmodified core/TUI code wherever possible. -- Prefer additive interfaces (e.g., `AgentRegistry::resolve_path(id) -> PathBuf`) over invasive changes to core modules, respecting the repository's instruction to avoid Java-level over-abstraction. -- Make it easy to fall back to single-agent behaviour by defaulting to the legacy `~/.codex` layout when no sub-agent is selected. - -## Proposed Architecture -- Directory layout: - - `~/.codex/agents//AGENTS.md` – sub-agent guidance consumed by `Config::load_instructions`. 
`` should be a meaningful, filesystem-safe identifier (e.g., `rust_test_writer`). - - `~/.codex/agents//config.toml` – optional overrides layered on top of the global config loader. - - `~/.codex/agents//log/` and `~/.codex/agents//sessions/` – reused by the TUI and rollout recorder with no code changes. - - Optional extras such as `history.jsonl`, `auth.json`, or MCP metadata can mirror the top-level structure when isolation is desired. -- Orchestration flow: - - Extend the CLI (likely in `codex-rs/cli/src/main.rs`) to accept an `--agent ` flag or read the selection from a config profile. The orchestrator resolves `~/.codex/agents/` (creating it if missing) before loading `Config`. - - Introduce a lightweight helper (e.g., `codex-rs/core/src/agent_registry.rs`) that maps agent identifiers to directories, validates presence of `AGENTS.md`/`config.toml`, and exposes the resolved `codex_home`. - - When the main agent needs to talk to a sub-agent, construct a new `Config` by calling `Config::load_from_base_config_with_overrides` with the agent's path. All downstream components (sessions, logs, instructions) receive the correct context automatically. -- Maintain a controller component in the CLI or core layer that mediates conversations: the primary agent keeps the user-facing session, delegates tasks via API calls to sub-agent Codex instances, and reconciles their responses. -- Decoupling strategy: - - Keep orchestrator logic in a new module/crate rather than embedding it directly into `codex-rs/core/src/codex.rs`, so only the orchestration entry points depend on it. - - Use trait-based boundaries sparingly: a simple `AgentContext` struct carrying the agent id, codex_home, and resolved `Config` may be enough, keeping future changes localised. - -## Agent Invocation UX -- Default behaviour: the main agent chooses when to invoke sub-agents, treating them like native tools (similar to the plan tool or apply-patch flow). 
-- Explicit requests: users can opt to summon particular agents by tagging them in prompts, e.g. `#rust_test_writer`. -- Multiple tags (`#agent_one #agent_two`) allow coordinated runs when orchestration logic supports it. - -## Error Handling -- Validation, logging, and recovery patterns are documented in `ai-temp/error-handling.md`. Highlights: - - Agent discovery failures turn into actionable CLI/TUI errors via `anyhow::bail!`. - - Detailed context is emitted through `tracing` logs while the main history records only summary entries. - - Persistence and working-directory issues follow the same `std::io::Result` semantics used by rollout recording. - -## Roadmap -1. Implement an `AgentRegistry` that enumerates `~/.codex/agents`, validates directory shape, and resolves paths. -2. Add CLI plumbing to choose an agent (flag, config entry, or interactive prompt) before constructing `Config`. -3. Ensure core services (auth, logging, history, rollout) honour the selected agent by threading the alternate `codex_home`. -4. Prototype orchestration logic that spins up a secondary Codex instance using the sub-agent context and mediates message flow. -5. Expand tests and documentation to cover multi-agent behaviour, including snapshots for the new directory layout and user guidance in `docs/`. +> **Docs in progress** – All multi-agent design material lives in `ai-temp/` while the feature is under active development. **Do not touch files under `docs/` yet;** when the architecture stabilises we will migrate polished sections there. + +## Current Capabilities (2025‑10‑20) +- Delegation runs through the `delegate_agent` tool. Payloads can include `conversation_id` to continue an existing delegate session (`codex-rs/core/src/tools/handlers/delegate.rs`). +- Discovery/preview/dismiss flows are served by the `delegate_sessions` tool with `list`, `messages`, and `dismiss` operations (`codex-rs/core/src/tools/handlers/delegate_sessions.rs`). 
+- `AgentOrchestrator` (`codex-rs/multi-agent/src/orchestrator.rs`) now: + - Registers every delegate session, exposing summaries (`DelegateSessionSummary`) and event streams per conversation. + - Emits `DelegateEvent::{Started,Delta,Completed,Failed,Info}` via a `SessionEventBroadcaster`. + - Supports follow-ups by preserving the original parent run id (`parent_run_for_follow_up`) before re-registering a conversation. + - Tracks detached runs and reusable sessions, feeding the `/agent` picker and notifications. +- TUI integration (`codex-rs/tui/src/app.rs`, `chatwidget.rs`, `history_cell.rs`) provides: + - A delegate tree with indentation per depth, status ownership, and summaries. + - A picker that offers preview/dismiss actions for saved sessions and detached runs. + - Dedicated history cells for preview output (`new_delegate_preview`) and consistent routing so sessions never leak updates into each other. +- Tests cover the delegate handler, orchestrator follow-up behaviour, and the TUI presentation. See `codex-rs/multi-agent/src/orchestrator/tests.rs` for regression cases on parent linkage and shadow recording. + +## Key Modules & Paths +- Loader & facade: `codex-rs/multi-agent/src/lib.rs` (`AgentConfigLoader`, orchestrator re-export). +- Runtime: `codex-rs/multi-agent/src/orchestrator.rs`. +- Shared tool types: `codex-rs/core/src/delegate_tool.rs`. +- Tool handlers/specs: `codex-rs/core/src/tools/handlers/delegate.rs`, `delegate_sessions.rs`, registry wiring in `codex-rs/core/src/tools/spec.rs`. +- UI: `codex-rs/tui/src/app.rs`, `app_event.rs`, `chatwidget.rs`, `history_cell.rs`, `/agent` picker, status helpers. +- Shadow caching architecture: `ai-temp/agents-shadow-client.md`. +- Follow-up design: `ai-temp/agent-follow-up.md`. + +## Behaviour Summary +1. **New run** – `delegate_agent` validates input and calls `AgentOrchestrator::delegate`. The orchestrator spins up a conversation through `ConversationManager`, registers it, and streams events back to the UI. 
+2. **Follow-up** – When `conversation_id` is supplied, `delegate_follow_up` reuses the stored session. Parent run metadata is captured before re-registration so the TUI can keep lineage straight. +3. **Listing & previews** – `delegate_sessions` pulls from `AgentOrchestrator::list_sessions_paginated` and `recent_messages`, which in turn rely on the shadow cache. +4. **Detached runs** – `mode: "detached"` returns immediately; runs are tracked until completion and surfaced in the picker with dismiss actions. +5. **UI routing** – Each event carries `owner_conversation_id`. `App::handle_delegate_update` forwards deltas and completions only to the matching session handle, while parent summaries are enqueued via `ChildSummary`. + +## Outstanding / Planned +- Agent switching (temporarily entering a delegate session) remains a future task – see `ai-temp/agent-switching.md`. +- No edits to `docs/advanced.md` (rolled back) or other public docs until this feature graduates. +- Additional end-to-end CLI tests and documentation polish still required before release. + +For subsystem-specific details, consult the dedicated docs in `ai-temp/` (shadow client, error handling, parallel delegation, persistence, follow-ups, etc.). Each file references the relevant source paths so updates stay aligned with implementation. diff --git a/ai-temp/CHANGELOG.md b/ai-temp/CHANGELOG.md index 1a9578b806c2..8fdf283e806a 100644 --- a/ai-temp/CHANGELOG.md +++ b/ai-temp/CHANGELOG.md @@ -2,13 +2,21 @@ > Keep this file current; it documents the evolution of the multi-agent design work. An outdated changelog breaks the orchestrator timeline. +## 2025-10-27 + +- Added follow-up delegation support: orchestrator can resume saved sessions, shadow-backed previews power the TUI picker, and the `delegate_sessions` tool exposes list/messages/dismiss operations. 
+- Documented the shipped behaviour inside `ai-temp/` (AGENTS overview, orchestration integration, UI/UX notes); public docs remain untouched until release. +- Implemented dismissal plumbing so stale sessions and their shadow caches can be removed safely from both the CLI and coordinating prompts. + ## 2025-10-19 + - Reworked delegate event fan-in so each conversation feeds a dedicated `spawn_event_forwarder` and surfaces updates through `AppEvent::CodexEvent { conversation_id, event }`, keeping history rendering scoped to the active session. - Added shadow-session plumbing to the orchestrator (`ShadowManager`, per-session metrics, user/agent capture forwarding) and exposed the new telemetry through the TUI status panel. - Expanded the delegate app events (`DelegateShadow*`, `InsertHistoryCell { conversation_id, ... }`) and updated the TUI’s `SessionHandle` storage to hydrate from snapshots instead of replaying global history. - Refreshed `ai-temp/agents-shadow-client.md`, `ai-temp/orchestration-integration.md`, and the sample Codex home (`example-codex-home/AGENTS.md`) to describe per-session streams, `DelegateEvent::{Started,Delta,Completed,Failed}`, and the tightened coordinator instructions. ## 2025-10-16 + - Added batched delegate execution: the core handler now accepts `batch` payloads, launches every child run concurrently (respecting the orchestrator’s concurrency cap), and returns per-agent summaries in a single response. - Updated the TUI delegate tree to indent siblings (two spaces per depth) and keep the status banner aligned while multiple children stream at once; adjusted snapshot/unit coverage accordingly. - Refreshed documentation and sample instructions (`ai-temp/parallel-delegation.md`, `ai-temp/tool-implementation-patterns.md`, example Codex home agents) to describe the batched call flow and new presentation. @@ -20,6 +28,7 @@ - Added focused unit tests covering the new config flag behaviour and UI indentation to prevent regressions. 
## 2025-10-14 + - Captured the baseline design artifacts (`AGENTS.md`, `config-design.md`, `instruction-design.md`, `persistence-design.md`, `error-handling.md`) compiled during the planning phase. - Reiterated the requirement that this changelog must stay up to date as the multi-agent feature evolves. - Scaffolded the `codex-multi-agent` crate with `AgentId`, `AgentRegistry`, and async config loading that merges global/agent/CLI overrides into an `AgentContext`. diff --git a/ai-temp/TODO.md b/ai-temp/TODO.md new file mode 100644 index 000000000000..fbb4bbb7b22e --- /dev/null +++ b/ai-temp/TODO.md @@ -0,0 +1,25 @@ +# Follow-Up Rendering Bug (UI) + +We found that delegate follow-up prompts disappear when reopening the saved session even though the +run’s rollout contains them. Root cause: + +- `App::activate_delegate_session` only calls `ChatWidget::hydrate_from_shadow` when + `SessionHandle::history().is_empty()`. +- On follow-up, the handle still has prior history, so hydration is skipped and the old transcript + remains. + +## Next Steps +1. Adjust `activate_delegate_session` to refresh from the shadow snapshot even when history already + exists (e.g., clear/replace history or always hydrate). +2. Add a regression test that currently fails: create a `SessionHandle` with non-empty history, mock + an `ActiveDelegateSession` snapshot containing follow-up user and agent messages, invoke + `activate_delegate_session`, and assert the transcript now includes the follow-up prompt. +3. After the fix, run `cargo test -p codex-tui` to confirm coverage. 
+ +## Useful References +- Rollout file with the missing follow-up: + `ai-temp/example-codex-home/agents/critic/sessions/2025/10/21/rollout-2025-10-21T00-33-16-019a042f-3e55-7171-b9df-1690b9f905a0.jsonl` +- Entry points: + - `codex-rs/tui/src/app.rs::activate_delegate_session` + - `codex-rs/tui/src/chatwidget.rs::hydrate_from_shadow` + - Shadow capture logic in `codex-rs/multi-agent/src/shadow/{mod,recorder}.rs` diff --git a/ai-temp/agent-follow-up.md b/ai-temp/agent-follow-up.md new file mode 100644 index 000000000000..b5c7e602838b --- /dev/null +++ b/ai-temp/agent-follow-up.md @@ -0,0 +1,281 @@ +# Agent Follow-Up Enablement + +This note captures the design work required to let the primary Codex agent +reuse an existing delegate conversation instead of launching a fresh run every +time. The end goal is that the model can: + +1. List delegate sessions that are still available for follow-up. +2. Inspect the latest `N` turns from a delegate before deciding whether to + re-engage it. +3. Invoke `delegate_agent` in "follow-up" mode by passing the existing + conversation identifier so the request is appended to that session. + +The sections below outline the API changes, orchestrator plumbing, UI surfaces, +and documentation updates needed to ship this capability. + +--- + +## 1. User & Model Experience + +- **Discovery** – the primary agent calls the new `delegate_sessions` tool to + retrieve a list of reusable delegate sessions. Each entry returns: + - `conversation_id` + - `agent_id` + - `mode` (`standard` vs `detached`) + - `cwd` + - `last_interacted_at` + - Optional shadow metrics (e.g., total turns, bytes) if available +- **Preview** – when absolutely necessary, the primary agent can request the + last `k` messages from a conversation (latest first). This serves as a + last-resort reminder of what the delegate just did before composing a + follow-up prompt. 
+- **Follow-up call** – when the model is ready, it reuses the existing + conversation by including a `conversation_id` in the `delegate_agent` + payload. The orchestrator routes the new prompt into that conversation and + streams the updates back just like a fresh run. + +This keeps the user-facing workflow simple: the parent agent chooses from a +list, previews context, then issues a follow-up prompt that continues the +delegate transcript. + +--- + +## 2. API & Schema Updates + +We introduce a dedicated read-only tool named `delegate_sessions` alongside the +existing `delegate_agent` function. The new tool handles discovery, preview, +and dismissal so `delegate_agent` can focus solely on launching or resuming +work. + +### 2.1 `delegate_sessions` tool + +- **Operations** + - `list` – returns paginated session summaries ordered by + `last_interacted_at` (newest first). + - `messages` – returns a page of recent messages for a specific session. + - `dismiss` – removes a session from the reusable pool (freeing any shadow + resources). + +- **Request schema** (pseudo-JSON): + + ```json + { + "operation": "list" | "messages" | "dismiss", + "conversation_id": "...", // required for messages/dismiss + "cursor": "...", // optional pagination cursor + "limit": 3 // optional page size (default 3) + } + ``` + +- **Response examples** + - `list` returns `{ "sessions": [ ... ], "next_cursor": "..." }` + - `messages` returns `{ "messages": [ ... ], "next_cursor": "..." }` + with messages ordered newest-to-oldest; clients can follow `next_cursor` + to page back through older history. + - `dismiss` returns `{ "status": "ok" }` + - Errors return `{ "status": "error", "error": { "code": "...", + "message": "..." } }` (e.g., `unknown_conversation`, `invalid_cursor`). + + Message entries include `id`, `role`, `content`, timestamps, and any tool + metadata needed for replay. Cursors are opaque strings generated by the + orchestrator (e.g., encoding timestamp + index). 
+ +### 2.2 `delegate_agent` follow-up contract + +- Extend `DelegateToolRequest` (`codex-rs/core/src/delegate_tool.rs:49-60`) with + one optional field: + - `conversation_id: Option<String>` – when present, indicates this is a + follow-up call using an existing session. + +- Update the tool schema (`DELEGATE_TOOL` in + `codex-rs/core/src/tools/handlers/delegate.rs:33-136`) with the new rules: + - `conversation_id` is mutually exclusive with `batch`. + - When `conversation_id` is provided, `prompt` is required but `agent_id` is + optional (if omitted, reuse the agent recorded on the session). + - Reject requests where the session id is unknown, inactive, or currently + processing another follow-up. + - Always return a new `run_id` even for follow-ups so downstream telemetry and + notifications can differentiate consecutive prompts within the same + conversation. + +No automatic context is streamed back; the model must call `delegate_sessions` +`messages` if it needs to inspect prior turns. + +--- + +## 3. Orchestrator Changes + +All follow-up logic lives in `AgentOrchestrator` and its adapter +(`codex-rs/multi-agent/src/orchestrator.rs`). Key tasks: + +1. **Expose session summaries** – add a public method + `list_sessions(limit, cursor) -> PaginatedSessions` that wraps the existing + `active_sessions()` helper and returns JSON-friendly data. Results are + ordered by `last_interacted_at` (desc) and include the next cursor. +2. **Fetch recent history** – new async helper + `recent_messages(conversation_id, cursor, limit)` that consults + `ShadowManager` first, then falls back to `CodexConversation::history`. The + helper returns newest-to-oldest entries plus the next cursor to support + pagination without loading entire transcripts. +3. **Follow-up entry point** – extend `MultiAgentDelegateAdapter::delegate` to + branch when `conversation_id` is set: + - Look up the session via `enter_session` (reusing the live + `CodexConversation`). 
+ - Submit the follow-up prompt with `conversation.submit(Op::UserTurn { … })`. + - Reuse the existing event stream (`SessionEventBroadcaster`) so deltas flow + back through `DelegateEvent`. + - Ensure `run_owner_conversations` keeps pointing to the original owner so + nested delegates still resolve parentage correctly. + - Mint a fresh `DelegateRunId` for each follow-up. Map it to the reused + conversation id so telemetry, notifications, and tool responses can + distinguish consecutive follow-ups on the same session. +4. **Lifecycle management** – when a follow-up completes, leave the session in + the registry so additional follow-ups remain possible. Only `remove_session` + (on explicit dismiss/cleanup) should evict it. + +Detached runs require extra care: only allow follow-ups once the detached run +has reached the `Ready` state, and update the detached-run registry to reflect +that the session is now “active”. + +--- + +## 4. UI & CLI Considerations + +- **TUI session picker** – extend the `/agent` panel to show reusable sessions + with additional actions: + - “Preview last N messages” (calls the new preview operation). + - “Resume conversation” (instructs the model to send a follow-up prompt). + - “Dismiss session” (fires the new `delegate_sessions` `dismiss` operation). +- **History cells** – add new cell types (`DelegateFollowUpStarted`, + `DelegateFollowUpCompleted`) so the primary transcript records when we re-enter + a delegate. This keeps the main chat audit-friendly. +- **Status panel** – include a count of reusable sessions and their total disk + footprint (via `ShadowMetrics`). +- **Notifications** – follow-ups should reuse the existing delegate + notifications; no change required beyond possibly annotating messages with + “(follow-up)”. + +Headless front-ends (e.g., `codex exec`) can expose the same operations through +their status commands or JSON APIs. + +--- + +## 5. 
Documentation & Prompt Updates + +- Revise the model-facing instructions (`ai-temp/example-codex-home/AGENTS.md` + and `docs/multi-agent.md`) to teach the agent flow: + 1. Call `delegate_sessions` with `operation: "list"` to enumerate reusable + runs when considering a follow-up. + 2. Use `delegate_sessions` with `operation: "messages"` for context (only + when absolutely necessary). + 3. Provide `conversation_id` when invoking `delegate_agent` to continue that + session, or `operation: "dismiss"` to clean up when you are done. +- Update `ai-temp/CHANGELOG.md` with the new capabilities once implemented. +- Add sample transcripts in `ai-temp/example-conversation.md` showing a follow-up + delegation. + +--- + +## 6. Testing Strategy + +1. **Unit tests** + - Tool handler validation (mutually exclusive fields, missing session id, + etc.). + - Agent-busy path: follow-up while a run is active returns the expected + `agent_busy` error payload. + - `delegate_sessions` operations: list pagination, message cursors, and + dismiss semantics (including error codes). + - Orchestrator follow-up path: submit prompt, verify events stream from the + existing conversation and that no new conversation is created. + - Recent-message retrieval using both shadow cache and rollout fallback. +2. **TUI snapshot tests** – update `/agent` picker snapshots to include preview + actions and follow-up entries; ensure history cells render correctly. +3. **Integration test** – create a fake agent home with a sample run, trigger a + follow-up via the CLI, and assert that the same conversation ID is reused and + history grows as expected. + +--- + +## 7. Policy Decisions + +- **Session eviction:** reusable sessions have no expiry by design; they stay + available until the user or orchestrator explicitly removes them via the + `delegate_sessions` `dismiss` operation (or matching UI action). 
+- **Concurrent follow-ups:** the orchestrator must reject follow-up attempts if + the target agent is still running. Introduce `DelegateToolError::AgentBusy` + and serialize it as `{ "status": "error", "error": { "code": "agent_busy", + "message": "delegate still running" } }` so the parent agent can wait and + retry deterministically. +- **Automatic context snippets:** the follow-up API never returns extra context + automatically. The model should request previews explicitly via + `delegate_sessions` `messages` only when it truly needs them. + +--- + +By following the plan above, we give the primary agent a structured way to +inspect, select, and continue delegate conversations, unlocking true follow-up +capabilities without breaking existing one-shot delegation workflows. + +--- + +## 8. Implementation Roadmap + +### Phase 1 – Core Wiring +1. **Schema groundwork** ✅ + - Update `DelegateToolRequest`/`DelegateToolError` to add `conversation_id` + and new error variants. + - Introduce shared structs/enums for session listings and message previews. +2. **Delegate handler** (`core/src/tools/handlers/delegate.rs`) ✅ + - Accept `conversation_id`; validate mutually exclusive arguments. + - Ensure follow-up calls reuse the existing agent id when omitted. + - Map new error variants to `FunctionCallError` responses. +3. **Tool registry** ✅ + - Register the new `delegate_sessions` tool spec and handler stub. +4. **Adapter interface** ✅ + - Extend `DelegateToolAdapter` trait with list/messages/dismiss methods. + +### Phase 2 – Orchestrator Services +5. **Session pagination helpers** ✅ + - Add cursor encoding/decoding, `list_sessions(limit, cursor)` public API. +6. **Message retrieval** ✅ + - Implement `recent_messages(conversation_id, cursor, limit)` with shadow + cache fallback. +7. **Follow-up orchestration** ✅ + - Support optional `conversation_id` in `delegate()`; mint fresh run ids and + guard against concurrent follow-ups (AgentBusy path). +8. 
**Dismissal** ✅ + - Provide `dismiss_session(conversation_id)` that tears down session state + and shadow artifacts. + +### Phase 3 – `delegate_sessions` Tool Handler +9. **Handler implementation** ✅ + - Parse `operation` (`list`/`messages`/`dismiss`), call new adapter methods, + return structured JSON with pagination cursors. +10. **Error handling** ✅ + - Map orchestrator errors (`SessionNotFound`, `InvalidCursor`, etc.) to + consistent tool responses. + +### Phase 4 – UI & CLI Integration +11. **TUI updates** ✅ + - `/agent` picker: show follow-up sessions, preview/dismiss actions, updated + history cells for follow-up start/complete. +12. **Status panel** + - Surface reusable session counts and shadow metrics. +13. **Headless surfaces** + - Ensure CLI status/info commands expose the same data (if applicable). + +### Phase 5 – Documentation & Tests +14. **Docs & prompts** ✅ + - Update `docs/multi-agent.md`, sample instructions, and the changelog. +15. **Unit tests** ✅ + - Add coverage for the new delegate preview history cell (with snapshot) and + keep TUI behaviour under test. +16. **TUI snapshots/integration** + - Add snapshots for new UI entries and integration test ensuring identical + conversation id reuse. + +### Phase 6 – Final Polish +17. **Lint & format** (`just fmt`, `just fix -p <project>` as required). +18. **Sanity checks** + - Manual smoke test with `CODEX_HOME=...` sample agents following the new + flow. diff --git a/ai-temp/agent-switching.md b/ai-temp/agent-switching.md index 20aa1fb449d9..09f91b0eb2f9 100644 --- a/ai-temp/agent-switching.md +++ b/ai-temp/agent-switching.md @@ -1,76 +1,35 @@ -# Agent Switching Flow +# Agent Switching Flow (Implemented) -## Goal -- Let users temporarily leave the main assistant, talk directly to the delegate that just ran, and then return to the primary conversation with the new context automatically folded in. 
-- Preserve the sub-agent’s own history/logs while also giving the main agent enough summary data to continue the original task without manual copy/paste. -- Keep this behaviour additive to the existing delegation pipeline (`ai-temp/orchestration-integration.md`) so we do not fork separate orchestration code paths. +This document summarises the current “enter delegate session” behaviour that shipped alongside follow-up support on 2025‑10‑20. Users can jump into a finished delegate conversation, interact directly, and return to the primary agent—all without leaving the TUI. -## Interaction Flow -1. **Primary delegation** – User asks the main agent for work; it invokes `AgentOrchestrator::delegate(...)` (`codex-rs/multi-agent/src/orchestrator.rs`) and streams the sub-agent result as today. -2. **Switch request** – Once the delegate finishes, the UI surfaces an affordance (button, slash command, or keyboard shortcut) to “enter” that delegate session. The request references the agent id plus the conversation/session handle held by the orchestrator. -3. **Direct conversation** – While switched, user prompts are routed straight to the sub-agent’s `ConversationManager` (`codex-rs/core/src/conversation_manager.rs:57`), writing to `~/.codex/agents//history.jsonl` and `sessions/` per `ai-temp/persistence-design.md`. -4. **Return & sync** – Exiting the sub-agent triggers a summary event back to the primary conversation. The orchestrator composes: - - Latest sub-agent turns since the last delegation summary. - - Any user instructions entered during the switch. - - Optional metadata (elapsed time, exit status) for the main transcript. -5. **Primary follow-up** – The main agent resumes with an updated context item (e.g., injected history cell) so the user can issue the closing instruction (“Solve task X”) without restating manual edits. It stays idle until the user provides that follow-up prompt; there is no automatic validation pass unless the user explicitly asks for one. 
+--- -## Orchestrator Responsibilities -- Track active sub-agent sessions beyond the initial delegate run, including an offset to know which messages were created during the manual switch. -- Provide APIs to: - - `enter_agent(agent_id, session_id)` – hand back a handle to the sub-agent conversation. - - `exit_agent(agent_id, session_id)` – return summaries for rehydrating the primary transcript. -- Maintain a lightweight audit of switches (agent id, start/end timestamps) for `/status` (`codex-rs/tui/src/status/helpers.rs`) and debugging. -- Ensure authentication and tool permissions obey the main agent’s policy; the switch cannot elevate capabilities beyond what the delegate already has. +## User Experience -## Persistence & Context Sync -- Sub-agent turns continue to live exclusively under `~/.codex/agents//` so per-agent isolation stays intact (`ai-temp/persistence-design.md`). -- The main agent stores only synthesized snapshots: user switch transcript, sub-agent response digest, and references to the underlying rollout file. -- Conflict reconciliation remains manual. Unless a sub-agent overrides its working directory, it edits the same workspace as the main agent, so users should rely on git/review tooling to resolve overlapping changes. -- Each `AgentContext` already persists the delegate’s working directory (`ConfigOverrides::cwd`). When the sub-agent runs with a non-default cwd, the return summary should echo that path so the main agent understands where the edits landed. -- Consider storing a “since marker” (session id + line number) inside the orchestrator so re-entry picks up where the user left off. -- When returning, append a history cell in the main transcript citing the sub-agent session path and summarizing the net changes. +1. **Launch picker** – `/agent` (or the corresponding UI shortcut) opens the delegate picker via `ChatWidget::open_delegate_picker` (`codex-rs/tui/src/chatwidget.rs`). 
The picker lists reusable sessions and detached runs, highlighting the active session if the user is already inside a delegate. +2. **Enter session** – Selecting “Enter session” for a delegate sends `AppEvent::EnterDelegateSession(conversation_id)` (`chatwidget.rs:2207`). `App::activate_delegate_session` (`codex-rs/tui/src/app.rs:1250`) switches the active `SessionHandle` to the chosen conversation, hydrates it from the shadow cache if available, and routes subsequent user input to that sub-agent. +3. **While inside** – The composer banner shows the delegate name; history updates stream directly from that delegate’s `CodexConversation`. Switching again simply selects the new session from the picker. +4. **Return to primary** – Choosing “Return to main agent” issues `AppEvent::ExitDelegateSession` (`chatwidget.rs:2147`). `App::return_to_primary` (`codex-rs/tui/src/app.rs:900`) restores the primary session, logs a summary message, and leaves the delegate conversation available for future follow-ups. -## UI Considerations -- Extend the delegation UI (`ai-temp/ui-ux-delegation.md`) with: - - A status banner showing `In #` while switched, with a shortcut to return to the primary agent. - - History cells that log switch events (`Entered #critic`, `Returned from #critic – applied adjustments`). - - Optional shortcut `/agent return` to exit quickly. -- While switched, show an inline footer indicator (`In #agent`) next to the context meter so the active delegate is always visible. -- Hide picker entries whose conversations are no longer resumable (e.g., cleanup, failure). If the user attempts to switch into a stale handle, surface an error toast and keep them in the current context while logging the failure. -- While switched, the prompt input should clearly identify the active agent (e.g., placeholder text, accent color) to avoid accidental edits. 
-- Surface breadcrumbs in `/status` showing the current agent stack (`Main → #ideas_provider → #critic`), making nested switches easier to follow later. -- Scope out a history browser for now; we do not surface delegate sessions from previous main-agent runs. +Key UI touches: +- Delegate history remains isolated; no events leak into other sessions. +- Summaries and status indicators update the delegate tree so the user can see which runs generated additional interaction. +- Errors during entry/exit bubble into history cells and log via `tracing::error!`. -## Edge Cases & Safeguards -- **Aborted delegate sessions** – If the orchestrator or sub-agent errors while you are switched in, emit a `DelegateEvent::Failed`, append an error history cell in the main transcript, and automatically return the user to the main agent. Also write the detailed failure to `codex-tui.log`. -- **Active-run visibility** – The orchestrator now tracks a stack of in-flight delegates. Surface the full stack in the UI so users know which nested agents are working; only the top-most run streams output. -- **Multi-agent hopping** – Switching among multiple delegates is hub-and-spoke: you can move main ↔ #ideas ↔ main ↔ #critic freely. Future “delegate chains” (sub-agents invoking their own sub-agents) remain out of scope; note this in breadcrumbs/help text so expectations stay clear. -- **Undo/redo** – Codex does not provide an orchestrator-level undo stack. Any manual file edits a user performs while switched should be managed through their VCS tooling. -- **Tool overlap** – Each sub-agent carries its own tool registry (e.g., plan tool). Streaming results during the switch stay in the sub-agent transcript; summaries injected on return should mention any plan updates so the main agent context is accurate. 
+--- -## Code Impact +## Orchestrator Integration -### Multi-Agent Feature Surfaces -- `codex-rs/multi-agent/src/orchestrator.rs` – extend state to track active delegate sessions, add `enter_agent`/`exit_agent` helpers, and retain offsets so we know which turns to summarize when the user returns. -- `codex-rs/multi-agent/src/lib.rs` – re-export the switching API and plumb new structs/enums (e.g., switch summaries, session handles). We may add a dedicated `switching.rs` module for bookkeeping. -- `codex-rs/multi-agent/src/tests/` (new) – cover enter/exit flows, ensuring we capture only newly added turns and that summaries are produced correctly. +- `AgentOrchestrator::enter_session` (`codex-rs/multi-agent/src/orchestrator.rs:960`) returns `ActiveDelegateSession` containing the session summary, live `CodexConversation`, a session-configured snapshot, and the per-session event receiver. +- `AgentOrchestrator::dismiss_session` removes a reusable session when the user chooses “Dismiss”; it refuses if a run is active to avoid mid-stream exits. +- Shadow snapshots hydrate the conversation instantly. Missing snapshots fall back to rollout replay with an informative banner. +- Parent/child relationships remain intact—switching doesn’t alter delegate lineage or follow-up behaviour. -### Core Runtime -- `codex-rs/core/src/conversation_manager.rs` – expose APIs to hand out existing `CodexConversation` handles (or resume by rollout) so the orchestrator can park and resume sub-agent sessions. We may need a lightweight “since marker” abstraction here. -- `codex-rs/core/src/delegate_tool.rs` – extend `DelegateToolEvent`/`DelegateToolRun` to serialize manual switch summaries back to the client. -- `codex-rs/core/src/tools/handlers/delegate.rs` – accept the richer payload, surface switch-specific metadata to the model, and ensure the handler stops streaming once the user exits the sub-agent. 
-- `codex-rs/core/src/codex.rs` – thread the orchestrator’s switch adapter into new conversations (similar to how the delegate adapter is wired today). +--- -### TUI Integration -- `codex-rs/tui/src/app_event.rs` & `codex-rs/tui/src/app.rs` – introduce `AppEvent` variants for “enter agent”, “exit agent”, and “switch summaries”; drive the event loop transitions. -- `codex-rs/tui/src/chatwidget.rs` (plus `chatwidget/agent.rs`) – route user input to the active sub-agent while switched, render banners/breadcrumbs, and rehydrate the main transcript when returning. -- `codex-rs/tui/src/history_cell.rs` – add cell types for “entered delegate” / “returned from delegate” entries with session links. -- `codex-rs/tui/src/status/helpers.rs` & `/status` widgets – surface the active agent stack and recent switch history. -- `codex-rs/tui/src/slash_command.rs` – wire `/agent enter ` / `/agent return` (or similar) commands if we expose keyboard-driven switching. -- `codex-rs/tui/src/tests/` – update snapshot/unit tests to cover the new event stream and UI affordances. -- We explicitly skip building a “replay” browser for older delegate sessions in this iteration. +## Known Limitations / Future Work -### CLI & Configuration -- `codex-rs/cli/src/main.rs` – ensure the CLI still constructs the orchestrator once, passing the new switching adapter into the TUI bootstrapper. -- `docs/` – update user-facing documentation (e.g., `docs/tui.md`, `docs/multi-agent.md`) to describe how to enter/exit a delegate session. +- No dedicated breadcrumbs in the main transcript yet; summaries appear as text cells. UX improvements (timers, richer chips) are tracked in `ai-temp/ui-ux-delegation.md`. +- We currently keep the delegate conversation alive indefinitely until dismissed; eviction policy may be revisited alongside shadow retention rules. + +Despite these gaps, switching is fully functional and covered by the orchestrator/TUI tests. 
Consult `codex-rs/tui/src/app.rs`, `chatwidget.rs`, and `ai-temp/ui-ux-delegation.md` for deeper details. diff --git a/ai-temp/config-design.md b/ai-temp/config-design.md index dfb5c784af7a..54a37a57bc6e 100644 --- a/ai-temp/config-design.md +++ b/ai-temp/config-design.md @@ -61,6 +61,6 @@ By isolating the orchestration-specific logic in this crate, other crates only n - A dedicated crate provides a single place to evolve agent-related behaviour (validation, migrations, metadata) without scattering knowledge of `~/.codex/agents` across the repo. ## Open Points to Finalise -- Exact CLI UX for selecting an agent (flag vs. config profile vs. interactive picker). +- Decide whether to add a dedicated CLI flag/selector for agent profiles or continue relying on config overrides and the `/agent` picker. - Whether agent directories can fall back to the global `auth.json` or require their own credentials. - Error reporting strategy when an agent directory exists but is misconfigured. diff --git a/ai-temp/example-codex-home/AGENTS.md b/ai-temp/example-codex-home/AGENTS.md index 995e74cc6cc1..a7ad57efc763 100644 --- a/ai-temp/example-codex-home/AGENTS.md +++ b/ai-temp/example-codex-home/AGENTS.md @@ -47,5 +47,6 @@ General rules: - Follow the chain even if you already see the answer; only skip when the user explicitly opts out of delegation. - The `delegate_agent` tool is AI-only. Describe which delegate you want in plain language—the user cannot invoke sub-agents directly. - You can launch multiple delegates in parallel. The CLI indents nested runs beneath their parent (two spaces per depth), and up to five delegates may be active at once; wait for all required sub-agents to finish before synthesizing. Call `delegate_agent` once with a `batch` array containing each `{agent_id, prompt}` so both delegates run even on models limited to a single tool invocation per turn. 
+- When resuming a previous run, use `delegate_sessions` with `operation: "messages"` to remind yourself what the delegate just did, and `operation: "dismiss"` when you are finished. - Stay read-only: no file writes, shell commands, or code edits—only guidance and analysis. - When manually testing, describe the problem clearly so the coordinator chooses the right delegate. diff --git a/ai-temp/instruction-design.md b/ai-temp/instruction-design.md index cd1a04ebf217..049905f1d8ad 100644 --- a/ai-temp/instruction-design.md +++ b/ai-temp/instruction-design.md @@ -13,13 +13,9 @@ 3. **Custom path** – point at a specific project directory (frontend/backend split, microservice repos, etc.). - `AgentContext` records the chosen path so downstream code (project-doc discovery, logging) operates with consistent scope. -## Instruction Inheritance -- Default behaviour: the agent’s `AGENTS.md` replaces inherited docs for a clean slate. -- Optional override: agent `config.toml` may set `inherit_repo_instructions = true` (name TBD) to append repository-level documents after the agent-specific instructions. -- Implementation outline: - - During load, the config layer reads the inheritance flag. - - If disabled, set `Config::base_instructions` to the agent file and skip repo traversal. - - If enabled, rely on `codex_core::project_doc::read_project_docs` so instructions are merged root-to-leaf with the agent doc prepended. +## Instruction Inheritance (current vs. future) +- **Today:** loading an agent replaces inherited docs; only the agent’s own `AGENTS.md` is applied. +- **Future idea:** introduce an opt-in `inherit_repo_instructions` flag so agents can append repo-level documents after their own guidance. This flag is not implemented yet; the section remains here as a backlog note. ## UI Exposure - The TUI status helpers (`codex-rs/tui/src/status/helpers.rs`) will detect the updated `Config` and display whichever instruction set is active. 
diff --git a/ai-temp/orchestration-integration.md b/ai-temp/orchestration-integration.md index 8881ddeb1099..e88ea50e0ede 100644 --- a/ai-temp/orchestration-integration.md +++ b/ai-temp/orchestration-integration.md @@ -1,177 +1,86 @@ -# Multi-Agent Orchestration Integration Plan +# Orchestration Integration Overview -This document describes how to wire true sub-agent orchestration into the Codex CLI so the primary agent can delegate work to agent profiles living under `~/.codex/agents//`. It focuses on runtime control-flow, UI/UX, and minimal-coupling integration points in the existing codebase. +This note documents how the multi-agent runtime is currently wired into the Codex CLI/TUI stack. It replaces the older speculative design and mirrors the implementation that shipped on 2025‑10‑20. --- -## 1. Runtime Architecture +## 1. Component Map -### 1.1 Components - -- **`codex-multi-agent` crate (`codex-rs/multi-agent/src/lib.rs`)** - Already exposes `AgentId`, `AgentRegistry`, and async loaders that return `AgentContext` values (merged `ConfigToml` + `Config`). We extend this crate with an orchestration module to keep agent resolution and config cloning isolated from the rest of the app. Each `AgentContext` now captures its own `multi_agent.agents` list so child delegates inherit the correct allowlist automatically. - -- **Orchestrator core (new)** - Proposed module `codex-rs/multi-agent/src/orchestrator.rs` exporting: - - `AgentHandle`: carries `AgentContext`, active `ConversationId`, and bookkeeping (start/end timestamps, status). - - `DelegateRequest`: SPA-style struct describing who/what to run (`agent_id`, prompt payload, optional working directory override). - - `AgentOrchestrator`: stateful controller that owns: - - A primary `AgentHandle` (mirrors currently running conversation). - - A per-agent `ConversationManager` + `UnboundedSender` pair created via `ConversationManager::with_delegate` so child runs can spawn their own delegates. 
- - Result channels to stream `Event` values back to the primary UI after post-processing. - - A stack of active run ids so nested delegates can execute concurrently. - -- **`ConversationManager` reuse** - Sub-agent sessions use the same `ConversationManager` entry points. The orchestrator calls `ConversationManager::new_conversation` with the agent-specific `Config` so all persistence automatically lands in `~/.codex/agents//` (per §2.2). - -- **Primary session** - Unchanged: `tui::App` (`codex-rs/tui/src/app.rs`) continues to own a `ConversationManager` for the main agent. The orchestrator is injected so it can spawn additional conversations on demand while keeping each conversation’s event stream isolated. - -### 1.2 Execution Flow - -1. **Delegate trigger** - - User explicitly requests delegation (see UI plan below), or the primary agent emits a structured tool call. - - We normalize the intent into `DelegateRequest`. - -2. **Agent resolution** - - `AgentOrchestrator::resolve_agent` calls `AgentConfigLoader::load` with the requested `AgentId`. - - On success, the orchestrator instantiates / reuses a `ConversationManager` scoped to that agent. Authentication stays shared (`AuthManager` from the primary session) per current design docs. The returned `AgentContext` also defines which downstream agents this delegate is allowed to call. - -3. **Conversation bootstrap** - - Call `ConversationManager::new_conversation` with the agent `Config`. - - The orchestrator captures the new `UnboundedSender` from `spawn_agent` (`codex-rs/tui/src/chatwidget/agent.rs:16`) or an equivalent helper in the orchestrator crate. - -4. **Task execution** - - The orchestrator forwards the translated prompt into the sub-agent conversation (`conversation.submit`). - - Streamed `Event` values are intercepted before they reach the UI. For every event: - - Persist to the sub-agent transcript as normal (handled by core). 
- - Convert to orchestrator messages (`DelegateEvent::Started` / `Delta` / `Completed` / `Failed`) that always carry the owning conversation id. The TUI routes those updates to the matching session regardless of which tab the user is viewing. Nested runs emit additional `Started` events with increasing depth. - -5. **Completion and summary** - - When `EventMsg::TaskComplete` fires, the orchestrator emits `DelegateEvent::Completed` with the owning conversation id. The TUI enqueues the child’s summary for its parent session, then renders it when the parent becomes active so siblings never see each other’s output. - - Store a compact record (duration, exit status) for `/status` display and optional audit logging (`~/.codex/log/multi-agent.log` per `ai-temp/persistence-design.md`). - -6. **Cleanup** - - Keep the sub-agent conversation alive if the profile supports follow-up chat, otherwise call `ConversationManager::remove_conversation`. - ---- - -## 2. Control-Flow Integration - -### 2.1 Entry Points - -| Concern | File | Hook | +| Layer | File(s) | Responsibility | | --- | --- | --- | -| Orchestrator instantiation | `codex-rs/tui/src/app.rs:82` | Inject an `AgentOrchestrator` alongside the existing `ConversationManager`. | -| Slash-command parsing | `codex-rs/tui/src/slash_command.rs` & `codex-rs/tui/src/chatwidget.rs:1126` | Add `/delegate` (or `/agent`) command to open a delegate picker or dispatch a delegate request. | -| App event handling | `codex-rs/tui/src/app.rs:600` (`while let Some(event)`) | Match on `AppEvent::DelegateUpdate` / `AppEvent::DelegateShadow*` and push updates into the session identified by `conversation_id`. | -| Event fan-in | `codex-rs/tui/src/app.rs:600` | Handle `AppEvent::CodexEvent` and `AppEvent::InsertHistoryCell` so each session only consumes its own stream. | -| Status card | `codex-rs/tui/src/status/card.rs:68` | Pull orchestrator metrics (active agents, last run) to display in `/status`. 
| - -### 2.2 Persistence - -- Sub-agent sessions reuse existing persistence automatically because `Config::codex_home` already points at `~/.codex/agents/` once we load through `AgentConfigLoader`. -- For the primary history: emit `DelegateEvent::Completed` / `Failed` with the owning conversation id and let the TUI enqueue summaries for the parent session. No changes needed in core rollout recording. - -### 2.3 Error Handling - -- Map orchestration errors to `DelegateEvent::Failed`; the TUI turns those into error cells for the parent session. -- Log details with `tracing::error!` inside the orchestrator, aligning with the `ai-temp/error-handling.md` guidance. +| **Loader** | `codex-rs/multi-agent/src/lib.rs` | `AgentConfigLoader` merges global + agent config, exposes `AgentContext`, and re-exports the orchestrator API. | +| **Orchestrator runtime** | `codex-rs/multi-agent/src/orchestrator.rs` | Owns session registry, shadow manager hooks, detached-run registry, follow-up handling, and the `SessionEventBroadcaster`. | +| **Tool handlers** | `codex-rs/core/src/tools/handlers/delegate.rs`, `delegate_sessions.rs` | Translate tool payloads into orchestrator calls, serialize responses/errors, and enforce schema constraints. | +| **Shared types** | `codex-rs/core/src/delegate_tool.rs` | Defines `DelegateToolRequest`, `DelegateSessionsList`, `DelegateToolError`, etc. | +| **TUI integration** | `codex-rs/tui/src/app.rs`, `app_event.rs`, `chatwidget.rs`, `history_cell.rs` | Renders delegate events, maintains per-session handles, offers preview/dismiss actions, and updates history. | --- -## 3. UI & UX Plan - -### 3.1 Invocation - -- **Slash command**: `/delegate [prompt...]` - - Add `SlashCommand::Delegate` in `codex-rs/tui/src/slash_command.rs`. - - In `ChatWidget::dispatch_command` (`codex-rs/tui/src/chatwidget.rs:1126`), call a new method `open_delegate_dialog()` that lists available agents via `AgentRegistry::list_agent_ids`. - - -### 3.2 Transcript Presentation +## 2. 
Orchestrator Responsibilities -- Introduce a specialized history cell (e.g., `DelegationSummaryCell`) under `codex-rs/tui/src/history_cell.rs`. - - Show a header `↳ rust_test_writer (success in 23s)` and embed the sub-agent's final answer. - - Link to the sub-agent session path using the existing `SessionHeader` styling helpers (`codex-rs/tui/src/chatwidget/session_header.rs`). +### Session lifecycle +1. **Creation** – `AgentOrchestrator::delegate` (or `delegate_follow_up`) loads the agent config, spawns a conversation, and registers the run via `register_run_conversation`. +2. **Streaming** – Every conversation gets a `SessionEventBroadcaster`. Event tasks forward individual `Event` values into delegate events scoped to the owning conversation. +3. **Shadow capture** – Recorder hooks record user/agent events into `ShadowManager`. `recent_messages` serves previews from this cache. +4. **Follow-up** – `parent_run_for_follow_up` captures the existing parent id before re-registering the conversation; `delegate_follow_up` reuses the stored `CodexConversation` and emits a fresh `DelegateEvent::Started`. +5. **Detached runs** – `delegate()` records detached runs in `detached_runs`. Completions update status and feed notifications. +6. **Session storage** – `store_session` refreshes `StoredDelegateSession` with the new handle and restarts the event forwarder if needed. -- While the sub-agent runs, insert a “progress” cell (spinner) similar to exec command cells (`codex-rs/tui/src/exec_cell/render.rs:157`). Update via `DelegateEvent::Delta` messages. - -### 3.3 Status View - -- Extend `compose_agents_summary` (`codex-rs/tui/src/status/helpers.rs:14`) to append active sub-agent counts and last-run statuses by querying the orchestrator handle cache. - -### 3.4 Keyboard & UX - -- Shortcut: `Ctrl+D` opens the delegate picker when the composer is empty. 
-- For task isolation, disable `/delegate` while another sub-agent call is running unless the selected agent supports concurrent runs (metadata flag in agent config). +### APIs exposed +- `delegate(...) -> DelegateRunId` +- `list_sessions_paginated(cursor, limit) -> DelegateSessionsList` +- `recent_messages(conversation_id, cursor, limit) -> DelegateSessionMessages` +- `dismiss_session(conversation_id)` +- `subscribe() -> mpsc::UnboundedReceiver` +- Helpers for detached summary, parent lookups, shadow metrics/statistics. --- -## 4. Minimal Coupling Strategy +## 3. Tool / Model Contract -1. **Keep core unaware** - - No changes to `codex-rs/core/src/codex.rs` or the protocol. The orchestrator consumes the existing `Op`/`Event` API via `CodexConversation`. +- **`delegate_agent`** + - Requires `prompt`. `agent_id` is optional when resuming with `conversation_id`. + - `conversation_id` is mutually exclusive with `batch`. Batch entries trigger concurrent runs. + - Handler subscribes to orchestrator events, waits for completion unless `mode: "detached"`, and returns `{"status":"ok","run_id":...}` or `{"status":"accepted"}` for detached calls. -2. **Orchestrator as a library** - - Implement orchestration in `codex-multi-agent` (new module) so the CLI/TUI crates depend only on a slim API: - ```rust - pub struct AgentOrchestrator { /* … */ } - impl AgentOrchestrator { - pub async fn available_agents(&self) -> Result>; - pub async fn delegate(&self, request: DelegateRequest) -> Result; - pub fn subscribe(&self) -> mpsc::UnboundedReceiver; - } - ``` - - This keeps the TUI glue thin and defers heavy logic to the crate that already knows how to load configs. +- **`delegate_sessions`** + - `operation: "list"` – paginated session summaries (newest first). + - `operation: "messages"` – newest-first message preview with cursor support. + - `operation: "dismiss"` – removes the session and cleans up shadow resources. + - Responses are serialized as `{ "status": "ok", ... 
}` with `sessions`, `messages`, and `next_cursor` as appropriate. -3. **UI changes confined to `tui/`** - - Avoid threading orchestration state through unrelated widgets. Only `ChatWidget`, `App`, and the status card interact with the orchestrator. - -4. **CLI parity** - - Other frontends (`codex exec`, `codex cloud`) can opt-in later because orchestration lives behind a library boundary. No changes required now. +Errors map to `DelegateToolError` variants (`AgentBusy`, `SessionNotFound`, `InvalidCursor`, etc.) so the model receives actionable messages. --- -## 5. Implementation Phases - -1. **Library groundwork** - - Extend `codex-multi-agent` with orchestrator types and helper methods. - - Add unit tests verifying `delegate()` spawns conversations and streams events (mock `ConversationManager`). - -2. **TUI integration** - - Instantiate orchestrator in `App::run` (`codex-rs/tui/src/app.rs:84`). - - Add new `AppEvent` variants (`codex-rs/tui/src/app_event.rs:15`). - - Update `ChatWidget` to emit delegate requests and render updates. +## 4. TUI Flow -3. **UI polish** - - Add history cell types and status indicators. - - Expose keyboard shortcuts and help text. +1. `App::run` constructs an `AgentOrchestrator` and subscribes to its events via `AppEvent::DelegateUpdate`. +2. `/agent` picker (`ChatWidget::open_delegate_picker`) pulls summaries from `delegate_sessions list`, including detached runs and follow-up sessions. +3. Preview action uses `delegate_sessions messages` and renders the result with `new_delegate_preview` history cells. +4. Dismiss action calls `dismiss_session` through the orchestrator. +5. Delegate events update the active `SessionHandle`: + - `Started` inserts a running status entry and updates the delegate tree. + - `Delta` streams through the existing `StreamController`. + - `Completed`/`Failed` produce history cells, clear status owners, and enqueue `ChildSummary` for the parent conversation. +6. 
Shadow snapshots hydrate when the user opens a saved session; fallbacks replay from rollout and inform the user. -4. **Testing** - - Snapshot tests for `/delegate` output in `tui/src/chatwidget/tests.rs`. - - Integration test creating a fake agent directory and verifying the orchestrator selects the correct `Config`. - - Manual smoke test using the sample Codex home in `ai-temp/example-codex-home/`. +Detached run notifications surface via the notification system; dismissing them removes the run from the registry. --- -## 6. Decisions & Open Questions +## 5. Follow-Up Handling -- **Concurrent delegates**: The orchestrator now maintains a stack of active runs so delegates can invoke their own delegates; the UI surfaces the stack depth with indented history entries. -- **Prompt hand-off semantics**: The primary agent composes the sub-agent prompt with all relevant context before invoking `delegate()`. The orchestrator forwards the prompt verbatim without trimming history. -- **Return payload**: Still open. Default plan remains to summarize results in the primary transcript while exposing a “view details” action to open the sub-agent session. -- **Auth isolation**: Shared. All agents continue to use the primary `AuthManager`; per-agent credentials are out of scope unless a future requirement emerges. +- When `delegate_agent` receives `conversation_id`, the handler treats `agent_id` as optional (forwarding it unchanged when supplied) and sets `caller_conversation_id` so the orchestrator knows which primary conversation owns the request. +- `delegate_follow_up` touches the shadow manager, ensures the conversation is idle, reuses existing `CodexConversation`, and emits a `DelegateEvent::Started` with the original parent id. +- Regression tests (`follow_up_shadow_events_do_not_duplicate`, `follow_up_should_preserve_parent_before_registration`) ensure shadow logging does not double-count and lineage stays intact. --- -## 7. References +## 6. 
Pending Work / Notes -- Agent loader implementation – `codex-rs/multi-agent/src/lib.rs` -- Conversation bootstrap – `codex-rs/core/src/conversation_manager.rs:57` -- TUI spawn helpers – `codex-rs/tui/src/chatwidget/agent.rs:16` -- Slash command dispatch – `codex-rs/tui/src/chatwidget.rs:1126` -- History cell construction – `codex-rs/tui/src/history_cell.rs` -- Status card summary – `codex-rs/tui/src/status/helpers.rs:14` -- App event wiring – `codex-rs/tui/src/app.rs:212` & `codex-rs/tui/src/app_event.rs:15` +- **Agent switching** – interactive entry/exit of delegate sessions is implemented; further UX polish is tracked in `ai-temp/agent-switching.md`. +- **docs/** – We intentionally rolled back edits to `docs/advanced.md`. All public documentation will be refreshed once the feature is production-ready. +- **Additional tests** – CLI integration tests and further UX polish (breadcrumbs, status chips) are still on the roadmap. -These anchors will guide the low-impact code changes required to hook orchestration into the existing CLI. +For subsystem details (shadow cache, error handling, parallel orchestration, persistence), refer to the respective docs in `ai-temp/`. diff --git a/ai-temp/parallel-delegation.md b/ai-temp/parallel-delegation.md index b213ce0d2fe0..e73b34b914ca 100644 --- a/ai-temp/parallel-delegation.md +++ b/ai-temp/parallel-delegation.md @@ -68,6 +68,6 @@ approaches for synchronous, parallel, and detached (fire-and-forget) sub-agent r `error` field rather than using a distinct type). ## 4. Next Decisions -1. Pick a parallelization strategy (simple flag + UI refactor vs. dedicated helper). -2. Specify UX for background runs before adding async variant (notifications, manual join, audit). -3. Extend documentation/prompts once the capabilities land so models know when to choose each path. +1. Refresh model-facing prompts/instructions so agents understand when to batch delegates versus running them sequentially. +2. 
Decide whether to surface richer runtime telemetry (per-run timers, progress chips) for concurrent delegates in the TUI. +3. Document detached-session management and batching behaviour in the public docs (`docs/`) once the feature exits WIP. diff --git a/ai-temp/ui-ux-delegation.md b/ai-temp/ui-ux-delegation.md index 412d89226161..2854e3ce2cac 100644 --- a/ai-temp/ui-ux-delegation.md +++ b/ai-temp/ui-ux-delegation.md @@ -6,10 +6,17 @@ - Delegation events from the orchestrator reach `App::handle_delegate_update` (`codex-rs/tui/src/app.rs:446`). `DelegateEvent::Delta` now streams sub-agent output through the same `StreamController` pipeline, while start/completion still use `add_info_message`/`add_delegate_completion` for context. Incoming events carry run depth so the chat history can render indented entries for nested delegates. - `DelegateEvent::Started` activates the bottom-pane status indicator with a “Delegating to #` header and hides it once the run finishes (`codex-rs/tui/src/chatwidget.rs:2165-2196`), reducing ambiguity about who is currently working. -## Observed Gaps -- No transcript linking: once the delegate finishes, the TUI shows the final answer but lacks a quick way to drill into the delegate’s own session (the path is only available in logs). -- Duration is implicit: the status header flips back to “Working” when delegation ends, but we still do not surface elapsed time or a final summary chip in the transcript. -- Nested runs only show progress via indented info messages; we may still want richer breadcrumbs or timers in the status widget. +## What’s shipped (2025‑10‑20) +- `/agent` picker lists reusable sessions (including detached runs) with preview/dismiss actions. +- Preview pulls from the shadow cache and renders a dedicated history cell (`new_delegate_preview`), so users can recall recent turns before issuing a follow-up. +- Delegate tree view shows indentation per depth, status ownership, and completion summaries. 
+- Notifications fire when detached runs finish, and dismissal clears them from the picker. + +## Remaining Gaps +- No inline breadcrumb linking in the main transcript—returning summaries are plain text cells without clickable actions. +- Duration is only visible in completion summaries; the status banner still resets to “Working” without showing elapsed time. +- Nested runs rely on indentation; we may still want richer breadcrumbs/timers or a stack view in `/status`. +- Agent switching (temporarily entering a delegate session) is still future work; tracked in `ai-temp/agent-switching.md`. ## UX Goals 1. **Live streaming** – continue to reuse `StreamController`, but add safeguards against duplicate completions (covered by the new test) and consider showing a collapsed summary once the stream ends. diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 0a1428f4223b..27cbc201a18d 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1300,6 +1300,7 @@ dependencies = [ "serde_json", "tempfile", "thiserror 2.0.16", + "time", "tokio", "toml", "tracing", diff --git a/codex-rs/core/src/delegate_tool.rs b/codex-rs/core/src/delegate_tool.rs index 265207dcf877..0dcc990aa5ec 100644 --- a/codex-rs/core/src/delegate_tool.rs +++ b/codex-rs/core/src/delegate_tool.rs @@ -47,7 +47,7 @@ pub struct DelegateToolBatchEntry { /// Payload sent by the primary agent when invoking the delegate tool. 
#[derive(Debug, Clone, Deserialize, Serialize)] pub struct DelegateToolRequest { - pub agent_id: String, + pub agent_id: Option, pub prompt: String, #[serde(default)] pub context: DelegateToolContext, @@ -55,6 +55,8 @@ pub struct DelegateToolRequest { pub caller_conversation_id: Option, #[serde(default)] pub mode: DelegateInvocationMode, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub conversation_id: Option, #[serde(default, skip_serializing_if = "Vec::is_empty")] pub batch: Vec, } @@ -98,6 +100,59 @@ pub struct DelegateToolRun { pub agent_id: String, } +/// Indicates whether a delegate session originated from a synchronous or detached run. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum DelegateSessionMode { + Standard, + Detached, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DelegateSessionShadowSummary { + pub events: usize, + pub user_inputs: usize, + pub agent_outputs: usize, + pub turns: usize, + pub raw_bytes: usize, + #[serde(skip_serializing_if = "Option::is_none")] + pub compressed_bytes: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DelegateSessionListEntry { + pub conversation_id: String, + pub agent_id: String, + pub mode: DelegateSessionMode, + pub cwd: String, + pub last_interacted_at: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub shadow: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DelegateSessionsList { + pub sessions: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub next_cursor: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DelegateSessionMessageEntry { + pub id: String, + pub role: String, + pub content: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub timestamp: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DelegateSessionMessages { + pub messages: Vec, + 
#[serde(skip_serializing_if = "Option::is_none")] + pub next_cursor: Option, +} + #[derive(thiserror::Error, Debug)] pub enum DelegateToolError { #[error("another delegate is already running")] @@ -108,6 +163,14 @@ pub enum DelegateToolError { AgentNotFound(String), #[error("delegate setup failed: {0}")] SetupFailed(String), + #[error("delegate session `{0}` not found")] + SessionNotFound(String), + #[error("delegate session is busy")] + AgentBusy, + #[error("invalid delegate pagination cursor")] + InvalidCursor, + #[error("delegate history unavailable for session `{0}`")] + HistoryUnavailable(String), } pub type DelegateEventReceiver = UnboundedReceiver; @@ -121,4 +184,19 @@ pub trait DelegateToolAdapter: Send + Sync { &self, request: DelegateToolRequest, ) -> Result; + + async fn list_sessions( + &self, + cursor: Option, + limit: usize, + ) -> Result; + + async fn session_messages( + &self, + conversation_id: &str, + cursor: Option, + limit: usize, + ) -> Result; + + async fn dismiss_session(&self, conversation_id: &str) -> Result<(), DelegateToolError>; } diff --git a/codex-rs/core/src/tools/handlers/delegate.rs b/codex-rs/core/src/tools/handlers/delegate.rs index f4c0bc6cc2ac..bd5760d0cd60 100644 --- a/codex-rs/core/src/tools/handlers/delegate.rs +++ b/codex-rs/core/src/tools/handlers/delegate.rs @@ -87,6 +87,15 @@ pub static DELEGATE_TOOL: LazyLock = LazyLock::new(|| { description: Some("Identifier of the sub-agent to invoke".to_string()), }, ); + properties.insert( + "conversation_id".to_string(), + JsonSchema::String { + description: Some( + "Existing delegate conversation identifier to resume; when provided, `agent_id` may be omitted" + .to_string(), + ), + }, + ); properties.insert( "prompt".to_string(), JsonSchema::String { @@ -152,6 +161,8 @@ struct DelegateToolArgs { #[serde(default)] mode: Option, #[serde(default)] + conversation_id: Option, + #[serde(default)] batch: Vec, } @@ -252,12 +263,22 @@ impl ToolHandler for DelegateToolHandler { })?; let mut 
events = adapter.subscribe().await; - let conversation_id = session.conversation_id(); + let root_conversation_id = session.conversation_id(); + + if !args.batch.is_empty() && args.conversation_id.is_some() { + return Err(FunctionCallError::RespondToModel( + "`conversation_id` cannot be combined with `batch`".into(), + )); + } if !args.batch.is_empty() { - let runs = - handle_batch_entries(adapter.as_ref(), &mut events, &conversation_id, args.batch) - .await?; + let runs = handle_batch_entries( + adapter.as_ref(), + &mut events, + &root_conversation_id, + args.batch, + ) + .await?; let response = DelegateToolBatchResponse { status: "ok", runs }; let content = serde_json::to_string(&response).map_err(|e| { @@ -270,9 +291,16 @@ impl ToolHandler for DelegateToolHandler { }); } - let agent_id = args.agent_id.ok_or_else(|| { - FunctionCallError::RespondToModel("missing `agent_id` for delegate_agent call".into()) - })?; + let resume_conversation_id = args.conversation_id; + let agent_id = match (&resume_conversation_id, args.agent_id) { + (Some(_), maybe_agent) => maybe_agent, + (None, Some(agent)) => Some(agent), + (None, None) => { + return Err(FunctionCallError::RespondToModel( + "missing `agent_id` for delegate_agent call".into(), + )); + } + }; let prompt = args.prompt.ok_or_else(|| { FunctionCallError::RespondToModel("missing `prompt` for delegate_agent call".into()) })?; @@ -280,11 +308,12 @@ impl ToolHandler for DelegateToolHandler { let mode = args.mode.unwrap_or_default(); let request = DelegateToolRequest { - agent_id: agent_id.clone(), + agent_id, prompt: prompt.clone(), context: args.context.unwrap_or_default().into(), - caller_conversation_id: Some(conversation_id.to_string()), + caller_conversation_id: Some(root_conversation_id.to_string()), mode, + conversation_id: resume_conversation_id, batch: Vec::new(), }; @@ -406,11 +435,12 @@ async fn handle_batch_entries( } let request = DelegateToolRequest { - agent_id: entry.agent_id.clone(), + agent_id: 
Some(entry.agent_id.clone()), prompt: entry.prompt.clone(), context: entry.context.unwrap_or_default().into(), caller_conversation_id: Some(conversation_id.clone()), mode, + conversation_id: None, batch: Vec::new(), }; @@ -494,6 +524,23 @@ fn map_adapter_error(err: DelegateToolError) -> FunctionCallError { DelegateToolError::SetupFailed(reason) => { FunctionCallError::RespondToModel(format!("failed to start delegate: {reason}")) } + DelegateToolError::SessionNotFound(conversation_id) => { + FunctionCallError::RespondToModel(format!( + "delegate session `{conversation_id}` is not available" + )) + } + DelegateToolError::AgentBusy => FunctionCallError::RespondToModel( + "delegate session is still running; wait for it to finish before sending a follow-up" + .to_string(), + ), + DelegateToolError::InvalidCursor => { + FunctionCallError::RespondToModel("invalid delegate pagination cursor".to_string()) + } + DelegateToolError::HistoryUnavailable(conversation_id) => { + FunctionCallError::RespondToModel(format!( + "delegate history is unavailable for session `{conversation_id}`" + )) + } } } @@ -565,6 +612,8 @@ async fn monitor_detached_run( #[cfg(test)] mod tests { use super::*; + use crate::delegate_tool::DelegateSessionMessages; + use crate::delegate_tool::DelegateSessionsList; use std::sync::Arc; use tokio::sync::Mutex; use tokio::sync::mpsc; @@ -596,19 +645,47 @@ mod tests { request: DelegateToolRequest, ) -> Result { self.requests.lock().await.push(request.clone()); - let run_id = format!("run-{}", request.agent_id); + let agent_id = request + .agent_id + .clone() + .expect("mock delegate expects agent id"); + let run_id = format!("run-{agent_id}"); if let Some(sender) = self.sender.lock().await.as_ref() { let _ = sender.send(DelegateToolEvent::Completed { run_id: run_id.clone(), - agent_id: request.agent_id.clone(), + agent_id: agent_id.clone(), output: Some(format!("summary: {}", request.prompt)), duration: Duration::from_millis(5), }); } - Ok(DelegateToolRun { - 
run_id, - agent_id: request.agent_id, - }) + Ok(DelegateToolRun { run_id, agent_id }) + } + + async fn list_sessions( + &self, + _cursor: Option, + _limit: usize, + ) -> Result { + Err(DelegateToolError::SetupFailed( + "list_sessions not implemented in mock".to_string(), + )) + } + + async fn session_messages( + &self, + conversation_id: &str, + _cursor: Option, + _limit: usize, + ) -> Result { + Err(DelegateToolError::HistoryUnavailable( + conversation_id.to_string(), + )) + } + + async fn dismiss_session(&self, conversation_id: &str) -> Result<(), DelegateToolError> { + Err(DelegateToolError::SessionNotFound( + conversation_id.to_string(), + )) } } @@ -639,8 +716,8 @@ mod tests { let requests = adapter.requests.lock().await.clone(); assert_eq!(requests.len(), 2); - assert_eq!(requests[0].agent_id, "alpha"); - assert_eq!(requests[1].agent_id, "bravo"); + assert_eq!(requests[0].agent_id.as_deref(), Some("alpha")); + assert_eq!(requests[1].agent_id.as_deref(), Some("bravo")); assert_eq!(runs.len(), 2); assert_eq!(runs[0].agent_id, "alpha"); diff --git a/codex-rs/core/src/tools/handlers/delegate_sessions.rs b/codex-rs/core/src/tools/handlers/delegate_sessions.rs new file mode 100644 index 000000000000..3a3df95db1b7 --- /dev/null +++ b/codex-rs/core/src/tools/handlers/delegate_sessions.rs @@ -0,0 +1,282 @@ +use std::collections::BTreeMap; +use std::sync::LazyLock; + +use async_trait::async_trait; +use serde::Deserialize; +use serde::Serialize; + +use crate::client_common::tools::ResponsesApiTool; +use crate::client_common::tools::ToolSpec; +use crate::delegate_tool::DelegateSessionMessages; +use crate::delegate_tool::DelegateSessionMode; +use crate::delegate_tool::DelegateSessionsList; +use crate::delegate_tool::DelegateToolError; +use crate::function_tool::FunctionCallError; +use crate::openai_tools::JsonSchema; +use crate::tools::context::ToolInvocation; +use crate::tools::context::ToolOutput; +use crate::tools::context::ToolPayload; +use 
crate::tools::registry::ToolHandler; +use crate::tools::registry::ToolKind; + +#[derive(Debug, Deserialize)] +struct DelegateSessionsArgs { + operation: String, + #[serde(default)] + conversation_id: Option, + #[serde(default)] + cursor: Option, + #[serde(default)] + limit: Option, +} + +#[derive(Debug, Serialize, Default)] +struct DelegateSessionsPayload { + #[serde(skip_serializing_if = "Option::is_none")] + sessions: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + messages: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + next_cursor: Option, +} + +#[derive(Debug, Serialize)] +struct DelegateSessionsResponse { + status: &'static str, + #[serde(flatten)] + payload: DelegateSessionsPayload, +} + +#[derive(Debug, Serialize)] +struct DelegateSessionListEntry { + conversation_id: String, + agent_id: String, + mode: String, + cwd: String, + last_interacted_at: String, + #[serde(skip_serializing_if = "Option::is_none")] + shadow: Option, +} + +#[derive(Debug, Serialize)] +struct DelegateSessionShadowSummary { + events: usize, + user_inputs: usize, + agent_outputs: usize, + turns: usize, + raw_bytes: usize, + #[serde(skip_serializing_if = "Option::is_none")] + compressed_bytes: Option, +} + +#[derive(Debug, Serialize)] +struct DelegateSessionMessageEntry { + id: String, + role: String, + content: String, + #[serde(skip_serializing_if = "Option::is_none")] + timestamp: Option, +} + +pub struct DelegateSessionsHandler; + +pub static DELEGATE_SESSIONS_TOOL: LazyLock = LazyLock::new(|| { + let mut properties = BTreeMap::new(); + properties.insert( + "operation".to_string(), + JsonSchema::String { + description: Some("Operation to perform: list, messages, or dismiss".to_string()), + }, + ); + properties.insert( + "conversation_id".to_string(), + JsonSchema::String { + description: Some("Target conversation id for messages or dismiss".to_string()), + }, + ); + properties.insert( + "cursor".to_string(), + JsonSchema::String { + description: 
Some("Opaque pagination cursor".to_string()), + }, + ); + properties.insert( + "limit".to_string(), + JsonSchema::Number { + description: Some("Maximum number of entries to return (default 3)".to_string()), + }, + ); + + ToolSpec::Function(ResponsesApiTool { + name: "delegate_sessions".to_string(), + description: "Inspect or manage reusable delegate sessions".to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["operation".to_string()]), + additional_properties: Some(false.into()), + }, + }) +}); + +#[async_trait] +impl ToolHandler for DelegateSessionsHandler { + fn kind(&self) -> ToolKind { + ToolKind::Function + } + + async fn handle(&self, invocation: ToolInvocation) -> Result { + let ToolInvocation { + session, payload, .. + } = invocation; + + let arguments = match payload { + ToolPayload::Function { arguments } => arguments, + _ => { + return Err(FunctionCallError::RespondToModel( + "delegate_sessions handler received unsupported payload".to_string(), + )); + } + }; + + let args: DelegateSessionsArgs = serde_json::from_str(&arguments).map_err(|e| { + FunctionCallError::RespondToModel(format!("failed to parse function arguments: {e}")) + })?; + + let adapter = session.delegate_adapter().ok_or_else(|| { + FunctionCallError::RespondToModel("delegate tool is not available".to_string()) + })?; + + let limit = args.limit.unwrap_or(3).max(1); + let response = match args.operation.as_str() { + "list" => { + let result: DelegateSessionsList = adapter + .list_sessions(args.cursor.clone(), limit) + .await + .map_err(map_adapter_error)?; + let sessions = result + .sessions + .into_iter() + .map(|session| DelegateSessionListEntry { + conversation_id: session.conversation_id, + agent_id: session.agent_id, + mode: mode_to_string(session.mode), + cwd: session.cwd, + last_interacted_at: session.last_interacted_at, + shadow: session.shadow.map(|shadow| DelegateSessionShadowSummary { + events: shadow.events, + user_inputs: 
shadow.user_inputs, + agent_outputs: shadow.agent_outputs, + turns: shadow.turns, + raw_bytes: shadow.raw_bytes, + compressed_bytes: shadow.compressed_bytes, + }), + }) + .collect(); + DelegateSessionsResponse { + status: "ok", + payload: DelegateSessionsPayload { + sessions: Some(sessions), + next_cursor: result.next_cursor, + ..DelegateSessionsPayload::default() + }, + } + } + "messages" => { + let conversation_id = args.conversation_id.ok_or_else(|| { + FunctionCallError::RespondToModel( + "`conversation_id` is required for operation messages".to_string(), + ) + })?; + let result: DelegateSessionMessages = adapter + .session_messages(&conversation_id, args.cursor.clone(), limit) + .await + .map_err(map_adapter_error)?; + let messages = result + .messages + .into_iter() + .map(|message| DelegateSessionMessageEntry { + id: message.id, + role: message.role, + content: message.content, + timestamp: message.timestamp, + }) + .collect(); + DelegateSessionsResponse { + status: "ok", + payload: DelegateSessionsPayload { + messages: Some(messages), + next_cursor: result.next_cursor, + ..DelegateSessionsPayload::default() + }, + } + } + "dismiss" => { + let conversation_id = args.conversation_id.ok_or_else(|| { + FunctionCallError::RespondToModel( + "`conversation_id` is required for operation dismiss".to_string(), + ) + })?; + adapter + .dismiss_session(&conversation_id) + .await + .map_err(map_adapter_error)?; + DelegateSessionsResponse { + status: "ok", + payload: DelegateSessionsPayload::default(), + } + } + other => { + return Err(FunctionCallError::RespondToModel(format!( + "unknown operation `{other}`" + ))); + } + }; + + let content = serde_json::to_string(&response) + .map_err(|e| FunctionCallError::Fatal(format!("failed to serialize response: {e}")))?; + + Ok(ToolOutput::Function { + content, + success: Some(true), + }) + } +} + +fn map_adapter_error(err: DelegateToolError) -> FunctionCallError { + match err { + DelegateToolError::DelegateInProgress => 
FunctionCallError::RespondToModel( + "another delegate is already running; wait before listing sessions".to_string(), + ), + DelegateToolError::QueueFull => { + FunctionCallError::RespondToModel("delegate queue is full; try again later".to_string()) + } + DelegateToolError::AgentNotFound(agent_id) => FunctionCallError::RespondToModel(format!( + "delegate agent `{agent_id}` is not configured" + )), + DelegateToolError::SetupFailed(reason) => { + FunctionCallError::RespondToModel(format!("delegate operation failed: {reason}")) + } + DelegateToolError::SessionNotFound(conversation_id) => FunctionCallError::RespondToModel( + format!("delegate session `{conversation_id}` is not available"), + ), + DelegateToolError::AgentBusy => FunctionCallError::RespondToModel( + "delegate session is busy; wait for it to finish".to_string(), + ), + DelegateToolError::InvalidCursor => { + FunctionCallError::RespondToModel("invalid delegate pagination cursor".to_string()) + } + DelegateToolError::HistoryUnavailable(conversation_id) => { + FunctionCallError::RespondToModel(format!( + "delegate history is unavailable for session `{conversation_id}`" + )) + } + } +} + +fn mode_to_string(mode: DelegateSessionMode) -> String { + match mode { + DelegateSessionMode::Standard => "standard".to_string(), + DelegateSessionMode::Detached => "detached".to_string(), + } +} diff --git a/codex-rs/core/src/tools/handlers/mod.rs b/codex-rs/core/src/tools/handlers/mod.rs index 9ba1bc184422..5774f0cb7a23 100644 --- a/codex-rs/core/src/tools/handlers/mod.rs +++ b/codex-rs/core/src/tools/handlers/mod.rs @@ -1,5 +1,6 @@ pub mod apply_patch; mod delegate; +mod delegate_sessions; mod exec_stream; mod grep_files; mod list_dir; @@ -17,6 +18,8 @@ pub use plan::PLAN_TOOL; pub use apply_patch::ApplyPatchHandler; pub use delegate::DELEGATE_TOOL; pub use delegate::DelegateToolHandler; +pub use delegate_sessions::DELEGATE_SESSIONS_TOOL; +pub use delegate_sessions::DelegateSessionsHandler; pub use 
exec_stream::ExecStreamHandler; pub use grep_files::GrepFilesHandler; pub use list_dir::ListDirHandler; diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index f72dae6c6bb8..d5c572ab4083 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -822,7 +822,9 @@ pub(crate) fn build_specs( use crate::exec_command::create_exec_command_tool_for_responses_api; use crate::exec_command::create_write_stdin_tool_for_responses_api; use crate::tools::handlers::ApplyPatchHandler; + use crate::tools::handlers::DELEGATE_SESSIONS_TOOL; use crate::tools::handlers::DELEGATE_TOOL; + use crate::tools::handlers::DelegateSessionsHandler; use crate::tools::handlers::DelegateToolHandler; use crate::tools::handlers::ExecStreamHandler; use crate::tools::handlers::GrepFilesHandler; @@ -844,6 +846,7 @@ pub(crate) fn build_specs( let unified_exec_handler = Arc::new(UnifiedExecHandler); let plan_handler = Arc::new(PlanHandler); let delegate_handler = Arc::new(DelegateToolHandler); + let delegate_sessions_handler = Arc::new(DelegateSessionsHandler); let apply_patch_handler = Arc::new(ApplyPatchHandler); let view_image_handler = Arc::new(ViewImageHandler); let mcp_handler = Arc::new(McpHandler); @@ -893,6 +896,8 @@ pub(crate) fn build_specs( if config.delegate_tool { builder.push_spec_with_parallel_support(DELEGATE_TOOL.clone(), true); builder.register_handler("delegate_agent", delegate_handler); + builder.push_spec_with_parallel_support(DELEGATE_SESSIONS_TOOL.clone(), true); + builder.register_handler("delegate_sessions", delegate_sessions_handler); } if let Some(apply_patch_tool_type) = &config.apply_patch_tool_type { diff --git a/codex-rs/core/tests/common/test_codex.rs b/codex-rs/core/tests/common/test_codex.rs index f51f68f7997d..b9e25085519c 100644 --- a/codex-rs/core/tests/common/test_codex.rs +++ b/codex-rs/core/tests/common/test_codex.rs @@ -12,6 +12,8 @@ use codex_core::NewConversation; use codex_core::built_in_model_providers; 
use codex_core::config::Config; use codex_core::delegate_tool::DelegateEventReceiver; +use codex_core::delegate_tool::DelegateSessionMessages; +use codex_core::delegate_tool::DelegateSessionsList; use codex_core::delegate_tool::DelegateToolAdapter; use codex_core::delegate_tool::DelegateToolError; use codex_core::delegate_tool::DelegateToolEvent; @@ -43,6 +45,10 @@ impl DelegateToolAdapter for TestDelegateAdapter { &self, request: DelegateToolRequest, ) -> Result { + let agent_id = request + .agent_id + .clone() + .unwrap_or_else(|| "test-agent".to_string()); let idx = self .counter .fetch_add(1, std::sync::atomic::Ordering::Relaxed); @@ -50,15 +56,39 @@ impl DelegateToolAdapter for TestDelegateAdapter { if let Some(sender) = self.sender.lock().await.as_ref() { let _ = sender.send(DelegateToolEvent::Completed { run_id: run_id.clone(), - agent_id: request.agent_id.clone(), + agent_id: agent_id.clone(), output: Some(request.prompt.clone()), duration: std::time::Duration::from_millis(1), }); } - Ok(DelegateToolRun { - run_id, - agent_id: request.agent_id, - }) + Ok(DelegateToolRun { run_id, agent_id }) + } + + async fn list_sessions( + &self, + _cursor: Option, + _limit: usize, + ) -> Result { + Err(DelegateToolError::SetupFailed( + "list_sessions not implemented in TestDelegateAdapter".to_string(), + )) + } + + async fn session_messages( + &self, + _conversation_id: &str, + _cursor: Option, + _limit: usize, + ) -> Result { + Err(DelegateToolError::HistoryUnavailable( + "test-conversation".to_string(), + )) + } + + async fn dismiss_session(&self, _conversation_id: &str) -> Result<(), DelegateToolError> { + Err(DelegateToolError::SessionNotFound( + "test-conversation".to_string(), + )) } } diff --git a/codex-rs/multi-agent/Cargo.toml b/codex-rs/multi-agent/Cargo.toml index d8ebfd007590..b2b3ef07850e 100644 --- a/codex-rs/multi-agent/Cargo.toml +++ b/codex-rs/multi-agent/Cargo.toml @@ -17,6 +17,7 @@ serde_json.workspace = true flate2 = { workspace = true } 
tracing.workspace = true bincode.workspace = true +time = { version = "0.3", features = ["formatting"] } [dev-dependencies] serde_json.workspace = true diff --git a/codex-rs/multi-agent/src/lib.rs b/codex-rs/multi-agent/src/lib.rs index eb2b8ed018b8..c11249050a66 100644 --- a/codex-rs/multi-agent/src/lib.rs +++ b/codex-rs/multi-agent/src/lib.rs @@ -522,9 +522,10 @@ agents = ["ideas_provider", "critic"] #[tokio::test] async fn delegate_started_event_carries_owner() { use crate::shadow::ShadowConfig; + use codex_core::AuthManager; + use codex_core::CodexAuth; use codex_core::delegate_tool::DelegateInvocationMode; use codex_core::protocol::SessionSource; - use codex_core::{AuthManager, CodexAuth}; use std::sync::Arc; let temp_home = tempdir().expect("tempdir"); @@ -553,6 +554,7 @@ agents = ["ideas_provider", "critic"] parent_run_id: None, mode: DelegateInvocationMode::Immediate, caller_conversation_id: Some(owner_id.clone()), + conversation_id: None, }; let mut events = orchestrator.subscribe().await; diff --git a/codex-rs/multi-agent/src/orchestrator.rs b/codex-rs/multi-agent/src/orchestrator.rs index 608dc3e74f6d..ec4bd0e18bfa 100644 --- a/codex-rs/multi-agent/src/orchestrator.rs +++ b/codex-rs/multi-agent/src/orchestrator.rs @@ -1,3 +1,4 @@ +use std::cmp::Ordering; use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; @@ -13,6 +14,12 @@ use codex_core::config::Config; use codex_core::config::ConfigOverrides; use codex_core::delegate_tool::DelegateEventReceiver as CoreDelegateEventReceiver; use codex_core::delegate_tool::DelegateInvocationMode; +use codex_core::delegate_tool::DelegateSessionListEntry; +use codex_core::delegate_tool::DelegateSessionMessageEntry; +use codex_core::delegate_tool::DelegateSessionMessages; +use codex_core::delegate_tool::DelegateSessionMode as CoreDelegateSessionMode; +use codex_core::delegate_tool::DelegateSessionShadowSummary; +use codex_core::delegate_tool::DelegateSessionsList; use 
codex_core::delegate_tool::DelegateToolAdapter; use codex_core::delegate_tool::DelegateToolError; use codex_core::delegate_tool::DelegateToolEvent as CoreDelegateToolEvent; @@ -25,6 +32,8 @@ use codex_core::protocol::Op; use codex_core::protocol::SessionConfiguredEvent; use codex_core::protocol::SessionSource; use std::time::Duration; +use time::OffsetDateTime; +use time::format_description::well_known::Rfc3339; use tokio::sync::Mutex; use tokio::sync::mpsc; use tokio::sync::mpsc::UnboundedReceiver; @@ -37,6 +46,8 @@ use uuid::Uuid; use crate::AgentConfigLoader; use crate::AgentId; use crate::shadow::ShadowConfig; +use crate::shadow::ShadowHistoryEntry; +use crate::shadow::ShadowHistoryKind; use crate::shadow::ShadowManager; use crate::shadow::ShadowMetrics; use crate::shadow::ShadowSessionSummary; @@ -52,6 +63,35 @@ fn prompt_preview(text: &str) -> Option { Some(preview) } +fn system_time_to_unix_nanos(time: SystemTime) -> i128 { + let dt: OffsetDateTime = time.into(); + dt.unix_timestamp_nanos() +} + +fn format_system_time(time: SystemTime) -> String { + let dt: OffsetDateTime = time.into(); + dt.format(&Rfc3339) + .unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string()) +} + +fn encode_session_cursor(summary: &DelegateSessionSummary) -> String { + let nanos = system_time_to_unix_nanos(summary.last_interacted_at); + format!("{nanos}:{}", summary.conversation_id) +} + +fn parse_session_cursor(cursor: &str) -> Result<(i128, String), OrchestratorError> { + let (ts, id) = cursor + .split_once(':') + .ok_or(OrchestratorError::InvalidCursor)?; + let nanos = ts + .parse::() + .map_err(|_| OrchestratorError::InvalidCursor)?; + if id.is_empty() { + return Err(OrchestratorError::InvalidCursor); + } + Ok((nanos, id.to_string())) +} + /// Identifier used to correlate delegate runs. 
pub type DelegateRunId = String; @@ -64,6 +104,7 @@ pub struct DelegateRequest { pub parent_run_id: Option, pub mode: DelegateInvocationMode, pub caller_conversation_id: Option, + pub conversation_id: Option, } /// The prompt content forwarded to the sub-agent. @@ -82,6 +123,12 @@ struct SessionEventBroadcaster { subscribers: Mutex>>, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ShadowRecordMode { + Normal, + FollowUp, +} + impl SessionEventBroadcaster { fn new() -> Self { Self { @@ -157,6 +204,12 @@ pub enum OrchestratorError { DelegateSetupFailed(String), #[error("delegate session `{0}` not found")] SessionNotFound(String), + #[error("delegate session `{0}` is busy")] + AgentBusy(String), + #[error("invalid delegate pagination cursor")] + InvalidCursor, + #[error("delegate history unavailable for session `{0}`")] + HistoryUnavailable(String), } /// High-level metadata describing a delegate session available for switching. @@ -226,6 +279,137 @@ pub struct AgentOrchestrator { } impl AgentOrchestrator { + async fn delegate_follow_up( + self: &Arc, + run_id: DelegateRunId, + request: DelegateRequest, + conversation_id: String, + ) -> std::result::Result { + let (summary, conversation, session_configured, config, events) = { + let mut sessions = self.sessions.lock().await; + let entry = sessions + .get_mut(&conversation_id) + .ok_or_else(|| OrchestratorError::SessionNotFound(conversation_id.clone()))?; + entry.summary.last_interacted_at = SystemTime::now(); + ( + entry.summary.clone(), + entry.conversation.clone(), + entry.session_configured.clone(), + entry.config.clone(), + Arc::clone(&entry.events), + ) + }; + + if self + .conversation_runs + .lock() + .await + .contains_key(&conversation_id) + { + return Err(OrchestratorError::AgentBusy(conversation_id)); + } + + { + let mut active = self.active_runs.lock().await; + if active.len() >= self.max_concurrent_runs { + return Err(OrchestratorError::QueueFull); + } + active.push(run_id.clone()); + } + + let 
parent_run_id = self + .parent_run_for_follow_up(&conversation_id, request.parent_run_id.clone()) + .await; + + self.register_run_conversation(&run_id, &conversation_id) + .await; + + self.shadow_manager.touch(&conversation_id).await; + + let owner_conversation = if let Some(owner) = request.caller_conversation_id.clone() { + Some(owner) + } else if let Some(parent) = parent_run_id.as_ref() { + let guard = self.run_owner_conversations.lock().await; + guard.get(parent).cloned() + } else { + None + }; + if let Some(owner) = owner_conversation.clone() { + self.run_owner_conversations + .lock() + .await + .insert(run_id.clone(), owner); + } + let owner_conversation_id = owner_conversation.unwrap_or_default(); + + let prompt_preview_text = request.prompt.text.clone(); + self.emit(DelegateEvent::Started { + run_id: run_id.clone(), + agent_id: summary.agent_id.clone(), + owner_conversation_id: owner_conversation_id.clone(), + prompt: prompt_preview_text, + started_at: SystemTime::now(), + parent_run_id: parent_run_id.clone(), + mode: summary.mode, + }) + .await; + + let events_rx = events.subscribe(None).await; + + let orchestrator = Arc::clone(self); + let run_id_clone = run_id.clone(); + tokio::spawn(async move { + let orchestrator_task = Arc::clone(&orchestrator); + let result = orchestrator_task + .run_follow_up_task( + run_id_clone.clone(), + request, + conversation_id.clone(), + summary.clone(), + conversation.clone(), + session_configured.clone(), + config.clone(), + events_rx, + owner_conversation_id.clone(), + ) + .await; + + match result { + Ok(output) => { + orchestrator.store_session(&output).await; + orchestrator + .emit(DelegateEvent::Completed { + run_id: run_id_clone.clone(), + agent_id: output.agent_id.clone(), + owner_conversation_id: owner_conversation_id.clone(), + output: output.message.clone(), + duration: output.duration, + mode: output.mode, + }) + .await; + } + Err(err) => { + orchestrator + .emit(DelegateEvent::Failed { + run_id: 
run_id_clone.clone(), + agent_id: err.agent_id, + owner_conversation_id: owner_conversation_id.clone(), + error: err.error, + mode: err.mode, + }) + .await; + } + } + + orchestrator.clear_run_conversation(&run_id_clone).await; + let mut active = orchestrator.active_runs.lock().await; + if let Some(pos) = active.iter().rposition(|id| id == &run_id_clone) { + active.remove(pos); + } + }); + + Ok(run_id) + } pub fn new( global_codex_home: impl Into, auth_manager: Arc, @@ -302,6 +486,11 @@ impl AgentOrchestrator { DelegateInvocationMode::Detached => DelegateSessionMode::Detached, _ => DelegateSessionMode::Standard, }; + if let Some(conversation_id) = request.conversation_id.clone() { + return self + .delegate_follow_up(run_id, request, conversation_id) + .await; + } { let mut active = self.active_runs.lock().await; if active.len() >= self.max_concurrent_runs { @@ -455,12 +644,16 @@ impl AgentOrchestrator { } } - async fn record_shadow_event( + async fn record_shadow_event_internal( &self, agent_id: Option<&AgentId>, conversation_id: &str, event: &Event, + mode: ShadowRecordMode, ) { + if matches!(mode, ShadowRecordMode::FollowUp) { + return; + } let Some(agent_id) = agent_id else { return }; if let Err(err) = self .shadow_manager @@ -471,6 +664,21 @@ impl AgentOrchestrator { } } + async fn record_shadow_event( + &self, + agent_id: Option<&AgentId>, + conversation_id: &str, + event: &Event, + ) { + self.record_shadow_event_internal( + agent_id, + conversation_id, + event, + ShadowRecordMode::Normal, + ) + .await; + } + async fn record_shadow_agent_outputs( &self, agent_id: Option<&AgentId>, @@ -502,14 +710,217 @@ impl AgentOrchestrator { .values() .map(|entry| entry.summary.clone()) .collect(); - summaries.sort_by(|a, b| b.last_interacted_at.cmp(&a.last_interacted_at)); + summaries.sort_by( + |a, b| match b.last_interacted_at.cmp(&a.last_interacted_at) { + Ordering::Equal => a.conversation_id.cmp(&b.conversation_id), + other => other, + }, + ); summaries } + pub async 
fn session_summary(&self, conversation_id: &str) -> Option { + let sessions = self.sessions.lock().await; + sessions + .get(conversation_id) + .map(|entry| entry.summary.clone()) + } + + pub async fn dismiss_session(&self, conversation_id: &str) -> Result<(), OrchestratorError> { + if self + .conversation_runs + .lock() + .await + .contains_key(conversation_id) + { + return Err(OrchestratorError::AgentBusy(conversation_id.to_string())); + } + + { + let sessions = self.sessions.lock().await; + if !sessions.contains_key(conversation_id) { + return Err(OrchestratorError::SessionNotFound( + conversation_id.to_string(), + )); + } + } + + self.remove_session(conversation_id).await; + + let mut detached = self.detached_runs.lock().await; + detached.retain(|_, record| match &record.status { + DetachedRunStatus::Ready { + conversation_id: ready_id, + .. + } => ready_id != conversation_id, + _ => true, + }); + + Ok(()) + } + + pub async fn list_sessions_paginated( + &self, + cursor: Option, + limit: usize, + ) -> Result { + let summaries = self.active_sessions().await; + + let (page, next_cursor) = paginate_session_summaries(&summaries, cursor, limit)?; + + let mut sessions_vec = Vec::new(); + for summary in page { + let shadow = self + .shadow_manager + .session_summary(&summary.conversation_id) + .await + .map(|s| DelegateSessionShadowSummary { + events: s.metrics.events, + user_inputs: s.metrics.user_inputs, + agent_outputs: s.metrics.agent_outputs, + turns: s.metrics.turns, + raw_bytes: s.raw_bytes, + compressed_bytes: s.compressed_bytes, + }); + + let mode = match summary.mode { + DelegateSessionMode::Standard => CoreDelegateSessionMode::Standard, + DelegateSessionMode::Detached => CoreDelegateSessionMode::Detached, + }; + + sessions_vec.push(DelegateSessionListEntry { + conversation_id: summary.conversation_id.clone(), + agent_id: summary.agent_id.as_str().to_string(), + mode, + cwd: summary.cwd.to_string_lossy().into_owned(), + last_interacted_at: 
format_system_time(summary.last_interacted_at), + shadow, + }); + } + + Ok(DelegateSessionsList { + sessions: sessions_vec, + next_cursor, + }) + } + + pub async fn recent_messages( + &self, + conversation_id: &str, + cursor: Option, + limit: usize, + ) -> Result { + { + let sessions = self.sessions.lock().await; + if !sessions.contains_key(conversation_id) { + return Err(OrchestratorError::SessionNotFound( + conversation_id.to_string(), + )); + } + } + + let snapshot = self.shadow_manager.snapshot(conversation_id).await; + let snapshot = snapshot + .ok_or_else(|| OrchestratorError::HistoryUnavailable(conversation_id.to_string()))?; + + let entries = snapshot.history; + let total = entries.len(); + let start_index = if let Some(cursor) = cursor { + let idx = cursor + .parse::() + .map_err(|_| OrchestratorError::InvalidCursor)?; + if idx > total { + return Err(OrchestratorError::InvalidCursor); + } + idx + } else { + total + }; + + let mut index = start_index; + let mut messages = Vec::new(); + while index > 0 && messages.len() < limit { + index -= 1; + if let Some(entry) = history_entry_to_message(&entries[index], conversation_id, index) { + messages.push(entry); + } + } + + let next_cursor = if index > 0 { + Some(index.to_string()) + } else { + None + }; + + Ok(DelegateSessionMessages { + messages, + next_cursor, + }) + } + pub async fn shadow_snapshot(&self, conversation_id: &str) -> Option { self.shadow_manager.snapshot(conversation_id).await } + async fn parent_run_for_follow_up( + &self, + conversation_id: &str, + request_parent: Option, + ) -> Option { + if let Some(parent) = request_parent { + Some(parent) + } else { + self.conversation_runs + .lock() + .await + .get(conversation_id) + .cloned() + } + } +} + +fn paginate_session_summaries<'a>( + summaries: &'a [DelegateSessionSummary], + cursor: Option, + limit: usize, +) -> Result<(Vec<&'a DelegateSessionSummary>, Option), OrchestratorError> { + let limit = limit.max(1); + + let start_index = if let 
Some(cursor) = cursor { + let (cursor_ts, cursor_id) = parse_session_cursor(&cursor)?; + let mut idx = None; + for (i, summary) in summaries.iter().enumerate() { + let ts = system_time_to_unix_nanos(summary.last_interacted_at); + if ts == cursor_ts && summary.conversation_id == cursor_id { + idx = Some(i + 1); + break; + } + } + idx.ok_or(OrchestratorError::InvalidCursor)? + } else { + 0 + }; + + let mut page = Vec::new(); + for summary in summaries.iter().skip(start_index).take(limit) { + page.push(summary); + } + + let consumed = page.len(); + let next_cursor = if consumed == 0 { + None + } else if start_index + consumed < summaries.len() { + Some(encode_session_cursor( + &summaries[start_index + consumed - 1], + )) + } else { + None + }; + + Ok((page, next_cursor)) +} + +impl AgentOrchestrator { pub async fn shadow_metrics(&self) -> ShadowMetrics { self.shadow_manager.metrics().await } @@ -951,6 +1362,169 @@ impl AgentOrchestrator { mode: session_mode, }) } + + async fn run_follow_up_task( + self: Arc, + run_id: DelegateRunId, + request: DelegateRequest, + conversation_id: String, + summary: DelegateSessionSummary, + conversation: Arc, + session_configured: Arc, + config: Config, + mut events_rx: UnboundedReceiver, + owner_conversation_id: String, + ) -> std::result::Result { + let start = SystemTime::now(); + let agent_id = summary.agent_id.clone(); + let session_mode = summary.mode; + + let mut items = request.user_initial.clone(); + let prompt_text = request.prompt.text.clone(); + if !prompt_text.trim().is_empty() { + items.push(InputItem::Text { + text: prompt_text.clone(), + }); + } + + conversation + .submit(Op::UserInput { items }) + .await + .map_err(|err| DelegateFailure { + agent_id: agent_id.clone(), + error: format!("failed to submit delegate prompt: {err:#}"), + mode: session_mode, + })?; + + self.record_shadow_user_inputs(Some(&agent_id), &conversation_id, &request.user_initial) + .await; + if !prompt_text.trim().is_empty() { + 
self.record_shadow_user_inputs( + Some(&agent_id), + &conversation_id, + &[InputItem::Text { + text: prompt_text.clone(), + }], + ) + .await; + } + + let mut collected = String::new(); + while let Some(event) = events_rx.recv().await { + match &event.msg { + EventMsg::AgentMessage(msg) => { + collected = msg.message.clone(); + self.record_shadow_event_internal( + Some(&agent_id), + &conversation_id, + &event, + ShadowRecordMode::FollowUp, + ) + .await; + self.emit(DelegateEvent::Delta { + run_id: run_id.clone(), + agent_id: agent_id.clone(), + owner_conversation_id: owner_conversation_id.clone(), + chunk: msg.message.clone(), + }) + .await; + } + EventMsg::AgentMessageDelta(delta) => { + collected.push_str(&delta.delta); + self.record_shadow_event_internal( + Some(&agent_id), + &conversation_id, + &event, + ShadowRecordMode::FollowUp, + ) + .await; + self.emit(DelegateEvent::Delta { + run_id: run_id.clone(), + agent_id: agent_id.clone(), + owner_conversation_id: owner_conversation_id.clone(), + chunk: delta.delta.clone(), + }) + .await; + } + EventMsg::TaskComplete(task_complete) => { + self.record_shadow_event_internal( + Some(&agent_id), + &conversation_id, + &event, + ShadowRecordMode::FollowUp, + ) + .await; + let duration = start.elapsed().unwrap_or_else(|_| Duration::from_secs(0)); + let message = task_complete.last_agent_message.clone().or_else(|| { + if collected.trim().is_empty() { + None + } else { + Some(collected.clone()) + } + }); + + if let Some(output) = message.as_ref() { + self.record_shadow_agent_outputs( + Some(&agent_id), + &conversation_id, + &[output.clone()], + ) + .await; + } + + return Ok(DelegateSuccess { + agent_id, + conversation_id, + conversation, + session_configured, + cwd: summary.cwd.clone(), + config, + message, + duration, + mode: session_mode, + }); + } + EventMsg::Error(err) => { + self.record_shadow_event(Some(&agent_id), &conversation_id, &event) + .await; + return Err(DelegateFailure { + agent_id: agent_id.clone(), + error: 
err.message.clone(), + mode: session_mode, + }); + } + _ => {} + } + } + + Err(DelegateFailure { + agent_id, + error: "delegate run ended unexpectedly".to_string(), + mode: session_mode, + }) + } +} + +fn history_entry_to_message( + entry: &ShadowHistoryEntry, + conversation_id: &str, + index: usize, +) -> Option { + let role = match entry.kind { + ShadowHistoryKind::Agent => "assistant", + ShadowHistoryKind::User => "user", + ShadowHistoryKind::Info => "info", + ShadowHistoryKind::Warning => "warning", + ShadowHistoryKind::Error => "error", + ShadowHistoryKind::System => "system", + }; + let content = entry.lines.join("\n"); + Some(DelegateSessionMessageEntry { + id: format!("{conversation_id}:{index}"), + role: role.to_string(), + content, + timestamp: None, + }) } struct DelegateSuccess { @@ -1082,7 +1656,12 @@ impl MultiAgentDelegateAdapter { DelegateToolError::SetupFailed(reason) } OrchestratorError::SessionNotFound(session_id) => { - DelegateToolError::SetupFailed(format!("session not found: {session_id}")) + DelegateToolError::SessionNotFound(session_id) + } + OrchestratorError::AgentBusy(_) => DelegateToolError::AgentBusy, + OrchestratorError::InvalidCursor => DelegateToolError::InvalidCursor, + OrchestratorError::HistoryUnavailable(session_id) => { + DelegateToolError::HistoryUnavailable(session_id) } } } @@ -1108,14 +1687,31 @@ impl DelegateToolAdapter for MultiAgentDelegateAdapter { request: DelegateToolRequest, ) -> Result { let DelegateToolRequest { - agent_id: agent_id_str, + agent_id, prompt, context: _, caller_conversation_id, mode, + conversation_id, .. 
} = request; + let agent_id_str = match agent_id { + Some(id) => id, + None => { + if let Some(conv_id) = conversation_id.as_ref() { + let summary = self + .orchestrator + .session_summary(conv_id) + .await + .ok_or_else(|| DelegateToolError::SessionNotFound(conv_id.clone()))?; + summary.agent_id.as_str().to_string() + } else { + return Err(DelegateToolError::AgentNotFound("".to_string())); + } + } + }; + let agent_id = AgentId::parse(agent_id_str.as_str()) .map_err(|_| DelegateToolError::AgentNotFound(agent_id_str.clone()))?; @@ -1136,6 +1732,7 @@ impl DelegateToolAdapter for MultiAgentDelegateAdapter { parent_run_id, mode, caller_conversation_id, + conversation_id, }) .await .map_err(Self::map_error)?; @@ -1145,4 +1742,37 @@ impl DelegateToolAdapter for MultiAgentDelegateAdapter { agent_id: agent_id_str, }) } + + async fn list_sessions( + &self, + cursor: Option, + limit: usize, + ) -> Result { + self.orchestrator + .list_sessions_paginated(cursor, limit) + .await + .map_err(Self::map_error) + } + + async fn session_messages( + &self, + conversation_id: &str, + cursor: Option, + limit: usize, + ) -> Result { + self.orchestrator + .recent_messages(conversation_id, cursor, limit) + .await + .map_err(Self::map_error) + } + + async fn dismiss_session(&self, conversation_id: &str) -> Result<(), DelegateToolError> { + self.orchestrator + .dismiss_session(conversation_id) + .await + .map_err(Self::map_error) + } } + +#[cfg(test)] +mod tests; diff --git a/codex-rs/multi-agent/src/orchestrator/tests.rs b/codex-rs/multi-agent/src/orchestrator/tests.rs index 56c1846bfff7..7c3ba2cec9bc 100644 --- a/codex-rs/multi-agent/src/orchestrator/tests.rs +++ b/codex-rs/multi-agent/src/orchestrator/tests.rs @@ -1,7 +1,11 @@ use super::*; +use codex_core::CodexAuth; use codex_core::config::ConfigOverrides; +use codex_core::protocol::AgentMessageEvent; +use codex_core::protocol::Event; +use codex_core::protocol::EventMsg; use codex_core::protocol::SessionSource; -use 
codex_core::CodexAuth; +use std::path::PathBuf; use tempfile::tempdir; #[tokio::test] @@ -16,7 +20,7 @@ async fn delegate_started_event_carries_owner() { let orchestrator = Arc::new(AgentOrchestrator::new( &global, - Arc::new(AuthManager::from_auth(CodexAuth::from_api_key("test"))), + AuthManager::from_auth_for_testing(CodexAuth::from_api_key("test")), SessionSource::Cli, CliConfigOverrides::default(), ConfigOverrides::default(), @@ -40,6 +44,7 @@ async fn delegate_started_event_carries_owner() { parent_run_id: Some(parent_run_id), mode: DelegateInvocationMode::Immediate, caller_conversation_id: None, + conversation_id: None, }; let mut events = orchestrator.subscribe().await; @@ -65,3 +70,177 @@ async fn delegate_started_event_carries_owner() { } } } + +#[test] +fn paginate_session_list_returns_remaining_entries() { + use std::time::Duration; + use std::time::SystemTime; + + let base = SystemTime::UNIX_EPOCH; + let summaries = vec![ + DelegateSessionSummary { + conversation_id: "conv-3".to_string(), + agent_id: AgentId::parse("gamma").unwrap(), + last_interacted_at: base + Duration::from_secs(30), + cwd: PathBuf::from("/tmp/gamma"), + mode: DelegateSessionMode::Standard, + }, + DelegateSessionSummary { + conversation_id: "conv-2".to_string(), + agent_id: AgentId::parse("beta").unwrap(), + last_interacted_at: base + Duration::from_secs(20), + cwd: PathBuf::from("/tmp/beta"), + mode: DelegateSessionMode::Standard, + }, + DelegateSessionSummary { + conversation_id: "conv-1".to_string(), + agent_id: AgentId::parse("alpha").unwrap(), + last_interacted_at: base + Duration::from_secs(10), + cwd: PathBuf::from("/tmp/alpha"), + mode: DelegateSessionMode::Standard, + }, + ]; + + let (first_page, cursor) = + paginate_session_summaries(&summaries, None, 2).expect("first page ok"); + assert_eq!( + first_page + .into_iter() + .map(|summary| summary.conversation_id.as_str()) + .collect::>(), + vec!["conv-3", "conv-2"] + ); + let cursor = cursor.expect("cursor for next page"); + 
+ let (second_page, next_cursor) = + paginate_session_summaries(&summaries, Some(cursor), 2).expect("second page ok"); + assert_eq!( + second_page + .into_iter() + .map(|summary| summary.conversation_id.as_str()) + .collect::>(), + vec!["conv-1"], + "expected final session to appear on second page" + ); + assert!(next_cursor.is_none()); +} + +#[tokio::test] +async fn follow_up_shadow_events_do_not_duplicate() { + let temp_home = tempdir().expect("tempdir"); + let global = temp_home.path().join("codex"); + for dir in ["log", "sessions", "history", "mcp", "tmp"] { + std::fs::create_dir_all(global.join(dir)).expect("create dir"); + } + + let orchestrator = Arc::new(AgentOrchestrator::new( + &global, + AuthManager::from_auth_for_testing(CodexAuth::from_api_key("test")), + SessionSource::Cli, + CliConfigOverrides::default(), + ConfigOverrides::default(), + vec![AgentId::parse("critic").unwrap()], + 1, + ShadowConfig::apply_defaults(true, None, None, false), + )); + + let agent_id = AgentId::parse("critic").unwrap(); + let conversation_id = "conv-follow-up"; + orchestrator + .shadow_manager + .register_session(conversation_id, &agent_id) + .await + .expect("register session"); + + let event = Event { + id: "event-1".into(), + msg: EventMsg::AgentMessage(AgentMessageEvent { + message: "delegate output".into(), + }), + }; + + orchestrator + .record_shadow_event_internal( + Some(&agent_id), + conversation_id, + &event, + ShadowRecordMode::Normal, + ) + .await; + + let baseline = orchestrator + .shadow_manager + .session_summary(conversation_id) + .await + .expect("summary") + .metrics + .events; + assert!(baseline > 0); + + orchestrator + .record_shadow_event_internal( + Some(&agent_id), + conversation_id, + &event, + ShadowRecordMode::FollowUp, + ) + .await; + + let after = orchestrator + .shadow_manager + .session_summary(conversation_id) + .await + .expect("summary") + .metrics + .events; + assert_eq!(after, baseline); +} + +#[tokio::test] +async fn 
follow_up_should_preserve_parent_before_registration() { + let temp_home = tempdir().expect("tempdir"); + let global = temp_home.path().join("codex"); + for dir in ["log", "sessions", "history", "mcp", "tmp"] { + std::fs::create_dir_all(global.join(dir)).expect("create dir"); + } + + let orchestrator = AgentOrchestrator::new( + &global, + AuthManager::from_auth_for_testing(CodexAuth::from_api_key("test")), + SessionSource::Cli, + CliConfigOverrides::default(), + ConfigOverrides::default(), + vec![AgentId::parse("critic").unwrap()], + 2, + ShadowConfig::disabled(), + ); + + let conversation_id = "reuse-conv".to_string(); + let original_parent = "run-parent".to_string(); + let new_run = "run-follow-up".to_string(); + + orchestrator + .conversation_runs + .lock() + .await + .insert(conversation_id.clone(), original_parent.clone()); + + let resolved = orchestrator + .parent_run_for_follow_up(&conversation_id, None) + .await; + assert_eq!( + resolved.as_deref(), + Some(original_parent.as_str()), + "follow-up resolution should see the existing parent before registration" + ); + + orchestrator + .register_run_conversation(&new_run, &conversation_id) + .await; + + assert_eq!( + resolved.as_deref(), + Some(original_parent.as_str()), + "captured parent run id should remain intact for follow-up bookkeeping" + ); +} diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index 975b7751cbfa..8de4e83c5256 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -30,6 +30,7 @@ use codex_core::protocol::SessionConfiguredEvent; use codex_core::protocol::SessionSource; use codex_core::protocol::TokenUsage; use codex_core::protocol_config_types::ReasoningEffort as ReasoningEffortConfig; +use codex_multi_agent::ActiveDelegateSession; use codex_multi_agent::AgentId; use codex_multi_agent::AgentOrchestrator; use codex_multi_agent::DelegateEvent; @@ -48,7 +49,10 @@ use crossterm::event::KeyEvent; use crossterm::event::KeyEventKind; use ratatui::style::Stylize; use 
ratatui::text::Line; -use std::collections::{HashMap, HashSet, VecDeque, hash_map::Entry}; +use std::collections::HashMap; +use std::collections::HashSet; +use std::collections::VecDeque; +use std::collections::hash_map::Entry; use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::AtomicBool; @@ -912,6 +916,59 @@ impl App { Err(err) => self.active_widget_mut().add_error_message(err), } } + AppEvent::PreviewDelegateSession(conversation_id) => { + match self + .delegate_orchestrator + .session_summary(&conversation_id) + .await + { + Some(summary) => { + match self + .delegate_orchestrator + .recent_messages(&conversation_id, None, 3) + .await + { + Ok(messages) => { + self.active_widget_mut() + .show_delegate_preview(&summary, &messages); + } + Err(err) => { + self.active_widget_mut().add_error_message(format!( + "Failed to load session preview: {err}" + )); + } + } + } + None => { + self.active_widget_mut().add_error_message(format!( + "Unknown delegate session {conversation_id}" + )); + } + } + } + AppEvent::DismissDelegateSession(conversation_id) => { + let label = self + .delegate_orchestrator + .session_summary(&conversation_id) + .await + .map(|summary| format!("#{}", summary.agent_id.as_str())) + .unwrap_or_else(|| conversation_id.clone()); + + match self + .delegate_orchestrator + .dismiss_session(&conversation_id) + .await + { + Ok(()) => { + self.active_widget_mut() + .add_info_message(format!("Dismissed delegate session {label}"), None); + } + Err(err) => { + self.active_widget_mut() + .add_error_message(format!("Failed to dismiss {label}: {err}")); + } + } + } AppEvent::InsertUserTextMessage(text) => { self.active_widget_mut().submit_text_message(text); } @@ -1210,35 +1267,70 @@ impl App { .await .map_err(|err| format!("{err}"))?; + self.activate_delegate_session_from_active(Some(tui), conversation_id, active) + .await + } + + async fn activate_delegate_session_from_active( + &mut self, + mut tui: Option<&mut tui::Tui>, + conversation_id: 
String, + active: ActiveDelegateSession, + ) -> Result<(), String> { + let ActiveDelegateSession { + summary, + conversation, + session_configured, + config, + event_rx, + shadow_snapshot, + shadow_summary, + } = active; + + let mut event_rx = Some(event_rx); + use Entry::*; match self.sessions.entry(conversation_id.clone()) { Occupied(mut occ) => { let handle = occ.get_mut(); handle.widget.ensure_conversation_id(&conversation_id); - handle.set_summary(Some(active.summary.clone())); - handle.set_mode(active.summary.mode); - handle.set_shadow_summary(active.shadow_summary.clone()); - handle - .widget - .set_delegate_context(Some(active.summary.clone())); - if handle.history().is_empty() { - if let Some(snapshot) = active.shadow_snapshot.as_ref() { - handle.widget.hydrate_from_shadow(snapshot); - } else { - handle.widget.clear_shadow_capture(); - handle.widget.add_info_message( - "Shadow cache unavailable; replaying from rollout.".to_string(), - None, - ); - } + handle.set_summary(Some(summary.clone())); + handle.set_mode(summary.mode); + handle.set_shadow_summary(shadow_summary.clone()); + handle.widget.set_delegate_context(Some(summary.clone())); + if let Some(snapshot) = shadow_snapshot.as_ref() { + handle.set_history(Vec::new()); + handle.widget.hydrate_from_shadow(snapshot); + } else { + handle.widget.clear_shadow_capture(); + handle.widget.add_info_message( + "Shadow cache unavailable; replaying from rollout.".to_string(), + None, + ); } drop(occ); - drop(active.event_rx); + if let Some(rx) = event_rx.take() { + drop(rx); + } } Vacant(vacant) => { + #[allow(unused_mut)] + let frame_requester = match tui.as_mut() { + Some(tui_ref) => tui_ref.frame_requester(), + None => { + #[cfg(test)] + { + crate::tui::FrameRequester::test_dummy() + } + #[cfg(not(test))] + { + unreachable!("delegate session activation requires tui"); + } + } + }; let init = ChatWidgetInit { - config: active.config.clone(), - frame_requester: tui.frame_requester(), + config: config.clone(), + 
frame_requester, app_event_tx: self.app_event_tx.scoped(), initial_prompt: None, initial_images: Vec::new(), @@ -1248,15 +1340,15 @@ impl App { }; let mut session = ChatWidget::new_session_from_existing_with_events( init, - active.conversation.clone(), - active.session_configured.clone(), - active.event_rx, + conversation.clone(), + session_configured.clone(), + event_rx + .take() + .expect("delegate session event receiver consumed"), ); session.widget.ensure_conversation_id(&conversation_id); - session - .widget - .set_delegate_context(Some(active.summary.clone())); - if let Some(snapshot) = active.shadow_snapshot.as_ref() { + session.widget.set_delegate_context(Some(summary.clone())); + if let Some(snapshot) = shadow_snapshot.as_ref() { session.widget.hydrate_from_shadow(snapshot); } else { session.widget.clear_shadow_capture(); @@ -1272,9 +1364,9 @@ impl App { ); vacant.insert(SessionHandle::new( session.widget, - Some(active.summary.clone()), - active.summary.mode, - active.shadow_summary.clone(), + Some(summary.clone()), + summary.mode, + shadow_summary.clone(), )); } } @@ -1307,15 +1399,35 @@ impl App { handle.widget.set_delegate_context(Some(summary)); } self.apply_active_history_from_handle(); - self.replay_active_session_from_last_user(tui); + if let Some(tui_ref) = tui.as_mut() { + self.replay_active_session_from_last_user(tui_ref); + } self.sync_active_handle_history(); self.delegate_orchestrator .touch_session(&conversation_id) .await; - tui.frame_requester().schedule_frame(); + if let Some(tui_ref) = tui { + tui_ref.frame_requester().schedule_frame(); + } Ok(()) } + #[cfg(test)] + async fn activate_delegate_session_with_active( + &mut self, + conversation_id: String, + active: ActiveDelegateSession, + ) -> Result<(), String> { + if self.active_session_id == conversation_id { + return Ok(()); + } + + self.sync_active_handle_history(); + self.active_widget_mut().set_delegate_context(None); + self.activate_delegate_session_from_active(None, 
conversation_id, active) + .await + } + fn agent_id_for_conversation(&self, conversation_id: &str) -> Option<&AgentId> { self.sessions .get(conversation_id) @@ -1589,8 +1701,10 @@ mod tests { use super::*; use crate::app_backtrack::BacktrackState; use crate::app_backtrack::user_count; + use crate::app_event::AppEvent; use crate::chatwidget::tests::make_chatwidget_manual_with_sender; use crate::file_search::FileSearchManager; + use crate::history_cell; use crate::history_cell::AgentMessageCell; use crate::history_cell::HistoryCell; use crate::history_cell::UserHistoryCell; @@ -1605,14 +1719,25 @@ mod tests { use codex_core::protocol::SessionConfiguredEvent; use codex_core::protocol::SessionSource; + use codex_core::protocol::Event; + use codex_core::protocol::EventMsg; + use codex_core::protocol::InputItem; + use codex_core::protocol::InputMessageKind; + use codex_core::protocol::UserMessageEvent; use codex_protocol::ConversationId; use ratatui::prelude::Line; use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::AtomicBool; + use std::time::SystemTime; fn make_test_app() -> App { - let (mut chat_widget, app_event_tx, _rx, _op_rx) = make_chatwidget_manual_with_sender(); + let (app, _rx) = make_test_app_with_receiver(); + app + } + + fn make_test_app_with_receiver() -> (App, tokio::sync::mpsc::UnboundedReceiver) { + let (mut chat_widget, app_event_tx, rx, _op_rx) = make_chatwidget_manual_with_sender(); let config = chat_widget.config_ref().clone(); let session_id = ConversationId::new().to_string(); chat_widget.ensure_conversation_id(&session_id); @@ -1656,28 +1781,31 @@ mod tests { SessionHandle::new(chat_widget, None, DelegateSessionMode::Standard, None), ); - App { - server, - app_event_tx, - sessions, - active_session_id: session_id.clone(), - primary_session_id: session_id, - auth_manager, - delegate_orchestrator, - config, - active_profile: None, - file_search, - transcript_cells: Vec::new(), - overlay: None, - deferred_history_lines: Vec::new(), - 
has_emitted_history_lines: false, - enhanced_keys_supported: false, - commit_anim_running: Arc::new(AtomicBool::new(false)), - backtrack: BacktrackState::default(), - feedback: codex_feedback::CodexFeedback::new(), - pending_update_action: None, - run_parent_map: HashMap::new(), - } + ( + App { + server, + app_event_tx, + sessions, + active_session_id: session_id.clone(), + primary_session_id: session_id, + auth_manager, + delegate_orchestrator, + config, + active_profile: None, + file_search, + transcript_cells: Vec::new(), + overlay: None, + deferred_history_lines: Vec::new(), + has_emitted_history_lines: false, + enhanced_keys_supported: false, + commit_anim_running: Arc::new(AtomicBool::new(false)), + backtrack: BacktrackState::default(), + feedback: codex_feedback::CodexFeedback::new(), + pending_update_action: None, + run_parent_map: HashMap::new(), + }, + rx, + ) } #[test] @@ -1799,6 +1927,169 @@ mod tests { assert!(app.run_parent_map.is_empty()); } + #[tokio::test] + async fn follow_up_session_should_apply_shadow_even_with_existing_history() { + use crate::tui::FrameRequester; + use codex_multi_agent::shadow::ShadowHistoryEntry; + use codex_multi_agent::shadow::ShadowHistoryKind; + use codex_multi_agent::shadow::ShadowMetrics; + use codex_multi_agent::shadow::ShadowSnapshot; + use codex_multi_agent::shadow::ShadowTranscriptCapture; + + let (mut app, mut app_events) = make_test_app_with_receiver(); + + let new_conversation = app + .server + .new_conversation(app.config.clone()) + .await + .expect("new conversation"); + let conversation_id = new_conversation.conversation_id.to_string(); + let agent_id = AgentId::parse("critic").unwrap(); + + let summary = DelegateSessionSummary { + conversation_id: conversation_id.clone(), + agent_id: agent_id.clone(), + last_interacted_at: SystemTime::now(), + cwd: app.config.cwd.clone(), + mode: DelegateSessionMode::Standard, + }; + + let initial_prompt = "How should I carry a box of apples safely?".to_string(); + let 
follow_up_prompt = "Follow-up: The box weighs 50 kg.".to_string(); + + let make_user_event = |id: &str, message: &str| Event { + id: id.to_string(), + msg: EventMsg::UserMessage(UserMessageEvent { + message: message.to_string(), + kind: Some(InputMessageKind::Plain), + images: None, + }), + }; + let events = vec![ + make_user_event("event-initial", &initial_prompt), + make_user_event("event-follow-up", &follow_up_prompt), + ]; + let events_len = events.len(); + + let shadow_snapshot = ShadowSnapshot { + conversation_id: conversation_id.clone(), + agent_id: agent_id.clone(), + history: vec![ + ShadowHistoryEntry { + kind: ShadowHistoryKind::User, + lines: vec![initial_prompt.clone()], + is_stream_continuation: false, + }, + ShadowHistoryEntry { + kind: ShadowHistoryKind::User, + lines: vec![follow_up_prompt.clone()], + is_stream_continuation: false, + }, + ], + capture: ShadowTranscriptCapture { + user_inputs: vec![ + InputItem::Text { + text: initial_prompt.clone(), + }, + InputItem::Text { + text: follow_up_prompt.clone(), + }, + ], + agent_outputs: Vec::new(), + }, + metrics: ShadowMetrics { + session_count: 1, + events: events_len, + user_inputs: 2, + ..ShadowMetrics::default() + }, + events, + recorded_at: SystemTime::now(), + }; + + let init = ChatWidgetInit { + config: app.config.clone(), + frame_requester: FrameRequester::test_dummy(), + app_event_tx: app.app_event_tx.scoped(), + initial_prompt: None, + initial_images: Vec::new(), + enhanced_keys_supported: app.enhanced_keys_supported, + auth_manager: app.auth_manager.clone(), + feedback: app.feedback.clone(), + }; + let (event_tx, event_rx) = tokio::sync::mpsc::unbounded_channel(); + drop(event_tx); + let mut widget_session = ChatWidget::new_session_from_existing_with_events( + init, + new_conversation.conversation.clone(), + Arc::new(new_conversation.session_configured.clone()), + event_rx, + ); + widget_session + .widget + .ensure_conversation_id(&conversation_id); + + let mut handle = SessionHandle::new( 
+ widget_session.widget, + Some(summary.clone()), + summary.mode, + None, + ); + handle.push_history( + Arc::new(history_cell::new_user_prompt("earlier history".to_string())) + as Arc, + ); + handle.widget.set_delegate_context(Some(summary.clone())); + + app.sessions.insert(conversation_id.clone(), handle); + + let (shadow_event_tx, shadow_event_rx) = tokio::sync::mpsc::unbounded_channel(); + drop(shadow_event_tx); + + let active = ActiveDelegateSession { + summary: summary.clone(), + conversation: new_conversation.conversation.clone(), + session_configured: Arc::new(new_conversation.session_configured.clone()), + config: app.config.clone(), + event_rx: shadow_event_rx, + shadow_snapshot: Some(shadow_snapshot), + shadow_summary: None, + }; + + app.activate_delegate_session_with_active(conversation_id.clone(), active) + .await + .expect("activate delegate session"); + + while let Ok(event) = app_events.try_recv() { + if let AppEvent::InsertHistoryCell { + conversation_id: Some(target_id), + cell, + } = event + { + let cell: Arc = cell.into(); + if let Some(handle) = app.sessions.get_mut(&target_id) { + handle.push_history(cell); + } + } + } + + let transcript: String = app + .sessions + .get(&conversation_id) + .unwrap() + .history() + .iter() + .flat_map(|cell| cell.display_lines(120)) + .flat_map(|line| line.spans.into_iter()) + .map(|span| span.content.to_string()) + .collect(); + + assert!( + transcript.contains(&follow_up_prompt), + "transcript missing follow-up prompt: {transcript}" + ); + } + #[test] fn backtrack_selection_with_duplicate_history_targets_unique_turn() { let mut app = make_test_app(); diff --git a/codex-rs/tui/src/app_event.rs b/codex-rs/tui/src/app_event.rs index e2b901732f76..2a30cb6aab54 100644 --- a/codex-rs/tui/src/app_event.rs +++ b/codex-rs/tui/src/app_event.rs @@ -131,6 +131,12 @@ pub(crate) enum AppEvent { /// Dismiss a detached delegate run from the registry. 
DismissDetachedRun(String), + /// Show a snapshot of recent messages for a delegate session. + PreviewDelegateSession(String), + + /// Remove a reusable delegate session (and its shadow cache). + DismissDelegateSession(String), + /// Inject text into the main composer as if the user typed it. InsertUserTextMessage(String), diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index a2af963aa92e..95d6df7e6d02 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -7,6 +7,7 @@ use std::sync::Arc; use codex_core::CodexConversation; use codex_core::config::Config; use codex_core::config_types::Notifications; +use codex_core::delegate_tool::DelegateSessionMessages; use codex_core::git_info::current_branch_name; use codex_core::git_info::local_git_branches; use codex_core::project_doc::DEFAULT_PROJECT_DOC_FILENAME; @@ -2216,6 +2217,39 @@ impl ChatWidget { ..Default::default() }); + let preview_id = conversation_id.clone(); + let preview_actions: Vec = vec![Box::new(move |tx| { + tx.send(AppEvent::PreviewDelegateSession(preview_id.clone())); + })]; + items.push(SelectionItem { + name: format!( + " Preview recent messages for #{}", + summary.agent_id.as_str() + ), + description: Some("Shows the latest shadow-cached turns.".to_string()), + is_current: false, + actions: preview_actions, + dismiss_on_select: false, + ..Default::default() + }); + + if !is_current { + let dismiss_id = conversation_id.clone(); + let dismiss_actions: Vec = vec![Box::new(move |tx| { + tx.send(AppEvent::DismissDelegateSession(dismiss_id.clone())); + })]; + items.push(SelectionItem { + name: format!(" Dismiss session for #{}", summary.agent_id.as_str()), + description: Some( + "Remove this session (and its shadow cache) from the list.".to_string(), + ), + is_current: false, + actions: dismiss_actions, + dismiss_on_select: true, + ..Default::default() + }); + } + if summary.mode == DelegateSessionMode::Detached && let Some(run_id) = run_id.clone() { 
@@ -2752,6 +2786,33 @@ impl ChatWidget { self.request_redraw(); } + pub(crate) fn show_delegate_preview( + &mut self, + summary: &DelegateSessionSummary, + messages: &DelegateSessionMessages, + ) { + if messages.messages.is_empty() { + self.add_info_message( + format!( + "No recent messages recorded for #{}", + summary.agent_id.as_str() + ), + None, + ); + return; + } + + let label = format!("#{}", summary.agent_id.as_str()); + let cell = history_cell::new_delegate_preview( + &label, + summary.conversation_id.as_str(), + &messages.messages, + messages.next_cursor.is_some(), + ); + self.add_boxed_history(cell); + self.request_redraw(); + } + pub(crate) fn on_delegate_started( &mut self, run_id: &str, diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs index 85acaf9b6a6d..a7cf3df148fe 100644 --- a/codex-rs/tui/src/history_cell.rs +++ b/codex-rs/tui/src/history_cell.rs @@ -24,6 +24,7 @@ use codex_common::format_env_display::format_env_display; use codex_core::config::Config; use codex_core::config_types::McpServerTransportConfig; use codex_core::config_types::ReasoningSummaryFormat; +use codex_core::delegate_tool::DelegateSessionMessageEntry; use codex_core::protocol::FileChange; use codex_core::protocol::McpAuthStatus; use codex_core::protocol::McpInvocation; @@ -273,6 +274,54 @@ impl HistoryCell for PlainHistoryCell { } } +#[derive(Debug)] +pub(crate) struct DelegatePreviewCell { + header: String, + conversation_id: String, + entries: Vec, + has_more: bool, +} + +#[derive(Debug)] +struct DelegatePreviewEntry { + role: String, + content: String, +} + +impl HistoryCell for DelegatePreviewCell { + fn display_lines(&self, width: u16) -> Vec> { + let mut lines: Vec> = Vec::new(); + lines.push(line_to_static(&Line::from(self.header.clone()).bold())); + lines.push(line_to_static( + &Line::from(format!("Session {}", self.conversation_id)).dim(), + )); + + for (idx, entry) in self.entries.iter().enumerate() { + lines.push(line_to_static( + 
&Line::from(format!("{}:", entry.role)).dim(), + )); + let wrapped = word_wrap_lines( + &[Line::from(entry.content.clone())], + RtOptions::new(width as usize) + .initial_indent(" ".into()) + .subsequent_indent(" ".into()), + ); + lines.extend(wrapped); + if idx + 1 < self.entries.len() { + lines.push(Line::default()); + } + } + + if self.has_more { + lines.push(line_to_static( + &Line::from("…older messages available").dim(), + )); + } + + lines + } +} + #[derive(Debug)] pub(crate) struct PrefixedWrappedHistoryCell { text: Text<'static>, @@ -1158,6 +1207,27 @@ pub(crate) fn new_error_event(message: String) -> PlainHistoryCell { PlainHistoryCell { lines } } +pub(crate) fn new_delegate_preview( + agent_label: &str, + conversation_id: &str, + entries: &[DelegateSessionMessageEntry], + has_more: bool, +) -> Box { + let preview_entries = entries + .iter() + .map(|entry| DelegatePreviewEntry { + role: entry.role.clone(), + content: entry.content.clone(), + }) + .collect(); + Box::new(DelegatePreviewCell { + header: format!("Preview of {agent_label}"), + conversation_id: conversation_id.to_string(), + entries: preview_entries, + has_more, + }) +} + /// Render a user‑friendly plan update styled like a checkbox todo list. 
pub(crate) fn new_plan_update(update: UpdatePlanArgs) -> PlanUpdateCell { let UpdatePlanArgs { explanation, plan } = update; @@ -1913,6 +1983,39 @@ mod tests { insta::assert_snapshot!(rendered); } + #[test] + fn delegate_preview_displays_recent_messages() { + let entries = vec![ + DelegateSessionMessageEntry { + id: "sess:2".into(), + role: "assistant".into(), + content: "Final summary".into(), + timestamp: None, + }, + DelegateSessionMessageEntry { + id: "sess:1".into(), + role: "user".into(), + content: "Follow up with tests".into(), + timestamp: None, + }, + ]; + + let cell = new_delegate_preview("#critic", "sess", &entries, true); + let lines = cell.display_lines(40); + let rendered = render_lines(&lines).join("\n"); + + insta::assert_snapshot!(rendered, @r###" +Preview of #critic +Session sess +assistant: + Final summary + +user: + Follow up with tests +…older messages available +"###); + } + #[test] fn multiline_command_without_wrap_uses_branch_then_eight_spaces() { let call_id = "c1".to_string();