openai · cassirer-openai · Apr 21, 2026 · Apr 21, 2026
diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml
@@ -52,6 +52,7 @@ members = [
     "protocol",
     "realtime-webrtc",
     "rollout",
+    "rollout-trace",
     "rmcp-client",
     "responses-api-proxy",
     "response-debug-context",

diff --git a/codex-rs/rollout-trace/BUILD.bazel b/codex-rs/rollout-trace/BUILD.bazel
@@ -0,0 +1,6 @@
+load("//:defs.bzl", "codex_rust_crate")
+
+codex_rust_crate(
+    name = "rollout-trace",
+    crate_name = "codex_rollout_trace",
+)
diff --git a/codex-rs/rollout-trace/Cargo.toml b/codex-rs/rollout-trace/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+edition.workspace = true
+license.workspace = true
+name = "codex-rollout-trace"
+version.workspace = true
+
+[lib]
+doctest = false
+name = "codex_rollout_trace"
+path = "src/lib.rs"
+
+[lints]
+workspace = true
+
+[dependencies]
+anyhow = { workspace = true }
+codex-protocol = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+
+[dev-dependencies]
+pretty_assertions = { workspace = true }
+tempfile = { workspace = true }
diff --git a/codex-rs/rollout-trace/README.md b/codex-rs/rollout-trace/README.md
@@ -0,0 +1,203 @@
+# Rollout Trace
+
+> **Privacy:** Rollout tracing does **not** collect, upload, or report user data;
+> it only writes local bundles when `CODEX_ROLLOUT_TRACE_ROOT` is set.
+
+Rollout tracing is an opt-in diagnostic path for understanding what happened
+during a Codex session. It records raw runtime evidence into a local bundle, then
+replays that bundle into a semantic graph that a debugger or UI can inspect.
+
+The key design choice is: **observe first, interpret later**.
+
+Hot-path Codex code does not try to build the final graph while the session is
+running. It writes ordered raw events and payload references. The offline reducer
+then decides which events became model-visible conversation, which events were
+runtime work, and how information moved between threads, tools, code cells, and
+terminal sessions.
+
+## What This Gives Us
+
+Rollout traces make failures debuggable when the normal transcript is not enough.
+They preserve enough evidence to answer questions like:
+
+- Which model request produced this tool call?
+- Did this output come from the model-visible transcript, a code-mode runtime
+  value, a terminal operation, or an agent notification?
+- Which code-mode `exec` cell issued a nested tool call?
+- Which terminal operation created or reused a running process?
+- Which multi-agent v2 tool call spawned, messaged, received from, or closed a
+  child thread?
+
+The reduced `state.json` is intentionally not just a transcript. It is a graph of
+model-visible conversation plus the runtime objects that explain how Codex got
+there.
+
+## System Shape
+
+```mermaid
+flowchart TD
+    subgraph Runtime["codex-core runtime"]
+        Protocol["protocol lifecycle\nthread start/end, turn start/end"]
+        Inference["inference + compaction\nrequests, responses, checkpoints"]
+        Tools["tool dispatch\ndirect model tools + code-mode nested tools"]
+        CodeMode["code-mode runtime\nexec cells, yields, waits, termination"]
+        Terminal["terminal runtime\nexec_command / write_stdin operations"]
+        Agents["multi_agent_v2\nspawn, task delivery, result, close"]
+    end
+
+    Recorder["RolloutTraceRecorder\nthin best-effort producer"]
+    Writer["TraceWriter\nassigns seq and writes payloads before events"]
+
+    subgraph Bundle["trace bundle"]
+        Manifest["manifest.json\ntrace_id, rollout_id, root_thread_id"]
+        Events["trace.jsonl\nordered raw event spine"]
+        Payloads["payloads/*.json\nlarge raw evidence"]
+    end
+
+    Reducer["replay_bundle\ndeterministic offline reducer"]
+
+    subgraph State["state.json"]
+        Threads["threads + turns"]
+        Conversation["conversation_items\nwhat the model saw"]
+        RuntimeObjects["inference_calls, tool_calls,\ncode_cells, terminals, compactions"]
+        Edges["interaction_edges\nspawn, task, result, close"]
+        RawRefs["raw_payload refs"]
+    end
+
+    Protocol --> Recorder
+    Inference --> Recorder
+    Tools --> Recorder
+    CodeMode --> Recorder
+    Terminal --> Recorder
+    Agents --> Recorder
+
+    Recorder --> Writer
+    Writer --> Manifest
+    Writer --> Payloads
+    Writer --> Events
+
+    Manifest --> Reducer
+    Events --> Reducer
+    Payloads --> Reducer
+
+    Reducer --> Threads
+    Reducer --> Conversation
+    Reducer --> RuntimeObjects
+    Reducer --> Edges
+    Reducer --> RawRefs
+```
+
+The recorder is deliberately small. It is enabled by `CODEX_ROLLOUT_TRACE_ROOT`
+and must never make a Codex session fail just because tracing failed. Core emits
+raw observations; this crate owns the bundle schema, writer API, and reducer.
+
+## Bundle Layout
+
+A trace bundle contains:
+
+- `manifest.json`: trace identity and bundle metadata.
+- `trace.jsonl`: append-only raw events ordered by writer-assigned `seq`.
+- `payloads/*.json`: raw requests, responses, tool inputs/results, runtime
+  events, terminal output, compaction data, and protocol snapshots.
+- `state.json`: optional reducer output written by `codex debug trace-reduce`.
+
+`trace_id` identifies this diagnostic artifact. `rollout_id` identifies the
+Codex rollout/session being observed. Keeping those separate lets us reason about
+the stored trace without confusing it with the product-level session identity.
+
+To reduce a bundle:
+
+```bash
+codex debug trace-reduce <trace-bundle>
+```
+
+By default this writes `<trace-bundle>/state.json`.
+
+## Raw Evidence vs Reduced Graph
+
+```mermaid
+flowchart LR
+    Model["model-visible payloads\nrequests and response output items"]
+    Runtime["runtime observations\ntool dispatch, terminal output, code-mode JSON"]
+    RawPayloads["payloads/*.json\nexact evidence"]
+    Reducer["reducer"]
+    Conversation["ConversationItem\nwhat the model saw"]
+    ToolCall["ToolCall\nruntime tool boundary"]
+    CodeCell["CodeCell\nmodel-authored exec cell"]
+    TerminalOperation["TerminalOperation\ncommand/write/poll"]
+    InteractionEdge["InteractionEdge\ninformation flow"]
+
+    Model --> RawPayloads
+    Runtime --> RawPayloads
+    RawPayloads --> Reducer
+
+    Reducer --> Conversation
+    Reducer --> ToolCall
+    Reducer --> CodeCell
+    Reducer --> TerminalOperation
+    Reducer --> InteractionEdge
+
+    CodeCell --> ToolCall
+    ToolCall --> TerminalOperation
+    ToolCall --> InteractionEdge
+    Conversation --> InteractionEdge
+```
+
+This distinction is why the reduced trace data model has both raw payload
+references and semantic objects. A code-mode nested tool call, for example, has
+JSON input and output at the JavaScript runtime boundary, but the model-visible
+transcript only contains the surrounding `exec` custom tool call and its eventual output.
+
+The reducer keeps those facts separate:
+
+- `ConversationItem` records what appeared in model-facing requests/responses.
+- `ToolCall`, `CodeCell`, `TerminalOperation`, `InferenceCall`, and
+  `Compaction` record runtime/debug boundaries.
+- `InteractionEdge` records information flow between objects, such as a
+  `spawn_agent` tool call delivering a task into a child thread.
+- `RawPayloadRef` points back to exact evidence when a viewer needs more detail
+  than the reduced graph stores inline.
+
+## Multi-Agent v2
+
+Multi-agent v2 child threads share the root trace writer. That means one root
+bundle reduces into one graph containing the parent thread, child threads, and
+the edges between them.
+
+```mermaid
+flowchart LR
+    RootTool["root ToolCall\nspawn_agent / followup_task / send_message"]
+    ChildInput["child ConversationItem\ninjected task/message"]
+    ChildThread["child AgentThread"]
+    ChildResult["child assistant ConversationItem\nresult message"]
+    RootNotice["root ConversationItem\nsubagent notification"]
+    CloseTool["root ToolCall\nclose_agent"]
+    TargetThread["target AgentThread"]
+
+    RootTool -- "spawn/task edge" --> ChildInput
+    ChildInput --> ChildThread
+    ChildThread --> ChildResult
+    ChildResult -- "agent_result edge" --> RootNotice
+    CloseTool -- "close_agent edge" --> TargetThread
+```
+
+Top-level independent threads still get independent bundles. Spawned child
+threads are different: they are part of the same rollout tree, so they belong in
+the same raw event log, payload directory, and reduced `state.json`.
+
+## Reducer Invariants
+
+The reducer is strict where the raw evidence should be self-consistent:
+
+- raw events are replayed in `seq` order;
+- payload files must exist before events refer to them;
+- reduced object IDs are stable within one replay;
+- runtime events may be queued until the model-visible source or delivery target
+  has been observed;
+- model-visible conversation is derived from model-facing payloads, not from
+  runtime convenience output;
+- runtime payloads are evidence, not proof that the model saw the same bytes.
+
+Those invariants let the reduced graph stay small while preserving a path back
+to the original evidence whenever a debugger needs to explain why an object or
+edge exists.
diff --git a/codex-rs/rollout-trace/src/bundle.rs b/codex-rs/rollout-trace/src/bundle.rs
@@ -0,0 +1,49 @@
+//! Trace bundle manifest and local layout constants.
+
+use serde::Deserialize;
+use serde::Serialize;
+
+use crate::model::AgentThreadId;
+
+pub(crate) const MANIFEST_FILE_NAME: &str = "manifest.json"; // File name of the bundle manifest.
+pub(crate) const RAW_EVENT_LOG_FILE_NAME: &str = "trace.jsonl"; // Default `raw_event_log` value.
+pub(crate) const PAYLOADS_DIR_NAME: &str = "payloads"; // Default `payloads_dir` value.
+/// Conventional file name for a reducer-written `RolloutTrace` cache.
+pub const REDUCED_STATE_FILE_NAME: &str = "state.json";
+pub(crate) const TRACE_MANIFEST_SCHEMA_VERSION: u32 = 1; // Stamped into new manifests.
+pub(crate) const REDUCED_TRACE_SCHEMA_VERSION: u32 = 1; // NOTE(review): presumably versions the reduced state; confirm at use site.
+
+/// Serializable manifest stored at the root of a trace bundle; names the trace and its data files.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub(crate) struct TraceBundleManifest {
+    pub(crate) schema_version: u32, // `new` sets this to `TRACE_MANIFEST_SCHEMA_VERSION`.
+    pub(crate) trace_id: String, // Identity of this diagnostic trace artifact.
+    pub(crate) rollout_id: String, // Identity of the observed rollout; kept distinct from `trace_id`.
+    /// Root thread for the recorded rollout. Replay should fail rather than
+    /// inventing a placeholder, because every reduced object is scoped back to
+    /// this thread tree.
+    pub(crate) root_thread_id: AgentThreadId,
+    pub(crate) started_at_unix_ms: i64, // NOTE(review): presumably Unix epoch milliseconds; confirm with caller.
+    pub(crate) raw_event_log: String, // Name of the raw event log; `new` uses `RAW_EVENT_LOG_FILE_NAME`.
+    pub(crate) payloads_dir: String, // Name of the payload directory; `new` uses `PAYLOADS_DIR_NAME`.
+}
+
+impl TraceBundleManifest {
+    /// Builds a manifest with the current schema version and the standard local bundle layout.
+    pub(crate) fn new(
+        trace_id: String,
+        rollout_id: String,
+        root_thread_id: AgentThreadId,
+        started_at_unix_ms: i64,
+    ) -> Self {
+        Self {
+            schema_version: TRACE_MANIFEST_SCHEMA_VERSION, // Not caller-controlled: always the current version.
+            trace_id,
+            rollout_id,
+            root_thread_id,
+            started_at_unix_ms,
+            raw_event_log: RAW_EVENT_LOG_FILE_NAME.to_string(), // "trace.jsonl"
+            payloads_dir: PAYLOADS_DIR_NAME.to_string(), // "payloads"
+        }
+    }
+}