diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 57807e5f855..b518d8ce4ff 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -1,5 +1,7 @@ use crate::codex::TurnContext; use crate::context_manager::normalize; +use crate::event_mapping::has_non_contextual_dev_message_content; +use crate::event_mapping::is_contextual_dev_message_content; use crate::event_mapping::is_contextual_user_message_content; use crate::truncate::TruncationPolicy; use crate::truncate::approx_bytes_for_tokens; @@ -40,7 +42,9 @@ pub(crate) struct ContextManager { /// match the current turn after context updates are persisted. /// /// When this is `None`, settings diffing treats the next turn as having no - /// baseline and emits a full reinjection of context state. + /// baseline and emits a full reinjection of context state. Rollback may + /// also clear this when it trims a mixed initial-context developer bundle + /// whose non-diff fragments no longer exist in the surviving history. reference_context_item: Option, } @@ -215,6 +219,12 @@ impl ContextManager { /// - if there are no user turns, this is a no-op /// - if `num_turns` exceeds the number of user turns, all user turns are dropped while /// preserving any items that occurred before the first user message. + /// + /// If rollback trims a pre-turn developer message that mixes contextual fragments with + /// persistent developer text from `build_initial_context`, this also clears + /// `reference_context_item`. The surviving history no longer contains the full bundle that + /// established the prior baseline, so future turns must fall back to full reinjection instead + /// of diffing against stale state. pub(crate) fn drop_last_n_user_turns(&mut self, num_turns: u32) { if num_turns == 0 { return; @@ -222,18 +232,21 @@ impl ContextManager { let snapshot = self.items.clone(); let user_positions = user_message_positions(&snapshot); - let Some(&first_user_idx) = user_positions.first() else { + let Some(&first_instruction_turn_idx) = user_positions.first() else { self.replace(snapshot); return; }; let n_from_end = usize::try_from(num_turns).unwrap_or(usize::MAX); - let cut_idx = if n_from_end >= user_positions.len() { - first_user_idx + let mut cut_idx = if n_from_end >= user_positions.len() { + first_instruction_turn_idx } else { user_positions[user_positions.len() - n_from_end] }; + cut_idx = + self.trim_pre_turn_context_updates(&snapshot, first_instruction_turn_idx, cut_idx); + self.replace(snapshot[..cut_idx].to_vec()); } @@ -382,6 +395,53 @@ impl ContextManager { | ResponseItem::Other => item.clone(), } } + + /// Walk backward from a rollback cut and trim contiguous pre-turn context-update items. + /// + /// Returns the adjusted cut index after removing contextual developer/user items immediately + /// above the rolled-back turn boundary. + /// + /// `first_instruction_turn_idx` is the earliest rollback-eligible instruction-turn boundary + /// in `snapshot`; the trim walk never crosses it so any session-prefix items that predate the + /// first real turn survive rollback. + /// + /// `cut_idx` is the tentative slice boundary after dropping the requested number of + /// instruction turns, before stripping contextual pre-turn items that sit immediately above + /// that boundary. + /// + /// If any trimmed developer message was a mixed `build_initial_context` bundle containing both + /// rollback-trimmable contextual fragments and persistent developer text, this also clears the + /// stored `reference_context_item` baseline so the next real turn falls back to full + /// reinjection. + fn trim_pre_turn_context_updates( + &mut self, + snapshot: &[ResponseItem], + first_instruction_turn_idx: usize, + mut cut_idx: usize, + ) -> usize { + while cut_idx > first_instruction_turn_idx { + match &snapshot[cut_idx - 1] { + ResponseItem::Message { role, content, .. } + if role == "developer" && is_contextual_dev_message_content(content) => + { + if has_non_contextual_dev_message_content(content) { + // Mixed `build_initial_context` bundles are not reconstructible from + // steady-state diffs once trimmed, so the next real turn must fully + // reinject context instead of diffing against a stale baseline. + self.reference_context_item = None; + } + cut_idx -= 1; + } + ResponseItem::Message { role, content, .. } + if role == "user" && is_contextual_user_message_content(content) => + { + cut_idx -= 1; + } + _ => break, + } + } + cut_idx + } } fn truncate_function_output_payload( diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 1a4dc0ed8e3..91e7c8546c4 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -5,6 +5,7 @@ use base64::Engine; use base64::engine::general_purpose::STANDARD as BASE64_STANDARD; use codex_git::GhostCommit; use codex_protocol::AgentPath; +use codex_protocol::config_types::ReasoningSummary; use codex_protocol::models::BaseInstructions; use codex_protocol::models::ContentItem; use codex_protocol::models::FunctionCallOutputBody; @@ -18,12 +19,16 @@ use codex_protocol::models::ReasoningItemContent; use codex_protocol::models::ReasoningItemReasoningSummary; use codex_protocol::openai_models::InputModality; use codex_protocol::openai_models::default_input_modalities; +use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::InterAgentCommunication; +use codex_protocol::protocol::SandboxPolicy; +use codex_protocol::protocol::TurnContextItem; use image::ImageBuffer; use image::ImageFormat; use image::Rgba; use pretty_assertions::assert_eq; use regex_lite::Regex; +use std::path::PathBuf; const EXEC_FORMAT_MAX_BYTES: usize = 10_000; const EXEC_FORMAT_MAX_TOKENS: usize = 2_500; @@ -90,6 +95,56 @@ fn user_input_text_msg(text: &str) -> ResponseItem { } } +fn developer_msg(text: &str) -> ResponseItem { + ResponseItem::Message { + id: None, + role: "developer".to_string(), + content: vec![ContentItem::InputText { + text: text.to_string(), + }], + end_turn: None, + phase: None, + } +} + +fn developer_msg_with_fragments(texts: &[&str]) -> ResponseItem { + ResponseItem::Message { + id: None, + role: "developer".to_string(), + content: texts + .iter() + .map(|text| ContentItem::InputText { + text: (*text).to_string(), + }) + .collect(), + end_turn: None, + phase: None, + } +} + +fn reference_context_item() -> TurnContextItem { + TurnContextItem { + turn_id: Some("reference-turn".to_string()), + trace_id: None, + cwd: PathBuf::from("/tmp/reference-cwd"), + current_date: Some("2026-03-23".to_string()), + timezone: Some("America/Los_Angeles".to_string()), + approval_policy: AskForApproval::OnRequest, + sandbox_policy: SandboxPolicy::new_read_only_policy(), + network: None, + model: "gpt-test".to_string(), + personality: None, + collaboration_mode: None, + realtime_active: Some(false), + effort: None, + summary: ReasoningSummary::Auto, + user_instructions: None, + developer_instructions: None, + final_output_json_schema: None, + truncation_policy: Some(codex_protocol::protocol::TruncationPolicy::Tokens(10_000)), + } +} + fn custom_tool_call_output(call_id: &str, output: &str) -> ResponseItem { ResponseItem::CustomToolCallOutput { call_id: call_id.to_string(), @@ -851,6 +906,75 @@ fn drop_last_n_user_turns_ignores_session_prefix_user_messages() { assert_eq!(history.for_prompt(&modalities), expected_prefix_only); } +#[test] +fn drop_last_n_user_turns_trims_context_updates_above_rolled_back_turn() { + let items = vec![ + assistant_msg("session prefix item"), + user_input_text_msg("turn 1 user"), + assistant_msg("turn 1 assistant"), + developer_msg("Generated images are saved to /tmp as /tmp/image-1.png by default."), + developer_msg("ROLLED_BACK_DEV_INSTRUCTIONS"), + user_input_text_msg( + "PRETURN_CONTEXT_DIFF_CWD", + ), + user_input_text_msg("turn 2 user"), + assistant_msg("turn 2 assistant"), + ]; + + let modalities = default_input_modalities(); + let mut history = create_history_with_items(items); + let reference_context_item = reference_context_item(); + history.set_reference_context_item(Some(reference_context_item.clone())); + history.drop_last_n_user_turns(1); + + assert_eq!( + history.clone().for_prompt(&modalities), + vec![ + assistant_msg("session prefix item"), + user_input_text_msg("turn 1 user"), + assistant_msg("turn 1 assistant"), + developer_msg("Generated images are saved to /tmp as /tmp/image-1.png by default."), + ] + ); + assert_eq!( + serde_json::to_value(history.reference_context_item()) + .expect("serialize retained reference context item"), + serde_json::to_value(Some(reference_context_item)) + .expect("serialize expected reference context item") + ); +} + +#[test] +fn drop_last_n_user_turns_clears_reference_context_for_mixed_developer_context_bundles() { + let items = vec![ + user_input_text_msg("turn 1 user"), + assistant_msg("turn 1 assistant"), + developer_msg_with_fragments(&[ + "contextual permissions", + "persistent plugin instructions", + ]), + user_input_text_msg( + "PRETURN_CONTEXT_DIFF_CWD", + ), + user_input_text_msg("turn 2 user"), + assistant_msg("turn 2 assistant"), + ]; + + let modalities = default_input_modalities(); + let mut history = create_history_with_items(items); + history.set_reference_context_item(Some(reference_context_item())); + history.drop_last_n_user_turns(1); + + assert_eq!( + history.clone().for_prompt(&modalities), + vec![ + user_input_text_msg("turn 1 user"), + assistant_msg("turn 1 assistant"), + ] + ); + assert!(history.reference_context_item().is_none()); +} + #[test] fn remove_first_item_handles_custom_tool_pair() { let items = vec![ diff --git a/codex-rs/core/src/event_mapping.rs b/codex-rs/core/src/event_mapping.rs index 8f80547a4c6..5e174944f12 100644 --- a/codex-rs/core/src/event_mapping.rs +++ b/codex-rs/core/src/event_mapping.rs @@ -14,6 +14,8 @@ use codex_protocol::models::is_image_close_tag_text; use codex_protocol::models::is_image_open_tag_text; use codex_protocol::models::is_local_image_close_tag_text; use codex_protocol::models::is_local_image_open_tag_text; +use codex_protocol::protocol::COLLABORATION_MODE_OPEN_TAG; +use codex_protocol::protocol::REALTIME_CONVERSATION_OPEN_TAG; use codex_protocol::user_input::UserInput; use tracing::warn; use uuid::Uuid; @@ -22,10 +24,48 @@ use crate::contextual_user_message::is_contextual_user_fragment; use crate::contextual_user_message::parse_visible_hook_prompt_message; use crate::web_search::web_search_action_detail; +const CONTEXTUAL_DEVELOPER_PREFIXES: &[&str] = &[ + "", + "", + COLLABORATION_MODE_OPEN_TAG, + REALTIME_CONVERSATION_OPEN_TAG, + "", +]; + pub(crate) fn is_contextual_user_message_content(message: &[ContentItem]) -> bool { message.iter().any(is_contextual_user_fragment) } +/// Returns true when a developer message contains any rollback-trimmable contextual fragment. +/// +/// `build_initial_context` can bundle these fragments together with persistent developer text in a +/// single developer message, so callers that care about invalidating a stored reference baseline +/// should pair this with `has_non_contextual_dev_message_content`. +pub(crate) fn is_contextual_dev_message_content(message: &[ContentItem]) -> bool { + message.iter().any(is_contextual_dev_fragment) +} + +/// Returns true when a developer message contains any fragment that is not part of the +/// rollback-trimmable contextual prefix set. +pub(crate) fn has_non_contextual_dev_message_content(message: &[ContentItem]) -> bool { + message + .iter() + .any(|content_item| !is_contextual_dev_fragment(content_item)) +} + +fn is_contextual_dev_fragment(content_item: &ContentItem) -> bool { + let ContentItem::InputText { text } = content_item else { + return false; + }; + + let trimmed = text.trim_start(); + CONTEXTUAL_DEVELOPER_PREFIXES.iter().any(|prefix| { + trimmed + .get(..prefix.len()) + .is_some_and(|candidate| candidate.eq_ignore_ascii_case(prefix)) + }) +} + fn parse_user_message(message: &[ContentItem]) -> Option { if is_contextual_user_message_content(message) { return None; diff --git a/codex-rs/core/tests/suite/compact_resume_fork.rs b/codex-rs/core/tests/suite/compact_resume_fork.rs index 2007aab55bd..a3c3f91370d 100644 --- a/codex-rs/core/tests/suite/compact_resume_fork.rs +++ b/codex-rs/core/tests/suite/compact_resume_fork.rs @@ -513,8 +513,9 @@ async fn snapshot_rollback_past_compaction_replays_append_only_history() -> Resu #[tokio::test(flavor = "multi_thread", worker_threads = 2)] /// Scenario: rolling back a turn that introduced persistent pre-turn context -/// diffs currently duplicates those context updates on the next request. -async fn snapshot_rollback_followup_turn_duplicates_context_updates() -> Result<()> { +/// diffs should trim those context updates so the next request includes them +/// only once. +async fn snapshot_rollback_followup_turn_trims_context_updates() -> Result<()> { if network_disabled() { println!("Skipping test because network is disabled in this sandbox"); return Ok(()); @@ -593,14 +594,12 @@ async fn snapshot_rollback_followup_turn_duplicates_context_updates() -> Result< let requests = request_log.requests(); assert_eq!(requests.len(), 3); - assert_eq!( - requests[1] - .message_input_texts("developer") - .iter() - .filter(|text| text.contains(ROLLED_BACK_DEV_INSTRUCTIONS)) - .count(), - 1 - ); + let before_rollback_developer_count = requests[1] + .message_input_texts("developer") + .iter() + .filter(|text| text.contains(ROLLED_BACK_DEV_INSTRUCTIONS)) + .count(); + assert_eq!(before_rollback_developer_count, 1); assert_eq!( requests[1] .message_input_texts("user") @@ -609,14 +608,13 @@ async fn snapshot_rollback_followup_turn_duplicates_context_updates() -> Result< .count(), 1 ); - assert_eq!( - requests[2] - .message_input_texts("developer") - .iter() - .filter(|text| text.contains(ROLLED_BACK_DEV_INSTRUCTIONS)) - .count(), - 2 - ); + + let after_rollback_developer_count = requests[2] + .message_input_texts("developer") + .iter() + .filter(|text| text.contains(ROLLED_BACK_DEV_INSTRUCTIONS)) + .count(); + assert_eq!(after_rollback_developer_count, 1); let after_rollback_user_texts = requests[2].message_input_texts("user"); assert_eq!( @@ -624,7 +622,7 @@ async fn snapshot_rollback_followup_turn_duplicates_context_updates() -> Result< .iter() .filter(|text| text.contains(PRETURN_CONTEXT_DIFF_CWD)) .count(), - 2 + 1 ); assert_eq!( after_rollback_user_texts.last().map(String::as_str), @@ -632,9 +630,9 @@ async fn snapshot_rollback_followup_turn_duplicates_context_updates() -> Result< ); insta::assert_snapshot!( - "rollback_followup_turn_duplicates_context_updates", + "rollback_followup_turn_trims_context_updates", context_snapshot::format_labeled_requests_snapshot( - "rollback currently duplicates pre-turn override context updates on the follow-up request", + "rollback trims pre-turn override context updates before the follow-up request", &[ ("rolled-back turn request", &requests[1]), ("follow-up request after rollback", &requests[2]), diff --git a/codex-rs/core/tests/suite/snapshots/all__suite__compact_resume_fork__rollback_followup_turn_duplicates_context_updates.snap b/codex-rs/core/tests/suite/snapshots/all__suite__compact_resume_fork__rollback_followup_turn_trims_context_updates.snap similarity index 57% rename from codex-rs/core/tests/suite/snapshots/all__suite__compact_resume_fork__rollback_followup_turn_duplicates_context_updates.snap rename to codex-rs/core/tests/suite/snapshots/all__suite__compact_resume_fork__rollback_followup_turn_trims_context_updates.snap index 676421c3730..925a810a43c 100644 --- a/codex-rs/core/tests/suite/snapshots/all__suite__compact_resume_fork__rollback_followup_turn_duplicates_context_updates.snap +++ b/codex-rs/core/tests/suite/snapshots/all__suite__compact_resume_fork__rollback_followup_turn_trims_context_updates.snap @@ -1,8 +1,8 @@ --- source: core/tests/suite/compact_resume_fork.rs -expression: "context_snapshot::format_labeled_requests_snapshot(\"rollback currently duplicates pre-turn override context updates on the follow-up request\",\n&[(\"rolled-back turn request\", &requests[1]),\n(\"follow-up request after rollback\", &requests[2]),],\n&ContextSnapshotOptions::default().strip_capability_instructions().render_mode(ContextSnapshotRenderMode::KindWithTextPrefix\n{ max_chars: 96 }),)" +expression: "context_snapshot::format_labeled_requests_snapshot(\"rollback trims pre-turn override context updates before the follow-up request\",\n&[(\"rolled-back turn request\", &requests[1]),\n(\"follow-up request after rollback\", &requests[2]),],\n&ContextSnapshotOptions::default().strip_capability_instructions().render_mode(ContextSnapshotRenderMode::KindWithTextPrefix\n{ max_chars: 96 }),)" --- -Scenario: rollback currently duplicates pre-turn override context updates on the follow-up request +Scenario: rollback trims pre-turn override context updates before the follow-up request ## rolled-back turn request 00:message/developer: @@ -20,6 +20,4 @@ Scenario: rollback currently duplicates pre-turn override context updates on the 03:message/assistant:turn 1 assistant 04:message/developer:ROLLED_BACK_DEV_INSTRUCTIONS 05:message/user: -06:message/developer:ROLLED_BACK_DEV_INSTRUCTIONS -07:message/user: -08:message/user:follow-up user +06:message/user:follow-up user diff --git a/codex-rs/core/tests/suite/snapshots/all__suite__compact_resume_fork__rollback_past_compaction_shapes.snap b/codex-rs/core/tests/suite/snapshots/all__suite__compact_resume_fork__rollback_past_compaction_shapes.snap index 04e45c3a682..bb694af6934 100644 --- a/codex-rs/core/tests/suite/snapshots/all__suite__compact_resume_fork__rollback_past_compaction_shapes.snap +++ b/codex-rs/core/tests/suite/snapshots/all__suite__compact_resume_fork__rollback_past_compaction_shapes.snap @@ -23,6 +23,4 @@ Scenario: rollback past compaction replay after rollback 01:message/user:\nSUMMARY_ONLY_CONTEXT 02:message/developer: 03:message/user:> -04:message/developer: -05:message/user:> -06:message/user:AFTER_ROLLBACK +04:message/user:AFTER_ROLLBACK