diff --git a/codex-rs/app-server-protocol/schema/json/EventMsg.json b/codex-rs/app-server-protocol/schema/json/EventMsg.json index 39af1ac96d6..9bd2055745e 100644 --- a/codex-rs/app-server-protocol/schema/json/EventMsg.json +++ b/codex-rs/app-server-protocol/schema/json/EventMsg.json @@ -1421,6 +1421,12 @@ "null" ] }, + "saved_path": { + "type": [ + "string", + "null" + ] + }, "status": { "type": "string" }, @@ -6069,6 +6075,12 @@ "null" ] }, + "saved_path": { + "type": [ + "string", + "null" + ] + }, "status": { "type": "string" }, @@ -7214,6 +7226,12 @@ "null" ] }, + "saved_path": { + "type": [ + "string", + "null" + ] + }, "status": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json index ff9f8d7d3bc..add14ca2517 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json @@ -2652,6 +2652,12 @@ "null" ] }, + "saved_path": { + "type": [ + "string", + "null" + ] + }, "status": { "type": "string" }, @@ -7445,6 +7451,12 @@ "null" ] }, + "saved_path": { + "type": [ + "string", + "null" + ] + }, "status": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json index da523ea97cd..df681bd1474 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json @@ -4189,6 +4189,12 @@ "null" ] }, + "saved_path": { + "type": [ + "string", + "null" + ] + }, "status": { "type": "string" }, @@ -13847,6 +13853,12 @@ "null" ] }, + "saved_path": { + "type": [ + "string", + "null" + ] + }, "status": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/typescript/ImageGenerationEndEvent.ts b/codex-rs/app-server-protocol/schema/typescript/ImageGenerationEndEvent.ts index edc8c5e16dd..a1a71ce3804 100644 --- a/codex-rs/app-server-protocol/schema/typescript/ImageGenerationEndEvent.ts +++ b/codex-rs/app-server-protocol/schema/typescript/ImageGenerationEndEvent.ts @@ -2,4 +2,4 @@ // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. -export type ImageGenerationEndEvent = { call_id: string, status: string, revised_prompt?: string, result: string, }; +export type ImageGenerationEndEvent = { call_id: string, status: string, revised_prompt?: string, result: string, saved_path?: string, }; diff --git a/codex-rs/app-server-protocol/schema/typescript/ImageGenerationItem.ts b/codex-rs/app-server-protocol/schema/typescript/ImageGenerationItem.ts index 46032bff726..0edb7c22e6c 100644 --- a/codex-rs/app-server-protocol/schema/typescript/ImageGenerationItem.ts +++ b/codex-rs/app-server-protocol/schema/typescript/ImageGenerationItem.ts @@ -2,4 +2,4 @@ // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. -export type ImageGenerationItem = { id: string, status: string, revised_prompt?: string, result: string, }; +export type ImageGenerationItem = { id: string, status: string, revised_prompt?: string, result: string, saved_path?: string, }; diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs index 50facf53e4f..f37f30ed628 100644 --- a/codex-rs/core/src/client_common.rs +++ b/codex-rs/core/src/client_common.rs @@ -167,7 +167,7 @@ pub(crate) mod tools { #[serde(rename = "local_shell")] LocalShell {}, #[serde(rename = "image_generation")] - ImageGeneration {}, + ImageGeneration { output_format: String }, // TODO: Understand why we get an error on web_search although the API docs say it's supported. // https://platform.openai.com/docs/guides/tools-web-search?api-mode=responses#:~:text=%7B%20type%3A%20%22web_search%22%20%7D%2C // The `external_web_access` field determines whether the web search is over cached or live content. @@ -186,7 +186,7 @@ pub(crate) mod tools { match self { ToolSpec::Function(tool) => tool.name.as_str(), ToolSpec::LocalShell {} => "local_shell", - ToolSpec::ImageGeneration {} => "image_generation", + ToolSpec::ImageGeneration { .. } => "image_generation", ToolSpec::WebSearch { .. } => "web_search", ToolSpec::Freeform(tool) => tool.name.as_str(), } diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 0d04086bd48..5d029e418d9 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -6217,7 +6217,9 @@ async fn handle_assistant_item_done_in_plan_mode( { maybe_complete_plan_item_from_message(sess, turn_context, state, item).await; - if let Some(turn_item) = handle_non_tool_response_item(item, true) { + if let Some(turn_item) = + handle_non_tool_response_item(item, true, Some(&turn_context.cwd)).await + { emit_turn_item_in_plan_mode( sess, turn_context, @@ -6396,7 +6398,9 @@ async fn try_run_sampling_request( needs_follow_up |= output_result.needs_follow_up; } ResponseEvent::OutputItemAdded(item) => { - if let Some(turn_item) = handle_non_tool_response_item(&item, plan_mode) { + if let Some(turn_item) = + handle_non_tool_response_item(&item, plan_mode, Some(&turn_context.cwd)).await + { let mut turn_item = turn_item; let mut seeded_parsed: Option = None; let mut seeded_item_id: Option = None; diff --git a/codex-rs/core/src/event_mapping.rs b/codex-rs/core/src/event_mapping.rs index a5c09b1ef73..09f1235718b 100644 --- a/codex-rs/core/src/event_mapping.rs +++ b/codex-rs/core/src/event_mapping.rs @@ -153,6 +153,7 @@ pub fn parse_turn_item(item: &ResponseItem) -> Option { status: status.clone(), revised_prompt: revised_prompt.clone(), result: result.clone(), + saved_path: None, }, )), _ => None, diff --git a/codex-rs/core/src/stream_events_utils.rs b/codex-rs/core/src/stream_events_utils.rs index e44dacd4252..7f2ddb251ef 100644 --- a/codex-rs/core/src/stream_events_utils.rs +++ b/codex-rs/core/src/stream_events_utils.rs @@ -1,6 +1,10 @@ +use std::path::Path; +use std::path::PathBuf; use std::pin::Pin; use std::sync::Arc; +use base64::Engine; +use base64::engine::general_purpose::STANDARD as BASE64_STANDARD; use codex_protocol::config_types::ModeKind; use codex_protocol::items::TurnItem; use codex_utils_stream_parser::strip_citations; @@ -50,6 +54,34 @@ pub(crate) fn raw_assistant_output_text_from_item(item: &ResponseItem) -> Option None } +async fn save_image_generation_result_to_cwd( + cwd: &Path, + call_id: &str, + result: &str, +) -> Result { + let bytes = BASE64_STANDARD + .decode(result.trim().as_bytes()) + .map_err(|err| { + CodexErr::InvalidRequest(format!("invalid image generation payload: {err}")) + })?; + let mut file_stem: String = call_id + .chars() + .map(|ch| { + if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' { + ch + } else { + '_' + } + }) + .collect(); + if file_stem.is_empty() { + file_stem = "generated_image".to_string(); + } + let path = cwd.join(format!("{file_stem}.png")); + tokio::fs::write(&path, bytes).await?; + Ok(path) +} + /// Persist a completed model response item and record any cited memory usage. pub(crate) async fn record_completed_response_item( sess: &Session, @@ -157,13 +189,16 @@ pub(crate) async fn handle_output_item_done( } // No tool call: convert messages/reasoning into turn items and mark them as complete. Ok(None) => { - if let Some(turn_item) = handle_non_tool_response_item(&item, plan_mode) { + if let Some(turn_item) = + handle_non_tool_response_item(&item, plan_mode, Some(&ctx.turn_context.cwd)).await + { if previously_active_item.is_none() { let mut started_item = turn_item.clone(); if let TurnItem::ImageGeneration(item) = &mut started_item { item.status = "in_progress".to_string(); item.revised_prompt = None; item.result.clear(); + item.saved_path = None; } ctx.sess .emit_turn_item_started(&ctx.turn_context, &started_item) @@ -240,9 +275,10 @@ pub(crate) async fn handle_output_item_done( Ok(output) } -pub(crate) fn handle_non_tool_response_item( +pub(crate) async fn handle_non_tool_response_item( item: &ResponseItem, plan_mode: bool, + image_output_cwd: Option<&Path>, ) -> Option { debug!(?item, "Output item"); @@ -264,6 +300,24 @@ pub(crate) fn handle_non_tool_response_item( agent_message.content = vec![codex_protocol::items::AgentMessageContent::Text { text: stripped }]; } + if let TurnItem::ImageGeneration(image_item) = &mut turn_item + && let Some(cwd) = image_output_cwd + { + match save_image_generation_result_to_cwd(cwd, &image_item.id, &image_item.result) + .await + { + Ok(path) => { + image_item.saved_path = Some(path.to_string_lossy().into_owned()); + } + Err(err) => { + tracing::warn!( + call_id = %image_item.id, + cwd = %cwd.display(), + "failed to save generated image: {err}" + ); + } + } + } Some(turn_item) } ResponseItem::FunctionCallOutput { .. } | ResponseItem::CustomToolCallOutput { .. } => { @@ -326,10 +380,13 @@ pub(crate) fn response_input_to_response_item(input: &ResponseInputItem) -> Opti mod tests { use super::handle_non_tool_response_item; use super::last_assistant_message_from_item; + use super::save_image_generation_result_to_cwd; + use crate::error::CodexErr; use codex_protocol::items::TurnItem; use codex_protocol::models::ContentItem; use codex_protocol::models::ResponseItem; use pretty_assertions::assert_eq; + use tempfile::tempdir; fn assistant_output_text(text: &str) -> ResponseItem { ResponseItem::Message { @@ -343,12 +400,14 @@ mod tests { } } - #[test] - fn handle_non_tool_response_item_strips_citations_from_assistant_message() { + #[tokio::test] + async fn handle_non_tool_response_item_strips_citations_from_assistant_message() { let item = assistant_output_text("hellodoc1 world"); let turn_item = - handle_non_tool_response_item(&item, false).expect("assistant message should parse"); + handle_non_tool_response_item(&item, false, Some(std::path::Path::new("."))) + .await + .expect("assistant message should parse"); let TurnItem::AgentMessage(agent_message) = turn_item else { panic!("expected agent message"); @@ -388,4 +447,84 @@ mod tests { assert_eq!(last_assistant_message_from_item(&item, true), None); } + + #[tokio::test] + async fn save_image_generation_result_saves_base64_to_png_in_cwd() { + let dir = tempdir().expect("tempdir"); + + let saved_path = save_image_generation_result_to_cwd(dir.path(), "ig_123", "Zm9v") + .await + .expect("image should be saved"); + + assert_eq!( + saved_path.file_name().and_then(|v| v.to_str()), + Some("ig_123.png") + ); + assert_eq!(std::fs::read(saved_path).expect("saved file"), b"foo"); + } + + #[tokio::test] + async fn save_image_generation_result_rejects_data_url_payload() { + let dir = tempdir().expect("tempdir"); + let result = "data:image/jpeg;base64,Zm9v"; + + let err = save_image_generation_result_to_cwd(dir.path(), "ig_456", result) + .await + .expect_err("data url payload should error"); + assert!(matches!(err, CodexErr::InvalidRequest(_))); + } + + #[tokio::test] + async fn save_image_generation_result_overwrites_existing_file() { + let dir = tempdir().expect("tempdir"); + let existing_path = dir.path().join("ig_123.png"); + std::fs::write(&existing_path, b"existing").expect("seed existing image"); + + let saved_path = save_image_generation_result_to_cwd(dir.path(), "ig_123", "Zm9v") + .await + .expect("image should be saved"); + + assert_eq!( + saved_path.file_name().and_then(|v| v.to_str()), + Some("ig_123.png") + ); + assert_eq!(std::fs::read(saved_path).expect("saved file"), b"foo"); + } + + #[tokio::test] + async fn save_image_generation_result_sanitizes_call_id_for_output_path() { + let dir = tempdir().expect("tempdir"); + + let saved_path = save_image_generation_result_to_cwd(dir.path(), "../ig/..", "Zm9v") + .await + .expect("image should be saved"); + + assert_eq!(saved_path.parent(), Some(dir.path())); + assert_eq!( + saved_path.file_name().and_then(|v| v.to_str()), + Some("___ig___.png") + ); + assert_eq!(std::fs::read(saved_path).expect("saved file"), b"foo"); + } + + #[tokio::test] + async fn save_image_generation_result_rejects_non_standard_base64() { + let dir = tempdir().expect("tempdir"); + + let err = save_image_generation_result_to_cwd(dir.path(), "ig_urlsafe", "_-8") + .await + .expect_err("non-standard base64 should error"); + assert!(matches!(err, CodexErr::InvalidRequest(_))); + } + + #[tokio::test] + async fn save_image_generation_result_rejects_non_base64_data_urls() { + let dir = tempdir().expect("tempdir"); + + let err = + save_image_generation_result_to_cwd(dir.path(), "ig_svg", "data:image/svg+xml,") + .await + .expect_err("non-base64 data url should error"); + assert!(matches!(err, CodexErr::InvalidRequest(_))); + } } diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index dcdb48500e4..283b6c2aa15 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -1892,7 +1892,9 @@ pub(crate) fn build_specs( } if config.image_gen_tool { - builder.push_spec(ToolSpec::ImageGeneration {}); + builder.push_spec(ToolSpec::ImageGeneration { + output_format: "png".to_string(), + }); } builder.push_spec_with_parallel_support(create_view_image_tool(), true); @@ -2024,7 +2026,7 @@ mod tests { match tool { ToolSpec::Function(ResponsesApiTool { name, .. }) => name, ToolSpec::LocalShell {} => "local_shell", - ToolSpec::ImageGeneration {} => "image_generation", + ToolSpec::ImageGeneration { .. } => "image_generation", ToolSpec::WebSearch { .. } => "web_search", ToolSpec::Freeform(FreeformTool { name, .. }) => name, } @@ -2116,7 +2118,7 @@ mod tests { } ToolSpec::Freeform(_) | ToolSpec::LocalShell {} - | ToolSpec::ImageGeneration {} + | ToolSpec::ImageGeneration { .. } | ToolSpec::WebSearch { .. } => {} } } @@ -2401,6 +2403,14 @@ mod tests { }); let (supported_tools, _) = build_specs(&supported_tools_config, None, None, &[]).build(); assert_contains_tool_names(&supported_tools, &["image_generation"]); + let image_generation_tool = find_tool(&supported_tools, "image_generation"); + assert_eq!( + serde_json::to_value(&image_generation_tool.spec).expect("serialize image tool"), + serde_json::json!({ + "type": "image_generation", + "output_format": "png" + }) + ); let tools_config = ToolsConfig::new(&ToolsConfigParams { model_info: &unsupported_model_info, diff --git a/codex-rs/core/tests/suite/items.rs b/codex-rs/core/tests/suite/items.rs index 56c1b374adb..01136a84abf 100644 --- a/codex-rs/core/tests/suite/items.rs +++ b/codex-rs/core/tests/suite/items.rs @@ -269,7 +269,7 @@ async fn image_generation_call_event_is_emitted() -> anyhow::Result<()> { let server = start_mock_server().await; - let TestCodex { codex, .. } = test_codex().build(&server).await?; + let TestCodex { codex, cwd, .. } = test_codex().build(&server).await?; let first_response = sse(vec![ ev_response_created("resp-1"), @@ -304,6 +304,59 @@ async fn image_generation_call_event_is_emitted() -> anyhow::Result<()> { assert_eq!(end.status, "completed"); assert_eq!(end.revised_prompt, Some("A tiny blue square".to_string())); assert_eq!(end.result, "Zm9v"); + let expected_saved_path = cwd.path().join("ig_123.png"); + assert_eq!( + end.saved_path, + Some(expected_saved_path.to_string_lossy().into_owned()) + ); + assert_eq!(std::fs::read(expected_saved_path)?, b"foo"); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn image_generation_call_event_is_emitted_when_image_save_fails() -> anyhow::Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + + let TestCodex { codex, cwd, .. } = test_codex().build(&server).await?; + + let first_response = sse(vec![ + ev_response_created("resp-1"), + ev_image_generation_call("ig_invalid", "completed", "broken payload", "_-8"), + ev_completed("resp-1"), + ]); + mount_sse_once(&server, first_response).await; + + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "generate an image".into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + }) + .await?; + + let begin = wait_for_event_match(&codex, |ev| match ev { + EventMsg::ImageGenerationBegin(event) => Some(event.clone()), + _ => None, + }) + .await; + let end = wait_for_event_match(&codex, |ev| match ev { + EventMsg::ImageGenerationEnd(event) => Some(event.clone()), + _ => None, + }) + .await; + + assert_eq!(begin.call_id, "ig_invalid"); + assert_eq!(end.call_id, "ig_invalid"); + assert_eq!(end.status, "completed"); + assert_eq!(end.revised_prompt, Some("broken payload".to_string())); + assert_eq!(end.result, "_-8"); + assert_eq!(end.saved_path, None); + assert!(!cwd.path().join("ig_invalid.png").exists()); Ok(()) } diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs index 584f3acc3ef..ae58f3c9fd3 100644 --- a/codex-rs/exec/src/event_processor_with_human_output.rs +++ b/codex-rs/exec/src/event_processor_with_human_output.rs @@ -457,12 +457,27 @@ impl EventProcessor for EventProcessorWithHumanOutput { ); } EventMsg::ImageGenerationEnd(generated) => { - ts_msg!( - self, - "{} {}", - "generated image".style(self.magenta), - generated.call_id - ); + if !generated.result.is_empty() + && !generated.result.starts_with("data:") + && !generated.result.starts_with("http://") + && !generated.result.starts_with("https://") + && !generated.result.starts_with("file://") + { + ts_msg!( + self, + "{} {} {}", + "generated image".style(self.magenta), + generated.call_id, + generated.result.style(self.dimmed) + ); + } else { + ts_msg!( + self, + "{} {}", + "generated image".style(self.magenta), + generated.call_id + ); + } } EventMsg::PatchApplyBegin(PatchApplyBeginEvent { call_id, diff --git a/codex-rs/protocol/src/items.rs b/codex-rs/protocol/src/items.rs index 9b904c90086..f200fe6f752 100644 --- a/codex-rs/protocol/src/items.rs +++ b/codex-rs/protocol/src/items.rs @@ -89,6 +89,9 @@ pub struct ImageGenerationItem { #[ts(optional)] pub revised_prompt: Option, pub result: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + #[ts(optional)] + pub saved_path: Option, } #[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)] @@ -254,6 +257,7 @@ impl ImageGenerationItem { status: self.status.clone(), revised_prompt: self.revised_prompt.clone(), result: self.result.clone(), + saved_path: self.saved_path.clone(), }) } } diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index dbae3f3b548..39e54dc83de 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -1898,6 +1898,9 @@ pub struct ImageGenerationEndEvent { #[ts(optional)] pub revised_prompt: Option, pub result: String, + #[serde(skip_serializing_if = "Option::is_none")] + #[ts(optional)] + pub saved_path: Option, } // Conversation kept for backward compatibility. @@ -3270,6 +3273,7 @@ mod tests { status: "in_progress".into(), revised_prompt: None, result: String::new(), + saved_path: None, }), }; @@ -3291,6 +3295,7 @@ mod tests { status: "completed".into(), revised_prompt: Some("A tiny blue square".into()), result: "Zm9v".into(), + saved_path: Some("/tmp/ig-1.png".into()), }), }; @@ -3302,6 +3307,7 @@ mod tests { assert_eq!(event.status, "completed"); assert_eq!(event.revised_prompt.as_deref(), Some("A tiny blue square")); assert_eq!(event.result, "Zm9v"); + assert_eq!(event.saved_path.as_deref(), Some("/tmp/ig-1.png")); } _ => panic!("expected ImageGenerationEnd event"), } diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index 72fa61c13a0..2536c3300c7 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -6053,6 +6053,7 @@ async fn image_generation_call_adds_history_cell() { status: "completed".into(), revised_prompt: Some("A tiny blue square".into()), result: "Zm9v".into(), + saved_path: None, }), });