From 480f8d43a8b70d4c70315f1a9d77ab091dac03a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sat, 24 Jan 2026 17:09:32 +0800 Subject: [PATCH 1/7] feat: add Vision Language Model (VLM) support Add support for image inputs in chat completions, enabling users to send images for model analysis (e.g., GPT-4V, Claude Vision, Doubao Vision). Changes: - Add ImageContentBlock type to internal adapter types - Update request adapters to parse image content: - openai-chat: parse image_url content parts - anthropic: parse image content blocks with base64 source - openai-response: parse input_image parts - Update upstream adapters to build provider-specific formats: - openai: build content arrays with image_url parts - anthropic: build image content blocks - openai-responses: build input_image content - Update API validation schema to accept content arrays with images - Add frontend image rendering in request detail panel Supported image formats: - Remote URL: {"type": "image_url", "image_url": {"url": "https://..."}} - Base64 data URL: {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}} Refs #56 Co-Authored-By: Claude Opus 4.5 --- backend/src/adapters/request/anthropic.ts | 11 ++- backend/src/adapters/request/openai-chat.ts | 13 ++- .../src/adapters/request/openai-response.ts | 40 ++++++-- backend/src/adapters/response/anthropic.ts | 3 + backend/src/adapters/types.ts | 17 +++- backend/src/adapters/upstream/anthropic.ts | 26 +++++ .../src/adapters/upstream/openai-responses.ts | 67 +++++++++++-- backend/src/adapters/upstream/openai.ts | 74 +++++++++++++-- backend/src/api/v1/completions.ts | 25 ++++- .../requests/detail-panel/pretty-view.tsx | 95 +++++++++++++++++++ 10 files changed, 341 insertions(+), 30 deletions(-) diff --git a/backend/src/adapters/request/anthropic.ts b/backend/src/adapters/request/anthropic.ts index 12bfea0..5450a22 100644 --- a/backend/src/adapters/request/anthropic.ts +++ 
b/backend/src/adapters/request/anthropic.ts @@ -4,6 +4,7 @@ */ import type { + ImageContentBlock, InternalContentBlock, InternalMessage, InternalRequest, @@ -137,8 +138,14 @@ function convertContentBlock( } case "image": - // Images not supported in MVP - return null; + return { + type: "image", + source: { + type: "base64", + mediaType: block.source?.media_type, + data: block.source?.data, + }, + } as ImageContentBlock; default: return null; diff --git a/backend/src/adapters/request/openai-chat.ts b/backend/src/adapters/request/openai-chat.ts index db3802b..3db4207 100644 --- a/backend/src/adapters/request/openai-chat.ts +++ b/backend/src/adapters/request/openai-chat.ts @@ -4,6 +4,7 @@ */ import type { + ImageContentBlock, InternalContentBlock, InternalMessage, InternalRequest, @@ -122,13 +123,21 @@ function convertContent( if (typeof content === "string") { return content; } - // Array of content parts - currently only support text (images not supported yet) + // Array of content parts - support text and image_url const blocks: InternalContentBlock[] = []; for (const part of content) { if (part.type === "text" && part.text) { blocks.push({ type: "text", text: part.text }); + } else if (part.type === "image_url" && part.image_url) { + blocks.push({ + type: "image", + source: { + type: "url", + url: part.image_url.url, + }, + detail: part.image_url.detail, + } as ImageContentBlock); } - // Skip image_url parts for now (not supported in MVP) } const [firstBlock] = blocks; diff --git a/backend/src/adapters/request/openai-response.ts b/backend/src/adapters/request/openai-response.ts index c211a34..67c1aba 100644 --- a/backend/src/adapters/request/openai-response.ts +++ b/backend/src/adapters/request/openai-response.ts @@ -4,6 +4,8 @@ */ import type { + ImageContentBlock, + InternalContentBlock, InternalMessage, InternalRequest, InternalToolDefinition, @@ -102,13 +104,39 @@ const KNOWN_FIELDS = new Set([ // 
============================================================================= /** - * Convert Response API content parts to string + * Convert Response API content parts to string or content blocks */ -function convertContentParts(parts: ResponseApiContentPart[]): string { - return parts - .filter((p) => p.type === "input_text" || p.type === "text") - .map((p) => p.text || "") - .join(""); +function convertContentParts( + parts: ResponseApiContentPart[], +): string | InternalContentBlock[] { + const hasImages = parts.some((p) => p.type === "input_image"); + + if (!hasImages) { + // Simple case: text only + return parts + .filter((p) => p.type === "input_text" || p.type === "text") + .map((p) => p.text || "") + .join(""); + } + + // Complex case: includes images + const blocks: InternalContentBlock[] = []; + for (const part of parts) { + if (part.type === "input_text" || part.type === "text") { + if (part.text) { + blocks.push({ type: "text", text: part.text }); + } + } else if (part.type === "input_image" && part.image_url) { + blocks.push({ + type: "image", + source: { + type: "url", + url: part.image_url, + }, + } as ImageContentBlock); + } + } + return blocks; } /** diff --git a/backend/src/adapters/response/anthropic.ts b/backend/src/adapters/response/anthropic.ts index be19c8d..cf323af 100644 --- a/backend/src/adapters/response/anthropic.ts +++ b/backend/src/adapters/response/anthropic.ts @@ -94,6 +94,9 @@ function convertContentBlock( case "tool_result": // Tool results are not included in Anthropic assistant responses return null; + case "image": + // Images are not included in assistant responses (only in requests) + return null; } } diff --git a/backend/src/adapters/types.ts b/backend/src/adapters/types.ts index 04e3400..c942858 100644 --- a/backend/src/adapters/types.ts +++ b/backend/src/adapters/types.ts @@ -45,6 +45,20 @@ export interface ToolResultContentBlock { isError?: boolean; } +/** + * Image content block - represents an image input for vision 
models + */ +export interface ImageContentBlock { + type: "image"; + source: { + type: "base64" | "url"; + mediaType?: string; // "image/jpeg", "image/png", etc. + data?: string; // base64 data (when type is "base64") + url?: string; // image URL (when type is "url") + }; + detail?: "auto" | "low" | "high"; // OpenAI vision detail level +} + /** * Union type for all content blocks */ @@ -52,7 +66,8 @@ export type InternalContentBlock = | TextContentBlock | ThinkingContentBlock | ToolUseContentBlock - | ToolResultContentBlock; + | ToolResultContentBlock + | ImageContentBlock; // ============================================================================= // Message Types diff --git a/backend/src/adapters/upstream/anthropic.ts b/backend/src/adapters/upstream/anthropic.ts index df66db0..db6ff27 100644 --- a/backend/src/adapters/upstream/anthropic.ts +++ b/backend/src/adapters/upstream/anthropic.ts @@ -26,6 +26,12 @@ interface AnthropicContentBlock { type: "text" | "image" | "tool_use" | "tool_result" | "thinking"; text?: string; thinking?: string; + source?: { + type: "base64" | "url"; + media_type?: string; + data?: string; + url?: string; + }; id?: string; name?: string; input?: Record; @@ -195,6 +201,26 @@ function convertMessage(msg: InternalMessage): AnthropicMessage | null { text: block.text, cache_control: block.cacheControl, }); + } else if (block.type === "image") { + if (block.source.type === "base64") { + content.push({ + type: "image", + source: { + type: "base64", + media_type: block.source.mediaType || "image/jpeg", + data: block.source.data || "", + }, + }); + } else if (block.source.type === "url" && block.source.url) { + // Anthropic also supports URL source type + content.push({ + type: "image", + source: { + type: "url", + url: block.source.url, + }, + }); + } } } diff --git a/backend/src/adapters/upstream/openai-responses.ts b/backend/src/adapters/upstream/openai-responses.ts index fea801e..ac7060f 100644 --- 
a/backend/src/adapters/upstream/openai-responses.ts +++ b/backend/src/adapters/upstream/openai-responses.ts @@ -4,6 +4,7 @@ */ import type { + ImageContentBlock, InternalContentBlock, InternalMessage, InternalRequest, @@ -22,8 +23,10 @@ import type { // ============================================================================= interface ResponseApiContentPart { - type: "input_text" | "output_text" | "refusal"; + type: "input_text" | "output_text" | "refusal" | "input_image"; text?: string; + image_url?: string; + detail?: "auto" | "low" | "high"; } interface ResponseApiInputItem { @@ -102,6 +105,24 @@ interface ResponseApiStreamEvent { // Conversion Functions // ============================================================================= +/** + * Convert image source to URL format + */ +function convertImageToUrl(block: ImageContentBlock): string { + if (block.source.type === "url" && block.source.url) { + return block.source.url; + } + // Convert base64 to data URL + return `data:${block.source.mediaType || "image/jpeg"};base64,${block.source.data}`; +} + +/** + * Check if content blocks contain any images + */ +function hasImages(content: InternalContentBlock[]): boolean { + return content.some((b) => b.type === "image"); +} + /** * Convert internal message to Response API input item */ @@ -126,14 +147,42 @@ function convertMessage(msg: InternalMessage): ResponseApiInputItem | null { }; } - // Regular messages - const content = - typeof msg.content === "string" - ? 
msg.content - : msg.content - .filter((b) => b.type === "text") - .map((b) => b.text) - .join(""); + // Handle string content + if (typeof msg.content === "string") { + return { + type: "message", + role: msg.role, + content: msg.content, + }; + } + + // Handle content array - check if it contains images + if (hasImages(msg.content)) { + // Build content array with input_text and input_image parts + const contentParts: ResponseApiContentPart[] = []; + for (const block of msg.content) { + if (block.type === "text") { + contentParts.push({ type: "input_text", text: block.text }); + } else if (block.type === "image") { + contentParts.push({ + type: "input_image", + image_url: convertImageToUrl(block), + detail: block.detail, + }); + } + } + return { + type: "message", + role: msg.role, + content: contentParts, + }; + } + + // Text-only content - join as string + const content = msg.content + .filter((b) => b.type === "text") + .map((b) => b.text) + .join(""); return { type: "message", diff --git a/backend/src/adapters/upstream/openai.ts b/backend/src/adapters/upstream/openai.ts index 32c8c80..72596eb 100644 --- a/backend/src/adapters/upstream/openai.ts +++ b/backend/src/adapters/upstream/openai.ts @@ -4,6 +4,7 @@ */ import type { + ImageContentBlock, InternalContentBlock, InternalMessage, InternalRequest, @@ -23,9 +24,18 @@ import type { // OpenAI Request/Response Types // ============================================================================= +interface OpenAIContentPart { + type: "text" | "image_url"; + text?: string; + image_url?: { + url: string; + detail?: "auto" | "low" | "high"; + }; +} + interface OpenAIMessage { role: "system" | "user" | "assistant" | "tool"; - content: string | null; + content: string | OpenAIContentPart[] | null; name?: string; tool_calls?: OpenAIToolCall[]; tool_call_id?: string; @@ -125,6 +135,24 @@ interface OpenAIToolCallDelta { // Conversion Functions // 
============================================================================= +/** + * Convert image source to OpenAI image URL format + */ +function convertImageToUrl(block: ImageContentBlock): string { + if (block.source.type === "url" && block.source.url) { + return block.source.url; + } + // Convert base64 to data URL + return `data:${block.source.mediaType || "image/jpeg"};base64,${block.source.data}`; +} + +/** + * Check if content blocks contain any images + */ +function hasImages(content: InternalContentBlock[]): boolean { + return content.some((b) => b.type === "image"); +} + /** * Convert internal message to OpenAI format */ @@ -168,14 +196,42 @@ function convertMessage(msg: InternalMessage): OpenAIMessage { }; } - // Regular messages - const content = - typeof msg.content === "string" - ? msg.content - : msg.content - .filter((b) => b.type === "text") - .map((b) => b.text) - .join(""); + // Handle string content + if (typeof msg.content === "string") { + return { + role: msg.role, + content: msg.content, + }; + } + + // Handle content array - check if it contains images + if (hasImages(msg.content)) { + // Build content array with text and image_url parts + const contentParts: OpenAIContentPart[] = []; + for (const block of msg.content) { + if (block.type === "text") { + contentParts.push({ type: "text", text: block.text }); + } else if (block.type === "image") { + contentParts.push({ + type: "image_url", + image_url: { + url: convertImageToUrl(block), + detail: block.detail, + }, + }); + } + } + return { + role: msg.role, + content: contentParts, + }; + } + + // Text-only content array - join as string + const content = msg.content + .filter((b) => b.type === "text") + .map((b) => b.text) + .join(""); return { role: msg.role, diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts index db36840..068829e 100644 --- a/backend/src/api/v1/completions.ts +++ b/backend/src/api/v1/completions.ts @@ -72,11 +72,34 @@ const tToolChoice 
= t.Union([ }), ]); +// Content part schema - supports text and image_url +const tContentPart = t.Union([ + t.Object({ + type: t.Literal("text"), + text: t.String(), + }), + t.Object({ + type: t.Literal("image_url"), + image_url: t.Object({ + url: t.String(), + detail: t.Optional(t.Union([ + t.Literal("auto"), + t.Literal("low"), + t.Literal("high"), + ])), + }), + }), +]); + // Message schema - supports various message types const tMessage = t.Object( { role: t.String(), - content: t.Optional(t.Union([t.String(), t.Null()])), + content: t.Optional(t.Union([ + t.String(), + t.Null(), + t.Array(tContentPart), + ])), tool_calls: t.Optional(t.Array(t.Object({ id: t.String(), type: t.Literal("function"), diff --git a/frontend/src/pages/requests/detail-panel/pretty-view.tsx b/frontend/src/pages/requests/detail-panel/pretty-view.tsx index 9c84e66..25fb92e 100644 --- a/frontend/src/pages/requests/detail-panel/pretty-view.tsx +++ b/frontend/src/pages/requests/detail-panel/pretty-view.tsx @@ -5,6 +5,7 @@ import { CopyIcon, ForwardIcon, HelpCircleIcon, + ImageIcon, ReplyIcon, WrenchIcon, TerminalIcon, @@ -47,6 +48,15 @@ interface ToolDefinition { } } +// Image content part type +interface ImageContentPart { + type: 'image_url' + image_url: { + url: string + detail?: 'auto' | 'low' | 'high' + } +} + export function MessagesPrettyView() { const { t } = useTranslation() @@ -132,6 +142,9 @@ function MessageContent({ message }: { message: RequestMessage }) { // Check if this is an assistant message with tool calls const toolCalls = extendedMessage.tool_calls + // Extract images from content array + const images = getMessageImages(message) + const { content, reasoning } = match(message) .with({ role: 'assistant' }, () => extractReasoning(messageText)) .otherwise(() => ({ reasoning: null, content: messageText })) @@ -147,6 +160,20 @@ function MessageContent({ message }: { message: RequestMessage }) { {reasoning && } {content && } + {images.length > 0 && ( +
+
+ + {t('pages.requests.detail-panel.pretty-view.Images', { defaultValue: 'Images' })} + {images.length} +
+
+ {images.map((image, index) => ( + + ))} +
+
+ )} {toolCalls && toolCalls.length > 0 && (
@@ -463,6 +490,74 @@ function getMessageText(message: RequestMessage): string { .otherwise(() => '') } +/** + * Extract image content parts from a message + */ +function getMessageImages(message: RequestMessage): ImageContentPart[] { + // Handle case where content is an array + const content = (message as { content?: unknown }).content + if (!content || typeof content === 'string' || !Array.isArray(content)) { + return [] + } + + const images: ImageContentPart[] = [] + for (const part of content) { + if ( + part && + typeof part === 'object' && + 'type' in part && + part.type === 'image_url' && + 'image_url' in part && + part.image_url && + typeof part.image_url === 'object' && + 'url' in part.image_url + ) { + images.push({ + type: 'image_url', + image_url: { + url: String(part.image_url.url), + detail: (part.image_url as { detail?: 'auto' | 'low' | 'high' }).detail, + }, + }) + } + } + return images +} + +/** + * Component to display an image from a message + */ +function ImageContentDisplay({ image }: { image: ImageContentPart }) { + const { t } = useTranslation() + const { url, detail } = image.image_url + + // Check if it's a data URL (base64) + const isDataUrl = url.startsWith('data:') + + return ( +
+ + {t('pages.requests.detail-panel.pretty-view.UserImage', + + {detail && ( +
+ {t('pages.requests.detail-panel.pretty-view.ImageDetail', { defaultValue: 'Detail' })}: {detail} +
+ )} +
+ ) +} + /** * Component to display a single tool call */ From 8585bc0aa46cc53680d30823cc345eb236226848 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sat, 24 Jan 2026 17:16:37 +0800 Subject: [PATCH 2/7] fix: improve Anthropic image handling - Support URL source type in Anthropic request adapter (not just base64) - Validate base64 data is present before sending to Anthropic API Co-Authored-By: Claude Opus 4.5 --- backend/src/adapters/request/anthropic.ts | 18 +++++++++++++++--- backend/src/adapters/upstream/anthropic.ts | 5 +++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/backend/src/adapters/request/anthropic.ts b/backend/src/adapters/request/anthropic.ts index 5450a22..66bcc01 100644 --- a/backend/src/adapters/request/anthropic.ts +++ b/backend/src/adapters/request/anthropic.ts @@ -26,9 +26,10 @@ interface AnthropicContentBlock { text?: string; thinking?: string; source?: { - type: "base64"; - media_type: string; - data: string; + type: "base64" | "url"; + media_type?: string; + data?: string; + url?: string; }; id?: string; name?: string; @@ -138,6 +139,17 @@ function convertContentBlock( } case "image": + // Handle both base64 and URL source types + if (block.source?.type === "url" && block.source.url) { + return { + type: "image", + source: { + type: "url", + url: block.source.url, + }, + } as ImageContentBlock; + } + // Default to base64 return { type: "image", source: { diff --git a/backend/src/adapters/upstream/anthropic.ts b/backend/src/adapters/upstream/anthropic.ts index db6ff27..4e5d20c 100644 --- a/backend/src/adapters/upstream/anthropic.ts +++ b/backend/src/adapters/upstream/anthropic.ts @@ -202,13 +202,14 @@ function convertMessage(msg: InternalMessage): AnthropicMessage | null { cache_control: block.cacheControl, }); } else if (block.type === "image") { - if (block.source.type === "base64") { + // Only push image blocks with valid data + if (block.source.type === "base64" && block.source.data) { 
content.push({ type: "image", source: { type: "base64", media_type: block.source.mediaType || "image/jpeg", - data: block.source.data || "", + data: block.source.data, }, }); } else if (block.source.type === "url" && block.source.url) { From 1e7fcc4a414441e82580498e8aa35266a761c871 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sat, 24 Jan 2026 17:17:49 +0800 Subject: [PATCH 3/7] fix: validate image data before building upstream requests - Add validation in OpenAI upstream adapter to skip images with missing data - Add validation in OpenAI Responses upstream adapter for the same - Prevents invalid data URLs being sent to providers Co-Authored-By: Claude Opus 4.5 --- .../src/adapters/upstream/openai-responses.ts | 21 ++++++++++------ backend/src/adapters/upstream/openai.ts | 25 ++++++++++++------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/backend/src/adapters/upstream/openai-responses.ts b/backend/src/adapters/upstream/openai-responses.ts index ac7060f..e7618e9 100644 --- a/backend/src/adapters/upstream/openai-responses.ts +++ b/backend/src/adapters/upstream/openai-responses.ts @@ -112,8 +112,11 @@ function convertImageToUrl(block: ImageContentBlock): string { if (block.source.type === "url" && block.source.url) { return block.source.url; } - // Convert base64 to data URL - return `data:${block.source.mediaType || "image/jpeg"};base64,${block.source.data}`; + // Convert base64 to data URL - only if data is present + if (block.source.type === "base64" && block.source.data) { + return `data:${block.source.mediaType || "image/jpeg"};base64,${block.source.data}`; + } + return ""; } /** @@ -164,11 +167,15 @@ function convertMessage(msg: InternalMessage): ResponseApiInputItem | null { if (block.type === "text") { contentParts.push({ type: "input_text", text: block.text }); } else if (block.type === "image") { - contentParts.push({ - type: "input_image", - image_url: convertImageToUrl(block), - detail: block.detail, - }); 
+ // Only include images with valid data + const imageUrl = convertImageToUrl(block); + if (imageUrl) { + contentParts.push({ + type: "input_image", + image_url: imageUrl, + detail: block.detail, + }); + } } } return { diff --git a/backend/src/adapters/upstream/openai.ts b/backend/src/adapters/upstream/openai.ts index 72596eb..f3cf339 100644 --- a/backend/src/adapters/upstream/openai.ts +++ b/backend/src/adapters/upstream/openai.ts @@ -142,8 +142,11 @@ function convertImageToUrl(block: ImageContentBlock): string { if (block.source.type === "url" && block.source.url) { return block.source.url; } - // Convert base64 to data URL - return `data:${block.source.mediaType || "image/jpeg"};base64,${block.source.data}`; + // Convert base64 to data URL - only if data is present + if (block.source.type === "base64" && block.source.data) { + return `data:${block.source.mediaType || "image/jpeg"};base64,${block.source.data}`; + } + return ""; } /** @@ -212,13 +215,17 @@ function convertMessage(msg: InternalMessage): OpenAIMessage { if (block.type === "text") { contentParts.push({ type: "text", text: block.text }); } else if (block.type === "image") { - contentParts.push({ - type: "image_url", - image_url: { - url: convertImageToUrl(block), - detail: block.detail, - }, - }); + // Only include images with valid data + const imageUrl = convertImageToUrl(block); + if (imageUrl) { + contentParts.push({ + type: "image_url", + image_url: { + url: imageUrl, + detail: block.detail, + }, + }); + } } } return { From 88f4b0f48eb4372fcedfcef21917e9c09e6eff1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sat, 24 Jan 2026 17:22:42 +0800 Subject: [PATCH 4/7] refactor: use discriminated union type for ImageContentBlock - Use discriminated union for ImageSource to ensure type safety - When type is "base64", data field is required - When type is "url", url field is required - Update request adapters to validate data before creating blocks Co-Authored-By: Claude Opus 
4.5 --- backend/src/adapters/request/anthropic.ts | 22 ++++++++++++--------- backend/src/adapters/request/openai-chat.ts | 2 +- backend/src/adapters/types.ts | 21 ++++++++++++++------ 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/backend/src/adapters/request/anthropic.ts b/backend/src/adapters/request/anthropic.ts index 66bcc01..6327e5e 100644 --- a/backend/src/adapters/request/anthropic.ts +++ b/backend/src/adapters/request/anthropic.ts @@ -149,15 +149,19 @@ function convertContentBlock( }, } as ImageContentBlock; } - // Default to base64 - return { - type: "image", - source: { - type: "base64", - mediaType: block.source?.media_type, - data: block.source?.data, - }, - } as ImageContentBlock; + // Default to base64 - only if data is present + if (block.source?.data) { + return { + type: "image", + source: { + type: "base64", + mediaType: block.source.media_type, + data: block.source.data, + }, + } as ImageContentBlock; + } + // Skip images with missing data + return null; default: return null; diff --git a/backend/src/adapters/request/openai-chat.ts b/backend/src/adapters/request/openai-chat.ts index 3db4207..850cd33 100644 --- a/backend/src/adapters/request/openai-chat.ts +++ b/backend/src/adapters/request/openai-chat.ts @@ -128,7 +128,7 @@ function convertContent( for (const part of content) { if (part.type === "text" && part.text) { blocks.push({ type: "text", text: part.text }); - } else if (part.type === "image_url" && part.image_url) { + } else if (part.type === "image_url" && part.image_url?.url) { blocks.push({ type: "image", source: { diff --git a/backend/src/adapters/types.ts b/backend/src/adapters/types.ts index c942858..0d13fa4 100644 --- a/backend/src/adapters/types.ts +++ b/backend/src/adapters/types.ts @@ -45,17 +45,26 @@ export interface ToolResultContentBlock { isError?: boolean; } +/** + * Image source types - discriminated union for type safety + */ +export type ImageSource = + | { + type: "base64"; + mediaType?: string; // 
"image/jpeg", "image/png", etc. + data: string; + } + | { + type: "url"; + url: string; + }; + /** * Image content block - represents an image input for vision models */ export interface ImageContentBlock { type: "image"; - source: { - type: "base64" | "url"; - mediaType?: string; // "image/jpeg", "image/png", etc. - data?: string; // base64 data (when type is "base64") - url?: string; // image URL (when type is "url") - }; + source: ImageSource; detail?: "auto" | "low" | "high"; // OpenAI vision detail level } From ccb76f8417f617d9bebfdd7e7a720c017b918a64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sat, 24 Jan 2026 17:29:26 +0800 Subject: [PATCH 5/7] fix: improve image content validation - Check source.type === "base64" before processing as base64 in Anthropic adapter - Handle empty content array edge case in OpenAI upstream adapters - Fallback to empty string if contentParts is empty Co-Authored-By: Claude Opus 4.5 --- backend/src/adapters/request/anthropic.ts | 4 ++-- backend/src/adapters/upstream/openai-responses.ts | 8 ++++++++ backend/src/adapters/upstream/openai.ts | 7 +++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/backend/src/adapters/request/anthropic.ts b/backend/src/adapters/request/anthropic.ts index 6327e5e..ffe9e57 100644 --- a/backend/src/adapters/request/anthropic.ts +++ b/backend/src/adapters/request/anthropic.ts @@ -149,8 +149,8 @@ function convertContentBlock( }, } as ImageContentBlock; } - // Default to base64 - only if data is present - if (block.source?.data) { + // Handle base64 - only if type matches and data is present + if (block.source?.type === "base64" && block.source.data) { return { type: "image", source: { diff --git a/backend/src/adapters/upstream/openai-responses.ts b/backend/src/adapters/upstream/openai-responses.ts index e7618e9..fb64a0c 100644 --- a/backend/src/adapters/upstream/openai-responses.ts +++ b/backend/src/adapters/upstream/openai-responses.ts @@ -178,6 +178,14 
@@ function convertMessage(msg: InternalMessage): ResponseApiInputItem | null { } } } + // Ensure we don't send empty content array to API + if (contentParts.length === 0) { + return { + type: "message", + role: msg.role, + content: "", + }; + } return { type: "message", role: msg.role, diff --git a/backend/src/adapters/upstream/openai.ts b/backend/src/adapters/upstream/openai.ts index f3cf339..1f23221 100644 --- a/backend/src/adapters/upstream/openai.ts +++ b/backend/src/adapters/upstream/openai.ts @@ -228,6 +228,13 @@ function convertMessage(msg: InternalMessage): OpenAIMessage { } } } + // Ensure we don't send empty content array to API + if (contentParts.length === 0) { + return { + role: msg.role, + content: "", + }; + } return { role: msg.role, content: contentParts, From a66cc433812a58b8a231974240563131feb18433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sat, 24 Jan 2026 17:33:24 +0800 Subject: [PATCH 6/7] refactor: extract shared image utility functions - Create backend/src/adapters/upstream/utils.ts with shared functions - Extract convertImageToUrl and hasImages to shared module - Update openai.ts and openai-responses.ts to use shared utilities - Reduces code duplication and improves maintainability Co-Authored-By: Claude Opus 4.5 --- .../src/adapters/upstream/openai-responses.ts | 23 +--------------- backend/src/adapters/upstream/openai.ts | 23 +--------------- backend/src/adapters/upstream/utils.ts | 26 +++++++++++++++++++ 3 files changed, 28 insertions(+), 44 deletions(-) create mode 100644 backend/src/adapters/upstream/utils.ts diff --git a/backend/src/adapters/upstream/openai-responses.ts b/backend/src/adapters/upstream/openai-responses.ts index fb64a0c..5fe6c93 100644 --- a/backend/src/adapters/upstream/openai-responses.ts +++ b/backend/src/adapters/upstream/openai-responses.ts @@ -4,7 +4,6 @@ */ import type { - ImageContentBlock, InternalContentBlock, InternalMessage, InternalRequest, @@ -17,6 +16,7 @@ import type { 
ToolUseContentBlock, UpstreamAdapter, } from "../types"; +import { convertImageToUrl, hasImages } from "./utils"; // ============================================================================= // Response API Types @@ -105,27 +105,6 @@ interface ResponseApiStreamEvent { // Conversion Functions // ============================================================================= -/** - * Convert image source to URL format - */ -function convertImageToUrl(block: ImageContentBlock): string { - if (block.source.type === "url" && block.source.url) { - return block.source.url; - } - // Convert base64 to data URL - only if data is present - if (block.source.type === "base64" && block.source.data) { - return `data:${block.source.mediaType || "image/jpeg"};base64,${block.source.data}`; - } - return ""; -} - -/** - * Check if content blocks contain any images - */ -function hasImages(content: InternalContentBlock[]): boolean { - return content.some((b) => b.type === "image"); -} - /** * Convert internal message to Response API input item */ diff --git a/backend/src/adapters/upstream/openai.ts b/backend/src/adapters/upstream/openai.ts index 1f23221..f04b928 100644 --- a/backend/src/adapters/upstream/openai.ts +++ b/backend/src/adapters/upstream/openai.ts @@ -4,7 +4,6 @@ */ import type { - ImageContentBlock, InternalContentBlock, InternalMessage, InternalRequest, @@ -19,6 +18,7 @@ import type { ToolUseContentBlock, UpstreamAdapter, } from "../types"; +import { convertImageToUrl, hasImages } from "./utils"; // ============================================================================= // OpenAI Request/Response Types @@ -135,27 +135,6 @@ interface OpenAIToolCallDelta { // Conversion Functions // ============================================================================= -/** - * Convert image source to OpenAI image URL format - */ -function convertImageToUrl(block: ImageContentBlock): string { - if (block.source.type === "url" && block.source.url) { - return block.source.url; 
- } - // Convert base64 to data URL - only if data is present - if (block.source.type === "base64" && block.source.data) { - return `data:${block.source.mediaType || "image/jpeg"};base64,${block.source.data}`; - } - return ""; -} - -/** - * Check if content blocks contain any images - */ -function hasImages(content: InternalContentBlock[]): boolean { - return content.some((b) => b.type === "image"); -} - /** * Convert internal message to OpenAI format */ diff --git a/backend/src/adapters/upstream/utils.ts b/backend/src/adapters/upstream/utils.ts new file mode 100644 index 0000000..41ebb18 --- /dev/null +++ b/backend/src/adapters/upstream/utils.ts @@ -0,0 +1,26 @@ +/** + * Shared utility functions for upstream adapters + */ + +import type { ImageContentBlock, InternalContentBlock } from "../types"; + +/** + * Convert image source to URL format (data URL for base64, direct URL for url type) + */ +export function convertImageToUrl(block: ImageContentBlock): string { + if (block.source.type === "url") { + return block.source.url; + } + // Convert base64 to data URL + if (block.source.type === "base64") { + return `data:${block.source.mediaType || "image/jpeg"};base64,${block.source.data}`; + } + return ""; +} + +/** + * Check if content blocks contain any images + */ +export function hasImages(content: InternalContentBlock[]): boolean { + return content.some((b) => b.type === "image"); +} From 7acbe0439ca35cfd10e933fede02a8e7bbff0e5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sat, 24 Jan 2026 17:38:40 +0800 Subject: [PATCH 7/7] fix: parse data URLs as base64 in request adapters - Add parseImageUrl helper to detect and parse data URLs - Convert data URLs (data:image/...;base64,...) 
to base64 source type - Ensures cross-provider compatibility (e.g., OpenAI to Anthropic) - Applied to both openai-chat and openai-response adapters Co-Authored-By: Claude Opus 4.5 --- backend/src/adapters/request/openai-chat.ts | 37 +++++++++++++++++-- .../src/adapters/request/openai-response.ts | 37 +++++++++++++++++-- 2 files changed, 66 insertions(+), 8 deletions(-) diff --git a/backend/src/adapters/request/openai-chat.ts b/backend/src/adapters/request/openai-chat.ts index 850cd33..208d664 100644 --- a/backend/src/adapters/request/openai-chat.ts +++ b/backend/src/adapters/request/openai-chat.ts @@ -5,6 +5,7 @@ import type { ImageContentBlock, + ImageSource, InternalContentBlock, InternalMessage, InternalRequest, @@ -15,6 +16,35 @@ import type { ToolUseContentBlock, } from "../types"; + +// ============================================================================= +// Helper Functions +// ============================================================================= + +/** + * Parse a data URL into base64 source, or return URL source for regular URLs + * Data URL format: data:[<mediatype>][;base64],<data> + */ +function parseImageUrl(url: string): ImageSource { + if (url.startsWith("data:")) { + // Parse data URL: data:image/jpeg;base64,/9j/4AAQ... 
+ const match = url.match(/^data:([^;,]+)?(?:;base64)?,(.*)$/); + if (match) { + const mediaType = match[1] || "image/jpeg"; + const data = match[2] || ""; + return { + type: "base64", + mediaType, + data, + }; + } + } + // Regular URL + return { + type: "url", + url, + }; +} + // ============================================================================= // OpenAI Chat Request Types // ============================================================================= @@ -129,12 +159,11 @@ function convertContent( if (part.type === "text" && part.text) { blocks.push({ type: "text", text: part.text }); } else if (part.type === "image_url" && part.image_url?.url) { + // Parse URL - handles both regular URLs and data URLs (base64) + const source = parseImageUrl(part.image_url.url); blocks.push({ type: "image", - source: { - type: "url", - url: part.image_url.url, - }, + source, detail: part.image_url.detail, } as ImageContentBlock); } diff --git a/backend/src/adapters/request/openai-response.ts b/backend/src/adapters/request/openai-response.ts index 67c1aba..296ed7e 100644 --- a/backend/src/adapters/request/openai-response.ts +++ b/backend/src/adapters/request/openai-response.ts @@ -5,6 +5,7 @@ import type { ImageContentBlock, + ImageSource, InternalContentBlock, InternalMessage, InternalRequest, @@ -14,6 +15,35 @@ import type { ToolResultContentBlock, } from "../types"; + +// ============================================================================= +// Helper Functions +// ============================================================================= + +/** + * Parse a data URL into base64 source, or return URL source for regular URLs + * Data URL format: data:[<mediatype>][;base64],<data> + */ +function parseImageUrl(url: string): ImageSource { + if (url.startsWith("data:")) { + // Parse data URL: data:image/jpeg;base64,/9j/4AAQ... 
+ const match = url.match(/^data:([^;,]+)?(?:;base64)?,(.*)$/); + if (match) { + const mediaType = match[1] || "image/jpeg"; + const data = match[2] || ""; + return { + type: "base64", + mediaType, + data, + }; + } + } + // Regular URL + return { + type: "url", + url, + }; +} + // ============================================================================= // OpenAI Response API Request Types // ============================================================================= @@ -127,12 +157,11 @@ function convertContentParts( blocks.push({ type: "text", text: part.text }); } } else if (part.type === "input_image" && part.image_url) { + // Parse URL - handles both regular URLs and data URLs (base64) + const source = parseImageUrl(part.image_url); blocks.push({ type: "image", - source: { - type: "url", - url: part.image_url, - }, + source, } as ImageContentBlock); } }