From b0673a98b8eef2674636d5554818132f780c40b4 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Sat, 23 Aug 2025 23:52:17 +0900 Subject: [PATCH 01/14] feat: Add support for responses API in Azure Compatible Provider. --- packages/types/src/provider-settings.ts | 1 + src/api/providers/__tests__/openai.spec.ts | 430 ++++++++++++++++++--- src/api/providers/openai.ts | 383 +++++++++++++++++- 3 files changed, 766 insertions(+), 48 deletions(-) diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index 3fa7094d873..d2b38064105 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -179,6 +179,7 @@ const openAiSchema = baseProviderSettingsSchema.extend({ openAiStreamingEnabled: z.boolean().optional(), openAiHostHeader: z.string().optional(), // Keep temporarily for backward compatibility during migration. openAiHeaders: z.record(z.string(), z.string()).optional(), + openAiApiFlavor: z.union([z.literal("auto"), z.literal("responses"), z.literal("chat")]).optional(), }) const ollamaSchema = baseProviderSettingsSchema.extend({ diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index 14ed35430a5..9266180cd55 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -4,27 +4,51 @@ import { OpenAiHandler, getOpenAiModels } from "../openai" import { ApiHandlerOptions } from "../../../shared/api" import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" -import { openAiModelInfoSaneDefaults } from "@roo-code/types" import { Package } from "../../../shared/package" import axios from "axios" const mockCreate = vitest.fn() +const mockResponsesCreate = vitest.fn() vitest.mock("openai", () => { const mockConstructor = vitest.fn() - return { - __esModule: true, - default: mockConstructor.mockImplementation(() => ({ - chat: { - completions: { - create: mockCreate.mockImplementation(async (options) => { - if (!options.stream) { - return { - id: "test-completion", + const makeClient = () => ({ + chat: { + completions: { + create: mockCreate.mockImplementation(async (options) => { + if (!options.stream) { + return { + id: "test-completion", + choices: [ + { + message: { role: "assistant", content: "Test response", refusal: null }, + finish_reason: "stop", + index: 0, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + } + } + + return { + [Symbol.asyncIterator]: async function* () { + yield { choices: [ { - message: { role: "assistant", content: "Test response", refusal: null }, - finish_reason: "stop", + delta: { content: "Test response" }, + index: 0, + }, + ], + usage: null, + } + yield { + choices: [ + { + delta: {}, index: 0, }, ], @@ -34,38 +58,30 @@ vitest.mock("openai", () => { total_tokens: 15, }, } - } - - return { - [Symbol.asyncIterator]: async function* () { - yield { - choices: [ - { - delta: { content: "Test response" }, - index: 0, - }, - ], - usage: null, - } - yield { - choices: [ - { - delta: {}, - index: 0, - }, - ], - usage: { - prompt_tokens: 10, - completion_tokens: 5, - total_tokens: 15, - }, - } - }, - } - }), - }, + }, + } + }), }, - })), + }, + responses: { + create: mockResponsesCreate.mockImplementation(async (options) => { + // Default happy-path mock for non-streaming Responses API + return { + id: "test-response", + output_text: "Test response", + usage: { + input_tokens: 10, + output_tokens: 5, + total_tokens: 15, + }, + } + }), + }, + }) + return 
{ + __esModule: true, + default: mockConstructor.mockImplementation((args: any) => makeClient()), + AzureOpenAI: mockConstructor.mockImplementation((args: any) => makeClient()), } }) @@ -977,6 +993,56 @@ describe("getOpenAiModels", () => { expect(result).toEqual([]) }) + describe("Azure portal Responses URL normalization", () => { + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("Responses URL from Azure portal is converted to use Responses API", async () => { + const handler = new OpenAiHandler({ + openAiApiKey: "test-azure", + openAiModelId: "my-deployment", + openAiBaseUrl: "https://sample-name.openai.azure.com/openai/responses?api-version=2025-04-01-preview", + openAiUseAzure: true, + openAiStreamingEnabled: false, + includeMaxTokens: true, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 64, + supportsPromptCache: false, + }, + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: [{ type: "text", text: "Hello!" }] }, + ] + + const stream = handler.createMessage("You are Roo Code.", messages) + const chunks: any[] = [] + for await (const ch of stream) { + chunks.push(ch) + } + + // Should have used Responses API, not Chat Completions + expect(mockResponsesCreate).toHaveBeenCalled() + expect(mockCreate).not.toHaveBeenCalled() + + // Payload shape sanity + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("model", "my-deployment") + expect(args).toHaveProperty("input") + expect(typeof args.input).toBe("string") + expect(args.input).toContain("Developer: You are Roo Code.") + expect(args.input).toContain("User: Hello!") + expect(args).toHaveProperty("max_output_tokens", 64) + + // Ensure returned text chunk surfaced + const textChunk = chunks.find((c) => c.type === "text") + expect(textChunk?.text).toBe("Test response") + }) + }) + it("should deduplicate model IDs", async () => { const mockResponse = { data: { @@ -990,3 +1056,281 @@ describe("getOpenAiModels", () => { expect(result).toEqual(["gpt-4", "gpt-3.5-turbo"]) }) }) + +// -- Added Responses API tests (TDD) -- + +describe("OpenAI Compatible - Responses API", () => { + let handler: OpenAiHandler + const baseMessages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "text" as const, + text: "Hello!", + }, + ], + }, + ] + + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("Azure Responses happy path uses string input (no messages) and max_output_tokens", async () => { + const opts: ApiHandlerOptions = { + openAiApiKey: "test-azure", + openAiModelId: "my-deployment", + openAiBaseUrl: "https://myres.openai.azure.com/openai/v1/responses?api-version=preview", + openAiStreamingEnabled: false, + includeMaxTokens: true, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 256, + supportsPromptCache: false, + }, + enableReasoningEffort: false, + } + handler = new OpenAiHandler(opts) + + const stream = handler.createMessage("You are Roo Code.", baseMessages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Should have produced a text chunk + const textChunk = chunks.find((c) => c.type === "text") + expect(textChunk?.text).toBe("Test response") + + // Ensure Responses API was used + expect(mockResponsesCreate).toHaveBeenCalled() + expect(mockCreate).not.toHaveBeenCalled() + + const callArgs = mockResponsesCreate.mock.calls[0][0] + expect(callArgs).not.toHaveProperty("messages") + 
expect(callArgs).toHaveProperty("input") + expect(typeof callArgs.input).toBe("string") + expect(callArgs.input).toContain("Developer: You are Roo Code.") + expect(callArgs.input).toContain("User: Hello!") + expect(callArgs).toHaveProperty("model", "my-deployment") + // Azure Responses naming + expect(callArgs).toHaveProperty("max_output_tokens", 256) + }) + + it("Auto-detect: '/v1/responses' => Responses payload; '/chat/completions' => Chat Completions payload", async () => { + // Responses URL + const respHandler = new OpenAiHandler({ + openAiApiKey: "test", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + for await (const _ of respHandler.createMessage("sys", baseMessages)) { + } + expect(mockResponsesCreate).toHaveBeenCalled() + const respArgs = mockResponsesCreate.mock.calls.pop()?.[0] + expect(respArgs).not.toHaveProperty("messages") + expect(respArgs).toHaveProperty("input") + + // Chat Completions URL + mockResponsesCreate.mockClear() + mockCreate.mockClear() + const chatHandler = new OpenAiHandler({ + openAiApiKey: "test", + openAiModelId: "gpt-4o", + openAiBaseUrl: "https://api.openai.com/v1/chat/completions", + openAiStreamingEnabled: false, + }) + for await (const _ of chatHandler.createMessage("sys", baseMessages)) { + } + expect(mockCreate).toHaveBeenCalled() + const chatArgs = mockCreate.mock.calls.pop()?.[0] + expect(chatArgs).toHaveProperty("messages") + expect(chatArgs).not.toHaveProperty("input") + }) + + it("Manual override: force Responses or Chat regardless of URL", async () => { + // Force Responses + const forceResp = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1", // no responses segment + openAiStreamingEnabled: false, + openAiApiFlavor: "responses", + }) + for await (const _ of forceResp.createMessage("sys", baseMessages)) { + } + expect(mockResponsesCreate).toHaveBeenCalled() + const rArgs = mockResponsesCreate.mock.calls.pop()?.[0] + expect(rArgs).toHaveProperty("input") + expect(rArgs).not.toHaveProperty("messages") + + // Force Chat + mockResponsesCreate.mockClear() + mockCreate.mockClear() + const forceChat = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-4o", + openAiBaseUrl: "https://api.openai.com/v1/responses", // would auto-detect as responses + openAiStreamingEnabled: false, + openAiApiFlavor: "chat", + }) + for await (const _ of forceChat.createMessage("sys", baseMessages)) { + } + expect(mockCreate).toHaveBeenCalled() + const cArgs = mockCreate.mock.calls.pop()?.[0] + expect(cArgs).toHaveProperty("messages") + }) + + it("Reasoning effort mapping: Responses uses reasoning: { effort }, Chat uses reasoning_effort", async () => { + // Responses path + const responsesHandler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "high", + openAiCustomModelInfo: { + contextWindow: 128_000, + supportsPromptCache: false, + supportsReasoningEffort: true, + }, + }) + for await (const _ of responsesHandler.createMessage("sys", baseMessages)) { + } + expect(mockResponsesCreate).toHaveBeenCalled() + const rArgs = mockResponsesCreate.mock.calls.pop()?.[0] + expect(rArgs).toHaveProperty("reasoning") + expect(rArgs.reasoning).toEqual({ effort: "high" }) + + // Chat path + mockResponsesCreate.mockClear() + mockCreate.mockClear() + const chatHandler = new 
OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-4o", + openAiBaseUrl: "https://api.openai.com/v1/chat/completions", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "high", + openAiCustomModelInfo: { + contextWindow: 128_000, + supportsPromptCache: false, + supportsReasoningEffort: true, + }, + }) + for await (const _ of chatHandler.createMessage("sys", baseMessages)) { + } + expect(mockCreate).toHaveBeenCalled() + const cArgs = mockCreate.mock.calls.pop()?.[0] + expect(cArgs).toHaveProperty("reasoning_effort", "high") + }) + + it("Verbosity (Responses): include when set; if server rejects, retry without it (warn once)", async () => { + // First call throws 400 for 'verbosity', second succeeds + mockResponsesCreate.mockImplementationOnce((_opts: any) => { + const err = new Error("Unsupported parameter: 'verbosity'") + ;(err as any).status = 400 + throw err + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + verbosity: "high", + }) + + const stream = h.createMessage("sys", baseMessages) + const chunks: any[] = [] + for await (const ch of stream) { + chunks.push(ch) + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const first = mockResponsesCreate.mock.calls[0][0] + const second = mockResponsesCreate.mock.calls[1][0] + expect(first).toHaveProperty("text") + expect(first.text).toEqual({ verbosity: "high" }) + expect(second).not.toHaveProperty("text") + + // Should still yield text + const textChunk = chunks.find((c) => c.type === "text") + expect(textChunk?.text).toBe("Test response") + }) + + it("Azure naming: use max_output_tokens for Responses; keep max_completion_tokens for Chat Completions", async () => { + // Responses + includeMaxTokens + const r = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + includeMaxTokens: true, + modelMaxTokens: 128, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 4096, + supportsPromptCache: false, + }, + }) + for await (const _ of r.createMessage("sys", baseMessages)) { + } + const rArgs = mockResponsesCreate.mock.calls.pop()?.[0] + expect(rArgs).toHaveProperty("max_output_tokens", 128) + expect(rArgs).not.toHaveProperty("max_completion_tokens") + + // Chat + includeMaxTokens + mockResponsesCreate.mockClear() + mockCreate.mockClear() + const c = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-4o", + openAiBaseUrl: "https://api.openai.com/v1/chat/completions", + openAiStreamingEnabled: false, + includeMaxTokens: true, + modelMaxTokens: 128, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 4096, + supportsPromptCache: false, + }, + }) + for await (const _ of c.createMessage("sys", baseMessages)) { + } + const cArgs = mockCreate.mock.calls.pop()?.[0] + expect(cArgs).toHaveProperty("max_completion_tokens", 128) + expect(cArgs).not.toHaveProperty("max_output_tokens") + }) + + it("Normalizes Azure portal responses URL to /openai/v1 with apiVersion=preview", async () => { + mockResponsesCreate.mockClear() + mockCreate.mockClear() + + const portalUrl = "https://sample-name.openai.azure.com/openai/responses?api-version=2025-04-01-preview" + + const handler = new OpenAiHandler({ + openAiApiKey: "test-azure", + openAiModelId: "my-deployment", + openAiBaseUrl: portalUrl, + openAiStreamingEnabled: false, + }) + + for await (const _ of 
handler.createMessage("sys", baseMessages)) { + } + + // Ensures Responses API path was used + expect(mockResponsesCreate).toHaveBeenCalled() + + // Ensure SDK constructor was called with normalized baseURL and 'preview' apiVersion (per requirement) + // Note: AzureOpenAI and OpenAI share same mock constructor; inspect last call + const ctorCalls = vi.mocked(OpenAI as unknown as any).mock.calls as any[] + const lastCtorArgs = ctorCalls[ctorCalls.length - 1]?.[0] || {} + expect(lastCtorArgs.baseURL).toBe("https://sample-name.openai.azure.com/openai/v1") + expect(lastCtorArgs.apiVersion).toBe("preview") + }) +}) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 36158d770c1..b61955433cb 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -36,10 +36,18 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl super() this.options = options - const baseURL = this.options.openAiBaseUrl ?? "https://api.openai.com/v1" + // Normalize Azure Responses "web" URL shape if provided by users. + // Example input (Azure portal sometimes shows): + // https://{resource}.openai.azure.com/openai/responses?api-version=2025-04-01-preview + // We normalize to Azure SDK-friendly base and version: + // baseURL: https://{resource}.openai.azure.com/openai/v1 + // apiVersion: preview + const rawBaseURL = this.options.openAiBaseUrl ?? "https://api.openai.com/v1" + const azureNormalization = this._normalizeAzureResponsesBaseUrlAndVersion(rawBaseURL) + const baseURL = azureNormalization.baseURL const apiKey = this.options.openAiApiKey ?? "not-provided" - const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) - const urlHost = this._getUrlHost(this.options.openAiBaseUrl) + const isAzureAiInference = this._isAzureAiInference(baseURL) + const urlHost = this._getUrlHost(baseURL) const isAzureOpenAi = urlHost === "azure.com" || urlHost.endsWith(".azure.com") || options.openAiUseAzure const headers = { @@ -61,10 +69,23 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } else if (isAzureOpenAi) { // Azure API shape slightly differs from the core API shape: // https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai + + // Determine if we're using the Responses API flavor for Azure + const flavor = this._resolveApiFlavor(this.options.openAiApiFlavor, this.options.openAiBaseUrl ?? "") + const isResponsesFlavor = + flavor === "responses" || + this._isAzureOpenAiResponses(this.options.openAiBaseUrl) || + this._isAzureOpenAiResponses(baseURL) + + // Always use 'preview' for Azure Responses API calls (per user requirement) + const azureVersion = isResponsesFlavor + ? 
"preview" + : this.options.azureApiVersion || azureOpenAiDefaultApiVersion + this.client = new AzureOpenAI({ baseURL, apiKey, - apiVersion: this.options.azureApiVersion || azureOpenAiDefaultApiVersion, + apiVersion: azureVersion, defaultHeaders: headers, timeout, }) @@ -83,7 +104,21 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - const { info: modelInfo, reasoning } = this.getModel() + // Gather model params (centralized: temperature, max tokens, reasoning, verbosity) + const modelParams = this.getModel() + const { + info: modelInfo, + reasoning, + reasoningEffort, + verbosity, + } = modelParams as unknown as { + id: string + info: ModelInfo + reasoning?: { reasoning_effort?: "low" | "medium" | "high" } + reasoningEffort?: "minimal" | "low" | "medium" | "high" + verbosity?: "low" | "medium" | "high" + } + const modelUrl = this.options.openAiBaseUrl ?? "" const modelId = this.options.openAiModelId ?? "" const enabledR1Format = this.options.openAiR1FormatEnabled ?? false @@ -92,6 +127,70 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format const ark = modelUrl.includes(".volces.com") + // Decide API flavor (manual override > auto-detect by URL) + const flavor = this._resolveApiFlavor(this.options.openAiApiFlavor, modelUrl) + + // If Responses API is selected, use the Responses payload and endpoint + if (flavor === "responses") { + const nonStreaming = !(this.options.openAiStreamingEnabled ?? true) + + // Build Responses payload (align with OpenAI Native Responses API formatting) + const formattedInput = this._formatResponsesInput(systemPrompt, messages) + const payload: Record = { + model: modelId, + input: formattedInput, + } + + // Reasoning effort (Responses expects: reasoning: { effort }) + if (this.options.enableReasoningEffort && (this.options.reasoningEffort || reasoningEffort)) { + const effort = (this.options.reasoningEffort || reasoningEffort) as + | "minimal" + | "low" + | "medium" + | "high" + | undefined + // If effort is set and not "minimal" (minimal is treated as "no explicit effort") + if (effort && effort !== "minimal") { + payload.reasoning = { effort } + } + } + + // Temperature (only include when explicitly set by the user) + if (this.options.modelTemperature !== undefined) { + payload.temperature = this.options.modelTemperature + } else if (deepseekReasoner) { + payload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE + } + + // Verbosity: include via text.verbosity (Responses API expectation per openai-native handler) + if (this.options.verbosity || verbosity) { + ;(payload as any).text = { verbosity: this.options.verbosity || verbosity } + } + + // Add max_output_tokens if requested (Azure Responses naming) + if (this.options.includeMaxTokens === true) { + payload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens + } + + // NOTE: Streaming for Responses API isn't covered by current tests. + // We call non-streaming for now to preserve stable behavior. 
+ try { + const response: any = await (this.client as any).responses.create(payload) + yield* this._yieldResponsesResult(response, modelInfo) + } catch (err: unknown) { + // Graceful downgrade if verbosity is rejected by server (400 unknown/unsupported parameter) + if ((payload as any).text && this._isVerbosityUnsupportedError(err)) { + // Remove text.verbosity and retry once + const { text: _omit, ...withoutVerbosity } = payload as any + const response: any = await (this.client as any).responses.create(withoutVerbosity) + yield* this._yieldResponsesResult(response, modelInfo) + } else { + throw err + } + } + return + } + if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) { yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages) return @@ -232,6 +331,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ? [systemMessage, ...convertToSimpleMessages(messages)] : [systemMessage, ...convertToOpenAiMessages(messages)], } + // Include reasoning_effort for Chat Completions when available + if (reasoning) { + Object.assign(requestOptions, reasoning) + } // Add max_tokens if needed this.addMaxTokensIfNeeded(requestOptions, modelInfo) @@ -270,9 +373,64 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl async completePrompt(prompt: string): Promise { try { const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) + const flavor = this._resolveApiFlavor(this.options.openAiApiFlavor, this.options.openAiBaseUrl ?? "") const model = this.getModel() const modelInfo = model.info + // Use Responses API when selected (non-streaming convenience method) + if (flavor === "responses") { + // Build a single-turn formatted string input (Developer/User style) for Responses API + const formattedInput = this._formatResponsesSingleMessage( + { + role: "user", + content: [{ type: "text", text: prompt }] as any, + } as Anthropic.Messages.MessageParam, + /*includeRole*/ true, + ) + const payload: Record = { + model: model.id, + input: formattedInput, + } + + // Reasoning effort (Responses) + const effort = (this.options.reasoningEffort || (model as any).reasoningEffort) as + | "minimal" + | "low" + | "medium" + | "high" + | undefined + if (this.options.enableReasoningEffort && effort && effort !== "minimal") { + payload.reasoning = { effort } + } + + // Temperature if set + if (this.options.modelTemperature !== undefined) { + payload.temperature = this.options.modelTemperature + } + + // Verbosity via text.verbosity + if (this.options.verbosity) { + ;(payload as any).text = { verbosity: this.options.verbosity } + } + + // max_output_tokens + if (this.options.includeMaxTokens === true) { + payload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens + } + + try { + const response: any = await (this.client as any).responses.create(payload) + return this._extractResponsesText(response) ?? "" + } catch (err: unknown) { + if ((payload as any).text && this._isVerbosityUnsupportedError(err)) { + const { text: _omit, ...withoutVerbosity } = payload as any + const response: any = await (this.client as any).responses.create(withoutVerbosity) + return this._extractResponsesText(response) ?? 
"" + } + throw err + } + } + const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: model.id, messages: [{ role: "user", content: prompt }], @@ -403,6 +561,68 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl return urlHost.endsWith(".services.ai.azure.com") } + private _isAzureOpenAiResponses(baseUrl?: string): boolean { + try { + if (!baseUrl) return false + const u = new URL(baseUrl) + const host = u.host + const path = u.pathname.replace(/\/+$/, "") + if (!(host.endsWith(".openai.azure.com") || host === "openai.azure.com")) return false + return ( + path.endsWith("/openai/v1/responses") || + path.endsWith("/openai/responses") || + path.endsWith("/responses") + ) + } catch { + return false + } + } + + /** + * Normalize Azure "responses" portal URLs to SDK-friendly base and version. + * - Input (portal sometimes shows): https://{res}.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * - Output: baseURL=https://{res}.openai.azure.com/openai/v1, apiVersionOverride="preview" + * No-op for already-correct or non-Azure URLs. + */ + private _normalizeAzureResponsesBaseUrlAndVersion(inputBaseUrl: string): { + baseURL: string + apiVersionOverride?: string + } { + try { + const url = new URL(inputBaseUrl) + const isAzureHost = url.hostname.endsWith(".openai.azure.com") || url.hostname === "openai.azure.com" + const pathname = (url.pathname || "").replace(/\/+$/, "") + + // 1) Azure portal "non-v1" shape: + // https://{res}.openai.azure.com/openai/responses?api-version=2025-04-01-preview + const isPortalNonV1 = + isAzureHost && + pathname === "/openai/responses" && + url.searchParams.get("api-version") === "2025-04-01-preview" + + if (isPortalNonV1) { + const normalized = `${url.protocol}//${url.host}/openai/v1` + const ver = "preview" + return { baseURL: normalized, apiVersionOverride: ver } + } + + // 2) v1 responses path passed as base URL: + // https://{res}.openai.azure.com/openai/v1/responses?api-version=preview + // Normalize base to '/openai/v1' and force apiVersion 'preview' for Azure Responses v1 preview. 
+ const isV1ResponsesPath = isAzureHost && pathname === "/openai/v1/responses" + if (isV1ResponsesPath) { + const normalized = `${url.protocol}//${url.host}/openai/v1` + const ver = "preview" + return { baseURL: normalized, apiVersionOverride: ver } + } + + // If it's already '/openai/v1' or any other valid path, keep as-is + return { baseURL: inputBaseUrl } + } catch { + return { baseURL: inputBaseUrl } + } + } + /** * Adds max_completion_tokens to the request body if needed based on provider configuration * Note: max_tokens is deprecated in favor of max_completion_tokens as per OpenAI documentation @@ -421,6 +641,159 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl requestOptions.max_completion_tokens = this.options.modelMaxTokens || modelInfo.maxTokens } } + + // --- Responses helpers --- + + private _resolveApiFlavor( + override: "auto" | "responses" | "chat" | undefined, + baseUrl: string, + ): "responses" | "chat" { + if (override === "responses") return "responses" + if (override === "chat") return "chat" + // Auto-detect by URL path + const url = this._safeParseUrl(baseUrl) + const path = url?.pathname || "" + if (path.includes("/v1/responses") || path.endsWith("/responses")) { + return "responses" + } + if (path.includes("/chat/completions")) { + return "chat" + } + // Default to Chat Completions for backward compatibility + return "chat" + } + + private _safeParseUrl(input?: string): URL | undefined { + try { + if (!input) return undefined + return new URL(input) + } catch { + return undefined + } + } + + private _toResponsesInput(anthropicMessages: Anthropic.Messages.MessageParam[]): Array<{ + role: "user" | "assistant" + content: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }> + }> { + const input: Array<{ + role: "user" | "assistant" + content: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }> + }> = [] + + for (const msg of anthropicMessages) { + const role = msg.role === "assistant" ? "assistant" : "user" + const parts: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }> = [] + + if (typeof msg.content === "string") { + if (msg.content.length > 0) { + parts.push({ type: "input_text", text: msg.content }) + } + } else { + for (const block of msg.content) { + if (block.type === "text") { + parts.push({ type: "input_text", text: block.text }) + } else if (block.type === "image") { + parts.push({ + type: "input_image", + image_url: `data:${block.source.media_type};base64,${block.source.data}`, + }) + } + // tool_use/tool_result are omitted in this minimal mapping (can be added as needed) + } + } + + if (parts.length > 0) { + input.push({ role, content: parts }) + } + } + return input + } + + private _extractResponsesText(response: any): string | undefined { + // Prefer the simple output_text if present, otherwise attempt to parse output array + if (response?.output_text) return response.output_text + if (Array.isArray(response?.output)) { + // Find assistant message with output_text + for (const item of response.output) { + if (item?.type === "message" && Array.isArray(item.content)) { + const textPart = item.content.find( + (c: any) => c.type === "output_text" && typeof c.text === "string", + ) + if (textPart?.text) return textPart.text + } + } + } + return undefined + } + + private async *_yieldResponsesResult(response: any, modelInfo: ModelInfo): ApiStream { + const text = this._extractResponsesText(response) ?? 
"" + if (text) { + yield { type: "text", text } + } + // Translate usage fields if present + const usage = response?.usage + if (usage) { + yield { + type: "usage", + inputTokens: usage.input_tokens || usage.prompt_tokens || 0, + outputTokens: usage.output_tokens || usage.completion_tokens || 0, + cacheWriteTokens: usage.cache_creation_input_tokens || undefined, + cacheReadTokens: usage.cache_read_input_tokens || undefined, + } + } + } + + private _isVerbosityUnsupportedError(err: unknown): boolean { + const anyErr = err as any + const msg = (anyErr?.message || "").toString().toLowerCase() + const status = anyErr?.status + return ( + status === 400 && + (msg.includes("verbosity") || msg.includes("unknown parameter") || msg.includes("unsupported")) + ) + } + + // ---- Responses input formatting (align with openai-native.ts) ---- + + private _formatResponsesInput(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): string { + // Developer role for system prompt + let formattedInput = `Developer: ${systemPrompt}\n\n` + for (const message of messages) { + const role = message.role === "user" ? "User" : "Assistant" + if (typeof message.content === "string") { + formattedInput += `${role}: ${message.content}\n\n` + } else if (Array.isArray(message.content)) { + const textContent = message.content + .filter((block) => (block as any).type === "text") + .map((block) => (block as any).text as string) + .join("\n") + if (textContent) { + formattedInput += `${role}: ${textContent}\n\n` + } + } + } + return formattedInput.trim() + } + + private _formatResponsesSingleMessage( + message: Anthropic.Messages.MessageParam, + includeRole: boolean = true, + ): string { + const role = includeRole ? (message.role === "user" ? "User" : "Assistant") + ": " : "" + if (typeof message.content === "string") { + return `${role}${message.content}` + } + if (Array.isArray(message.content)) { + const textContent = message.content + .filter((block) => (block as any).type === "text") + .map((block) => (block as any).text as string) + .join("\n") + return `${role}${textContent}` + } + return role + } } export async function getOpenAiModels(baseUrl?: string, apiKey?: string, openAiHeaders?: Record) { From f05544be10476af3d95425fd5966020796552bff Mon Sep 17 00:00:00 2001 From: Lagyu Date: Sun, 24 Aug 2025 00:45:30 +0900 Subject: [PATCH 02/14] feat: remove api flavor override from setting ui. --- packages/types/src/provider-settings.ts | 1 - src/api/providers/__tests__/openai.spec.ts | 33 ---------------------- src/api/providers/openai.ts | 17 ++++------- 3 files changed, 6 insertions(+), 45 deletions(-) diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index d2b38064105..3fa7094d873 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -179,7 +179,6 @@ const openAiSchema = baseProviderSettingsSchema.extend({ openAiStreamingEnabled: z.boolean().optional(), openAiHostHeader: z.string().optional(), // Keep temporarily for backward compatibility during migration. 
openAiHeaders: z.record(z.string(), z.string()).optional(), - openAiApiFlavor: z.union([z.literal("auto"), z.literal("responses"), z.literal("chat")]).optional(), }) const ollamaSchema = baseProviderSettingsSchema.extend({ diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index 9266180cd55..b80e94a7be4 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -1151,39 +1151,6 @@ describe("OpenAI Compatible - Responses API", () => { expect(chatArgs).not.toHaveProperty("input") }) - it("Manual override: force Responses or Chat regardless of URL", async () => { - // Force Responses - const forceResp = new OpenAiHandler({ - openAiApiKey: "k", - openAiModelId: "gpt-5", - openAiBaseUrl: "https://api.openai.com/v1", // no responses segment - openAiStreamingEnabled: false, - openAiApiFlavor: "responses", - }) - for await (const _ of forceResp.createMessage("sys", baseMessages)) { - } - expect(mockResponsesCreate).toHaveBeenCalled() - const rArgs = mockResponsesCreate.mock.calls.pop()?.[0] - expect(rArgs).toHaveProperty("input") - expect(rArgs).not.toHaveProperty("messages") - - // Force Chat - mockResponsesCreate.mockClear() - mockCreate.mockClear() - const forceChat = new OpenAiHandler({ - openAiApiKey: "k", - openAiModelId: "gpt-4o", - openAiBaseUrl: "https://api.openai.com/v1/responses", // would auto-detect as responses - openAiStreamingEnabled: false, - openAiApiFlavor: "chat", - }) - for await (const _ of forceChat.createMessage("sys", baseMessages)) { - } - expect(mockCreate).toHaveBeenCalled() - const cArgs = mockCreate.mock.calls.pop()?.[0] - expect(cArgs).toHaveProperty("messages") - }) - it("Reasoning effort mapping: Responses uses reasoning: { effort }, Chat uses reasoning_effort", async () => { // Responses path const responsesHandler = new OpenAiHandler({ diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index b61955433cb..47ab20347e1 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -70,8 +70,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Azure API shape slightly differs from the core API shape: // https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai - // Determine if we're using the Responses API flavor for Azure - const flavor = this._resolveApiFlavor(this.options.openAiApiFlavor, this.options.openAiBaseUrl ?? "") + // Determine if we're using the Responses API flavor for Azure (auto-detect by URL only) + const flavor = this._resolveApiFlavor(this.options.openAiBaseUrl ?? 
"") const isResponsesFlavor = flavor === "responses" || this._isAzureOpenAiResponses(this.options.openAiBaseUrl) || @@ -127,8 +127,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format const ark = modelUrl.includes(".volces.com") - // Decide API flavor (manual override > auto-detect by URL) - const flavor = this._resolveApiFlavor(this.options.openAiApiFlavor, modelUrl) + // Decide API flavor (auto-detect by URL) + const flavor = this._resolveApiFlavor(modelUrl) // If Responses API is selected, use the Responses payload and endpoint if (flavor === "responses") { @@ -373,7 +373,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl async completePrompt(prompt: string): Promise { try { const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) - const flavor = this._resolveApiFlavor(this.options.openAiApiFlavor, this.options.openAiBaseUrl ?? "") + const flavor = this._resolveApiFlavor(this.options.openAiBaseUrl ?? "") const model = this.getModel() const modelInfo = model.info @@ -644,12 +644,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // --- Responses helpers --- - private _resolveApiFlavor( - override: "auto" | "responses" | "chat" | undefined, - baseUrl: string, - ): "responses" | "chat" { - if (override === "responses") return "responses" - if (override === "chat") return "chat" + private _resolveApiFlavor(baseUrl: string): "responses" | "chat" { // Auto-detect by URL path const url = this._safeParseUrl(baseUrl) const path = url?.pathname || "" From ad1aeea454562710e30b2b6790fdc88abdf42b26 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Mon, 25 Aug 2025 16:02:39 +0900 Subject: [PATCH 03/14] feat(openai): Responses API parity and continuity fixes; richer event handling; tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add previous_response_id retry path on 400 “Previous response … not found” - Non-streaming and streaming: drop previous_response_id and retry once; clear continuity state - Code: [src/api/providers/openai.ts](src/api/providers/openai.ts:238), [src/api/providers/openai.ts](src/api/providers/openai.ts:291), guard [OpenAiHandler._isPreviousResponseNotFoundError()](src/api/providers/openai.ts:934) - Support GPT‑5-style reasoning summary and minimal effort on Responses API - Default enable summary: "auto" unless explicitly disabled in settings - Include reasoning: { effort: "minimal" | "low" | "medium" | "high", summary?: "auto" } - Code: constructor default [OpenAiHandler](src/api/providers/openai.ts:38), payload assembly [createMessage](src/api/providers/openai.ts:193) - Improve Responses streaming event coverage - Handle response.content_part.added (emit text) - Handle response.audio_transcript.delta (emit text as transcript) - Preserve response.id via stream callback for continuity - Code: [handleResponsesStream](src/api/transform/responses-stream.ts:91), [src/api/transform/responses-stream.ts](src/api/transform/responses-stream.ts:47), responseId callback [src/api/transform/responses-stream.ts](src/api/transform/responses-stream.ts:19) and usage in [openai.ts](src/api/providers/openai.ts:283) - Maintain conversation continuity for Responses API - Store lastResponseId on both streaming and non-streaming paths; pass previous_response_id unless suppressed - Code: stream wiring 
[src/api/providers/openai.ts](src/api/providers/openai.ts:283), non-streaming capture [src/api/providers/openai.ts](src/api/providers/openai.ts:889) - Update and extend tests - Add tests for 400 previous_response_id retry (streaming and non-streaming) - Add tests for content_part and audio_transcript events - Add tests for reasoning minimal + summary auto, and summary disabling - Adjust expectation to allow summary in reasoning payload - Tests: [src/api/providers/__tests__/openai.spec.ts](src/api/providers/__tests__/openai.spec.ts:1663), [src/api/providers/__tests__/openai.spec.ts](src/api/providers/__tests__/openai.spec.ts:1170) - Minor: default enableGpt5ReasoningSummary to true in compatible provider for Responses flows --- src/api/providers/__tests__/openai.spec.ts | 561 ++++++++++++++++++++- src/api/providers/openai.ts | 340 +++++++++++-- src/api/transform/responses-stream.ts | 263 ++++++++++ 3 files changed, 1126 insertions(+), 38 deletions(-) create mode 100644 src/api/transform/responses-stream.ts diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index b80e94a7be4..eeab8315521 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -1171,7 +1171,7 @@ describe("OpenAI Compatible - Responses API", () => { expect(mockResponsesCreate).toHaveBeenCalled() const rArgs = mockResponsesCreate.mock.calls.pop()?.[0] expect(rArgs).toHaveProperty("reasoning") - expect(rArgs.reasoning).toEqual({ effort: "high" }) + expect(rArgs.reasoning).toMatchObject({ effort: "high" }) // Chat path mockResponsesCreate.mockClear() @@ -1300,4 +1300,563 @@ describe("OpenAI Compatible - Responses API", () => { expect(lastCtorArgs.baseURL).toBe("https://sample-name.openai.azure.com/openai/v1") expect(lastCtorArgs.apiVersion).toBe("preview") }) + + it("streams Responses API when provider returns AsyncIterable", async () => { + // Arrange: make responses.create return an AsyncIterable stream for this test + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Hello " } + yield { type: "response.text.delta", delta: "world" } + yield { + type: "response.completed", + response: { usage: { input_tokens: 7, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + // streaming enabled by default + }) + + const stream = handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Say hi" }] }, + ]) + + const chunks: any[] = [] + for await (const ch of stream) { + chunks.push(ch) + } + + // Text should be streamed and concatenated in order + const text = chunks + .filter((c) => c.type === "text") + .map((c) => c.text) + .join("") + expect(text).toBe("Hello world") + + // Usage chunk emitted at completion + const usage = chunks.find((c) => c.type === "usage") + expect(usage).toBeDefined() + expect(usage.inputTokens).toBe(7) + expect(usage.outputTokens).toBe(2) + + // Ensure stream: true was sent + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(args).toHaveProperty("stream", true) + }) +}) + +describe("OpenAI Compatible - Responses API (extended streaming)", () => { + it("handles reasoning deltas and output_text in message content", async () => { + // Arrange: make responses.create return an AsyncIterable stream for this 
test + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + // Reasoning delta first + yield { type: "response.reasoning.delta", delta: "Thinking. " } + // Then a message item with output_text inside content array + yield { + type: "response.output_item.added", + item: { + type: "message", + content: [{ type: "output_text", text: "Answer." }], + }, + } + // Completion with usage + yield { + type: "response.completed", + response: { usage: { input_tokens: 3, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const chunks: any[] = [] + for await (const ch of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + chunks.push(ch) + } + + const reasoning = chunks.find((c) => c.type === "reasoning") + expect(reasoning?.text).toBe("Thinking. ") + + const text = chunks.find((c) => c.type === "text") + expect(text?.text).toBe("Answer.") + + const usage = chunks.find((c) => c.type === "usage") + expect(usage).toBeDefined() + expect(usage.inputTokens).toBe(3) + expect(usage.outputTokens).toBe(2) + + // Ensure stream: true was sent + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(args).toHaveProperty("stream", true) + }) + + it("maps refusal deltas to text with prefix", async () => { + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.refusal.delta", delta: "Cannot comply" } + // Usage may be attached directly on the event for some implementations + yield { type: "response.done", usage: { prompt_tokens: 1, completion_tokens: 1 } } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const result: any[] = [] + for await (const ch of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + result.push(ch) + } + + const textChunks = result.filter((c) => c.type === "text").map((c) => c.text) + expect(textChunks).toContain("[Refusal] Cannot comply") + + const usage = result.find((c) => c.type === "usage") + expect(usage).toBeDefined() + expect(usage.inputTokens).toBe(1) + expect(usage.outputTokens).toBe(1) + }) +}) + +describe("OpenAI Compatible - Responses API (multimodal)", () => { + it("builds structured array input with images (non-streaming)", async () => { + // Reset mocks for clarity + mockResponsesCreate.mockClear() + mockCreate.mockClear() + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + includeMaxTokens: false, + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { type: "text" as const, text: "Here is an image" }, + { + type: "image" as const, + // Minimal Anthropic-style inline image (base64) block + source: { media_type: "image/png", data: "BASE64DATA" } as any, + }, + ], + }, + ] + + const chunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo Code.", messages)) { + chunks.push(ch) + } + + // Should have used Responses API + expect(mockResponsesCreate).toHaveBeenCalled() + const args = mockResponsesCreate.mock.calls[0][0] + + // Input 
should be an array (structured input mode) + expect(Array.isArray(args.input)).toBe(true) + const arr = args.input as any[] + + // First element should be Developer preface as input_text + expect(arr[0]?.role).toBe("user") + expect(arr[0]?.content?.[0]?.type).toBe("input_text") + expect(arr[0]?.content?.[0]?.text).toContain("Developer: You are Roo Code.") + + // There should be at least one input_image with a data URL for the provided image + const hasInputImage = arr.some((item: any) => { + const c = item?.content + return ( + Array.isArray(c) && + c.some( + (part: any) => + part?.type === "input_image" && + typeof part?.image_url === "string" && + part.image_url.startsWith("data:image/png;base64,BASE64DATA"), + ) + ) + }) + expect(hasInputImage).toBe(true) + + // Should still yield a text chunk and usage (from default mock) + const textChunk = chunks.find((c: any) => c.type === "text") + const usageChunk = chunks.find((c: any) => c.type === "usage") + expect(textChunk?.text).toBe("Test response") + expect(usageChunk?.inputTokens).toBe(10) + expect(usageChunk?.outputTokens).toBe(5) + }) + + it("streams with multimodal input using array 'input'", async () => { + // Make responses.create return an AsyncIterable stream for this test + mockResponsesCreate.mockClear() + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "A" } + yield { type: "response.text.delta", delta: "B" } + yield { + type: "response.completed", + response: { usage: { input_tokens: 2, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { type: "text" as const, text: "Look at this" }, + { + type: "image" as const, + source: { media_type: "image/jpeg", data: "IMGDATA" } as any, + }, + ], + }, + ] + + const out: any[] = [] + for await (const ch of handler.createMessage("System text", messages)) { + out.push(ch) + } + + // Ensure stream: true was sent and input is array + expect(mockResponsesCreate).toHaveBeenCalled() + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("stream", true) + expect(Array.isArray(args.input)).toBe(true) + + // Verify streamed text concatenation and usage + const combined = out + .filter((c) => c.type === "text") + .map((c) => c.text) + .join("") + expect(combined).toBe("AB") + + const usage = out.find((c) => c.type === "usage") + expect(usage?.inputTokens).toBe(2) + expect(usage?.outputTokens).toBe(2) + }) +}) + +// --- New tests: Responses API conversation continuity (previous_response_id) --- +describe("OpenAI Compatible - Responses API conversation continuity", () => { + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("propagates previous_response_id from first streaming response into the next request", async () => { + // First call will stream and include a response.id + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Desc " } + yield { + type: "response.completed", + response: { id: "resp-1", usage: { input_tokens: 5, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + 
openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + // 1) First call (establish response id) + const firstChunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Describe the image" }] }, + ])) { + firstChunks.push(ch) + } + + // Ensure first call was made + expect(mockResponsesCreate).toHaveBeenCalledTimes(1) + // 2) Second call - should include previous_response_id from first call + const secondChunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Continue." }] }, + ])) { + secondChunks.push(ch) + } + + // Validate that a second Responses.create call was made + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const secondArgs = mockResponsesCreate.mock.calls[1][0] + expect(secondArgs).toHaveProperty("previous_response_id", "resp-1") + }) + + it("omits previous_response_id when metadata.suppressPreviousResponseId is true", async () => { + // First call streams and returns an id + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "First" } + yield { + type: "response.completed", + response: { id: "rid-xyz", usage: { input_tokens: 1, output_tokens: 1 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + // First call to capture lastResponseId + for await (const _ of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Turn 1" }] }, + ])) { + } + + // Second call with suppressPreviousResponseId => should NOT include previous_response_id + for await (const _ of handler.createMessage( + "sys", + [{ role: "user", content: [{ type: "text" as const, text: "Turn 2" }] }], + { suppressPreviousResponseId: true } as any, + )) { + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const args = mockResponsesCreate.mock.calls[1][0] + expect(args).not.toHaveProperty("previous_response_id") + }) +}) + +// --- New: Responses API parity improvements tests --- +describe("OpenAI Compatible - Responses API parity improvements", () => { + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("retries without previous_response_id when server returns 400 'Previous response ... 
not found' (non-streaming)", async () => { + // First call throws 400 for previous_response_id, second succeeds + mockResponsesCreate + .mockImplementationOnce((_opts: any) => { + const err = new Error("Previous response rid-bad not found") + ;(err as any).status = 400 + throw err + }) + .mockImplementationOnce(async (_opts: any) => { + return { id: "rid-good", output_text: "OK", usage: { input_tokens: 1, output_tokens: 1 } } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + + const chunks: any[] = [] + for await (const ch of h.createMessage( + "sys", + [{ role: "user", content: [{ type: "text" as const, text: "Turn" }] }], + { previousResponseId: "rid-bad" } as any, + )) { + chunks.push(ch) + } + + // Two calls made: first fails with 400, second retries without previous_response_id + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const firstArgs = mockResponsesCreate.mock.calls[0][0] + expect(firstArgs).toHaveProperty("previous_response_id", "rid-bad") + + const secondArgs = mockResponsesCreate.mock.calls[1][0] + expect(secondArgs).not.toHaveProperty("previous_response_id") + + // Should still surface text + const textChunk = chunks.find((c: any) => c.type === "text") + expect(textChunk?.text).toBe("OK") + }) + + it("retries without previous_response_id when server returns 400 (streaming)", async () => { + // First call throws, second returns a stream + mockResponsesCreate + .mockImplementationOnce((_opts: any) => { + const err = new Error("Previous response not found") + ;(err as any).status = 400 + throw err + }) + .mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Hello" } + yield { type: "response.completed", response: { usage: { input_tokens: 1, output_tokens: 1 } } } + }, + } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + // streaming enabled by default + }) + + const out: any[] = [] + for await (const ch of h.createMessage( + "sys", + [{ role: "user", content: [{ type: "text" as const, text: "Hi" }] }], + { previousResponseId: "bad-id" } as any, + )) { + out.push(ch) + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const first = mockResponsesCreate.mock.calls[0][0] + expect(first).toHaveProperty("previous_response_id", "bad-id") + const second = mockResponsesCreate.mock.calls[1][0] + expect(second).not.toHaveProperty("previous_response_id") + + const combined = out + .filter((c) => c.type === "text") + .map((c) => c.text) + .join("") + expect(combined).toBe("Hello") + }) + + it("handles response.content_part.added by emitting text", async () => { + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.content_part.added", part: { type: "text", text: "Part" } } + yield { type: "response.completed", response: { usage: { input_tokens: 0, output_tokens: 0 } } } + }, + } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const out: any[] = [] + for await (const ch of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + out.push(ch) + } + + const texts = out.filter((c) => c.type === 
"text").map((c) => c.text) + expect(texts).toContain("Part") + }) + + it("maps response.audio_transcript.delta to text", async () => { + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.audio_transcript.delta", delta: "Transcript" } + yield { type: "response.completed", response: { usage: { input_tokens: 0, output_tokens: 0 } } } + }, + } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const out: any[] = [] + for await (const ch of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + out.push(ch) + } + + const texts = out.filter((c) => c.type === "text").map((c) => c.text) + expect(texts).toContain("Transcript") + }) + + it("includes reasoning: { effort: 'minimal', summary: 'auto' } when enabled (non-streaming)", async () => { + mockResponsesCreate.mockImplementationOnce(async (opts: any) => { + return { id: "rid-1", output_text: "ok", usage: { input_tokens: 1, output_tokens: 1 } } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "minimal", + }) + + for await (const _ of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + // consume + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(1) + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("reasoning") + expect(args.reasoning).toMatchObject({ effort: "minimal", summary: "auto" }) + }) + + it("omits reasoning.summary when enableGpt5ReasoningSummary is false", async () => { + mockResponsesCreate.mockImplementationOnce(async (opts: any) => { + return { id: "rid-2", output_text: "ok", usage: { input_tokens: 1, output_tokens: 1 } } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "low", + enableGpt5ReasoningSummary: false, + }) + + for await (const _ of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + // consume + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(1) + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("reasoning") + expect(args.reasoning.effort).toBe("low") + expect(args.reasoning.summary).toBeUndefined() + }) }) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 47ab20347e1..a4b68fd42f9 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -19,11 +19,13 @@ import { convertToR1Format } from "../transform/r1-format" import { convertToSimpleMessages } from "../transform/simple-format" import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { getModelParams } from "../transform/model-params" +import { handleResponsesStream } from "../transform/responses-stream" import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { getApiRequestTimeout } from "./utils/timeout-config" +import { ResponseCreateParamsNonStreaming } from "openai/resources/responses/responses" // 
TODO: Rename this to OpenAICompatibleHandler. Also, I think the // `OpenAINativeHandler` can subclass from this, since it's obviously @@ -31,10 +33,15 @@ import { getApiRequestTimeout } from "./utils/timeout-config" export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI + private lastResponseId: string | undefined constructor(options: ApiHandlerOptions) { super() this.options = options + // Default to including reasoning.summary: "auto" for Responses API (parity with native provider) + if (this.options.enableGpt5ReasoningSummary === undefined) { + this.options.enableGpt5ReasoningSummary = true + } // Normalize Azure Responses "web" URL shape if provided by users. // Example input (Azure portal sometimes shows): @@ -135,13 +142,61 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const nonStreaming = !(this.options.openAiStreamingEnabled ?? true) // Build Responses payload (align with OpenAI Native Responses API formatting) - const formattedInput = this._formatResponsesInput(systemPrompt, messages) - const payload: Record = { + // Azure- and Responses-compatible multimodal handling: + // - Use array input ONLY when the latest user message contains images + // - Include the most recent assistant message as input_text to preserve continuity + // - Always include a Developer preface + const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") + const lastUserHasImages = + !!lastUserMessage && + Array.isArray(lastUserMessage.content) && + lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") + + let inputPayload: unknown + if (lastUserHasImages && lastUserMessage) { + // Select messages to retain context in array mode: + // - The most recent assistant message (text-only, as input_text) + // - All user messages that contain images + // - The latest user message (even if it has no image) + const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant") + + const messagesForArray = messages.filter((m) => { + if (m.role === "assistant") { + return lastAssistantMessage ? m === lastAssistantMessage : false + } + if (m.role === "user") { + const hasImage = + Array.isArray(m.content) && + m.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") + return hasImage || m === lastUserMessage + } + return false + }) + + const arrayInput = this._toResponsesInput(messagesForArray) + const developerPreface = { + role: "user" as const, + content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }], + } + inputPayload = [developerPreface, ...arrayInput] + } else { + // Pure text history: use compact transcript (includes both user and assistant turns) + inputPayload = this._formatResponsesInput(systemPrompt, messages) + } + const usedArrayInput = Array.isArray(inputPayload) + + const previousId = metadata?.suppressPreviousResponseId + ? undefined + : (metadata?.previousResponseId ?? this.lastResponseId) + + const basePayload: Record = { model: modelId, - input: formattedInput, + input: inputPayload, + ...(previousId ? { previous_response_id: previousId } : {}), } - // Reasoning effort (Responses expects: reasoning: { effort }) + // Reasoning effort (Responses expects: reasoning: { effort, summary? 
}) + // Parity with native: support "minimal" and include summary: "auto" unless explicitly disabled if (this.options.enableReasoningEffort && (this.options.reasoningEffort || reasoningEffort)) { const effort = (this.options.reasoningEffort || reasoningEffort) as | "minimal" @@ -149,41 +204,200 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl | "medium" | "high" | undefined - // If effort is set and not "minimal" (minimal is treated as "no explicit effort") - if (effort && effort !== "minimal") { - payload.reasoning = { effort } + if (effort) { + ;( + basePayload as { + reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } + } + ).reasoning = { + effort, + ...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}), + } } } // Temperature (only include when explicitly set by the user) if (this.options.modelTemperature !== undefined) { - payload.temperature = this.options.modelTemperature + basePayload.temperature = this.options.modelTemperature } else if (deepseekReasoner) { - payload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE + basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE } // Verbosity: include via text.verbosity (Responses API expectation per openai-native handler) - if (this.options.verbosity || verbosity) { - ;(payload as any).text = { verbosity: this.options.verbosity || verbosity } + const effectiveVerbosity = this.options.verbosity || verbosity + if (effectiveVerbosity) { + ;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { + verbosity: effectiveVerbosity as "low" | "medium" | "high", + } } // Add max_output_tokens if requested (Azure Responses naming) if (this.options.includeMaxTokens === true) { - payload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens + basePayload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens } - // NOTE: Streaming for Responses API isn't covered by current tests. - // We call non-streaming for now to preserve stable behavior. + // Non-streaming path (preserves existing behavior and tests) + if (nonStreaming) { + try { + const response = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(basePayload) + yield* this._yieldResponsesResult(response as unknown, modelInfo) + } catch (err: unknown) { + // Retry without previous_response_id if server rejects it (400 "Previous response ... 
not found") + if (previousId && this._isPreviousResponseNotFoundError(err)) { + const { previous_response_id: _omitPrev, ...withoutPrev } = basePayload as { + previous_response_id?: unknown + [key: string]: unknown + } + // Clear stored continuity to avoid reusing a bad id + this.lastResponseId = undefined + const response = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(withoutPrev) + yield* this._yieldResponsesResult(response as unknown, modelInfo) + } + // Graceful downgrade if verbosity is rejected by server (400 unknown/unsupported parameter) + else if ("text" in basePayload && this._isVerbosityUnsupportedError(err)) { + // Remove text.verbosity and retry once + const { text: _omit, ...withoutVerbosity } = basePayload as { text?: unknown } & Record< + string, + unknown + > + const response = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(withoutVerbosity) + yield* this._yieldResponsesResult(response as unknown, modelInfo) + } else if (usedArrayInput && this._isInputTextInvalidError(err)) { + // Azure-specific fallback: retry with string transcript when array input is rejected + const retryPayload: Record = { + ...basePayload, + input: this._formatResponsesInput(systemPrompt, messages), + } + const response = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(retryPayload) + yield* this._yieldResponsesResult(response as unknown, modelInfo) + } else { + throw err + } + } + return + } + + // Streaming path (auto-fallback to non-streaming result if provider ignores stream flag) + const streamingPayload: Record = { ...basePayload, stream: true } try { - const response: any = await (this.client as any).responses.create(payload) - yield* this._yieldResponsesResult(response, modelInfo) + const maybeStream = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(streamingPayload) + + const isAsyncIterable = (obj: unknown): obj is AsyncIterable => + typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" + + if (isAsyncIterable(maybeStream)) { + for await (const chunk of handleResponsesStream(maybeStream, { + onResponseId: (id) => { + this.lastResponseId = id + }, + })) { + yield chunk + } + } else { + // Some providers may ignore the stream flag and return a complete response + yield* this._yieldResponsesResult(maybeStream as unknown, modelInfo) + } } catch (err: unknown) { - // Graceful downgrade if verbosity is rejected by server (400 unknown/unsupported parameter) - if ((payload as any).text && this._isVerbosityUnsupportedError(err)) { - // Remove text.verbosity and retry once - const { text: _omit, ...withoutVerbosity } = payload as any - const response: any = await (this.client as any).responses.create(withoutVerbosity) - yield* this._yieldResponsesResult(response, modelInfo) + // Retry without previous_response_id if server rejects it (400 "Previous response ... 
not found") + if (previousId && this._isPreviousResponseNotFoundError(err)) { + const { previous_response_id: _omitPrev, ...withoutPrev } = streamingPayload as { + previous_response_id?: unknown + [key: string]: unknown + } + this.lastResponseId = undefined + const maybeStreamRetry = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(withoutPrev) + + const isAsyncIterable = (obj: unknown): obj is AsyncIterable => + typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" + + if (isAsyncIterable(maybeStreamRetry)) { + for await (const chunk of handleResponsesStream(maybeStreamRetry, { + onResponseId: (id) => { + this.lastResponseId = id + }, + })) { + yield chunk + } + } else { + yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo) + } + } + // Graceful verbosity removal on 400 + else if ("text" in streamingPayload && this._isVerbosityUnsupportedError(err)) { + const { text: _omit, ...withoutVerbosity } = streamingPayload as { text?: unknown } & Record< + string, + unknown + > + const maybeStreamRetry = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(withoutVerbosity) + + const isAsyncIterable = (obj: unknown): obj is AsyncIterable => + typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" + + if (isAsyncIterable(maybeStreamRetry)) { + for await (const chunk of handleResponsesStream(maybeStreamRetry, { + onResponseId: (id) => { + this.lastResponseId = id + }, + })) { + yield chunk + } + } else { + yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo) + } + } else if (usedArrayInput && this._isInputTextInvalidError(err)) { + // Azure-specific fallback for streaming: retry with string transcript while keeping stream: true + const retryStreamingPayload: Record = { + ...streamingPayload, + input: this._formatResponsesInput(systemPrompt, messages), + } + const maybeStreamRetry = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(retryStreamingPayload) + + const isAsyncIterable = (obj: unknown): obj is AsyncIterable => + typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" + + if (isAsyncIterable(maybeStreamRetry)) { + for await (const chunk of handleResponsesStream(maybeStreamRetry, { + onResponseId: (id) => { + this.lastResponseId = id + }, + })) { + yield chunk + } + } else { + yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo) + } } else { throw err } @@ -383,17 +597,17 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const formattedInput = this._formatResponsesSingleMessage( { role: "user", - content: [{ type: "text", text: prompt }] as any, + content: [{ type: "text", text: prompt }], } as Anthropic.Messages.MessageParam, /*includeRole*/ true, ) - const payload: Record = { + const payload: ResponseCreateParamsNonStreaming = { model: model.id, input: formattedInput, } // Reasoning effort (Responses) - const effort = (this.options.reasoningEffort || (model as any).reasoningEffort) as + const effort = (this.options.reasoningEffort || model.reasoningEffort) as | "minimal" | "low" | "medium" @@ -410,7 +624,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Verbosity via text.verbosity if (this.options.verbosity) { - ;(payload as any).text = { verbosity: this.options.verbosity } + payload.text = { verbosity: this.options.verbosity } } // 
max_output_tokens @@ -419,12 +633,20 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } try { - const response: any = await (this.client as any).responses.create(payload) + const response = await this.client.responses.create(payload) + try { + const respId = (response as { id?: unknown } | undefined)?.id + if (typeof respId === "string" && respId.length > 0) { + this.lastResponseId = respId + } + } catch { + // ignore + } return this._extractResponsesText(response) ?? "" } catch (err: unknown) { - if ((payload as any).text && this._isVerbosityUnsupportedError(err)) { - const { text: _omit, ...withoutVerbosity } = payload as any - const response: any = await (this.client as any).responses.create(withoutVerbosity) + if (payload.text && this._isVerbosityUnsupportedError(err)) { + const { text: _omit, ...withoutVerbosity } = payload + const response = await this.client.responses.create(withoutVerbosity) return this._extractResponsesText(response) ?? "" } throw err @@ -722,7 +944,30 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl return undefined } + private _isInputTextInvalidError(err: unknown): boolean { + if (err == null || typeof err !== "object") return false + const anyErr = err as { + status?: unknown + response?: { status?: unknown } + message?: unknown + error?: { message?: unknown } + } + const statusRaw = anyErr.status ?? anyErr.response?.status + const status = typeof statusRaw === "number" ? statusRaw : Number(statusRaw) + const msgRaw = (anyErr.message ?? anyErr.error?.message ?? "").toString().toLowerCase() + return status === 400 && msgRaw.includes("invalid value") && msgRaw.includes("input_text") + } private async *_yieldResponsesResult(response: any, modelInfo: ModelInfo): ApiStream { + // Capture response id for continuity when present + try { + const respId = (response as { id?: unknown } | undefined)?.id + if (typeof respId === "string" && respId.length > 0) { + this.lastResponseId = respId + } + } catch { + // ignore + } + const text = this._extractResponsesText(response) ?? "" if (text) { yield { type: "text", text } @@ -741,15 +986,36 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } private _isVerbosityUnsupportedError(err: unknown): boolean { - const anyErr = err as any - const msg = (anyErr?.message || "").toString().toLowerCase() - const status = anyErr?.status + if (err == null || typeof err !== "object") return false + + // you had hasOwnProperty("message") twice — likely a typo + if (!("message" in err)) return false + + const msg = String((err as { message?: unknown }).message ?? "").toLowerCase() + + const rawStatus = "status" in err ? (err as { status?: unknown }).status : undefined + const status = typeof rawStatus === "number" ? rawStatus : Number(rawStatus) + return ( status === 400 && (msg.includes("verbosity") || msg.includes("unknown parameter") || msg.includes("unsupported")) ) } + private _isPreviousResponseNotFoundError(err: unknown): boolean { + if (err == null || typeof err !== "object") return false + const anyErr = err as { + status?: unknown + response?: { status?: unknown } + message?: unknown + error?: { message?: unknown } + } + const statusRaw = anyErr.status ?? anyErr.response?.status + const status = typeof statusRaw === "number" ? statusRaw : Number(statusRaw) + const msg = (anyErr.message ?? anyErr.error?.message ?? 
"").toString().toLowerCase() + return status === 400 && (msg.includes("previous response") || msg.includes("not found")) + } + // ---- Responses input formatting (align with openai-native.ts) ---- private _formatResponsesInput(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): string { @@ -761,8 +1027,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl formattedInput += `${role}: ${message.content}\n\n` } else if (Array.isArray(message.content)) { const textContent = message.content - .filter((block) => (block as any).type === "text") - .map((block) => (block as any).text as string) + .filter((block) => block.type === "text") + .map((block) => block.text) .join("\n") if (textContent) { formattedInput += `${role}: ${textContent}\n\n` @@ -782,8 +1048,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } if (Array.isArray(message.content)) { const textContent = message.content - .filter((block) => (block as any).type === "text") - .map((block) => (block as any).text as string) + .filter((block) => block.type === "text") + .map((block) => block.text) .join("\n") return `${role}${textContent}` } diff --git a/src/api/transform/responses-stream.ts b/src/api/transform/responses-stream.ts new file mode 100644 index 00000000000..8b6eb3fa7ec --- /dev/null +++ b/src/api/transform/responses-stream.ts @@ -0,0 +1,263 @@ +import type { ApiStreamChunk } from "./stream" + +/** + * Minimal, typed streaming handler for OpenAI/Azure Responses API streams. + * Consumes an AsyncIterable of events and yields ApiStreamChunk items. + * + * Notes: + * - We intentionally handle only the core, stable event shapes that we already + * use in openai-native, to keep the surface area small and predictable. + * - If the event format changes, extend the type guards below conservatively. 
+ */ +export async function* handleResponsesStream( + stream: AsyncIterable, + options?: { onResponseId?: (id: string) => void }, +): AsyncGenerator { + let lastUsage: ResponseUsage | undefined + + for await (const event of stream) { + // Surface response.id to callers when available (for conversation continuity) + if (isObject(event)) { + const resp = (event as Record).response as unknown + if (isObject(resp)) { + const rid = (resp as Record).id + if (typeof rid === "string") { + options?.onResponseId?.(rid) + } + } + } + // 1) Streaming text deltas + if (isTextDelta(event)) { + const e = event as TextDeltaEvent + if (e.delta != null) { + yield { type: "text", text: String(e.delta) } + } + continue + } + + // 2) Streaming reasoning deltas + if (isReasoningDelta(event)) { + const e = event as ReasoningDeltaEvent + if (e.delta != null) { + yield { type: "reasoning", text: String(e.delta) } + } + continue + } + + // 2.1) Audio transcript deltas (map to text) + if (isAudioTranscriptDelta(event)) { + const e = event as AudioTranscriptDeltaEvent + if (e.delta != null) { + yield { type: "text", text: String(e.delta) } + } + continue + } + + // 3) Refusal deltas (map to text with prefix, matching native handler behavior) + if (isRefusalDelta(event)) { + const e = event as RefusalDeltaEvent + if (e.delta != null) { + yield { type: "text", text: `[Refusal] ${String(e.delta)}` } + } + continue + } + + // 4) Output-item added (alternative carrier for text/reasoning) + if (isOutputItemAdded(event)) { + const item = (event as OutputItemAddedEvent).item + if (item) { + if (item.type === "text" && typeof item.text === "string") { + yield { type: "text", text: item.text } + } else if (item.type === "reasoning" && typeof item.text === "string") { + yield { type: "reasoning", text: item.text } + } else if (item.type === "message" && Array.isArray(item.content)) { + for (const content of item.content) { + // Some servers use "text"; others use "output_text" + if ( + (content?.type === "text" || content?.type === "output_text") && + typeof content?.text === "string" + ) { + yield { type: "text", text: content.text } + } + } + } else if (typeof item.text === "string") { + // Fallback: emit item.text even if item.type is unknown (matches native handler tolerance) + yield { type: "text", text: item.text } + } + } + continue + } + + // 4.1) Content part added (SDK alternative format) + if (isContentPartAdded(event)) { + const part = (event as ContentPartAddedEvent).part + if (part && part.type === "text" && typeof part.text === "string") { + yield { type: "text", text: part.text } + } + continue + } + + // 5) Fallback: some implementations (or older shapes) supply choices[0].delta.content + const content = getChoiceDeltaContent(event) + if (content) { + yield { type: "text", text: content } + } + + // 6) Track usage whenever present + const usage = extractUsage(event) + if (usage) { + lastUsage = usage + } + + // 7) Completion/done events - emit usage if we have it + if (isDoneEvent(event)) { + const u = lastUsage + if (u && hasAnyUsage(u)) { + yield makeUsageChunk(u) + } + } + } +} + +/** Types, guards, and helpers */ + +type ResponseUsage = { + input_tokens?: number + output_tokens?: number + prompt_tokens?: number + completion_tokens?: number + cache_creation_input_tokens?: number + cache_read_input_tokens?: number + prompt_tokens_details?: { cached_tokens?: number } +} + +type TextDeltaEvent = { + type: "response.text.delta" | "response.output_text.delta" + delta?: unknown +} + +type ReasoningDeltaEvent = { + 
type: + | "response.reasoning.delta" + | "response.reasoning_text.delta" + | "response.reasoning_summary.delta" + | "response.reasoning_summary_text.delta" + delta?: unknown +} + +type RefusalDeltaEvent = { + type: "response.refusal.delta" + delta?: unknown +} + +type OutputItemAddedEvent = { + type: "response.output_item.added" + item?: { + type?: string + text?: unknown + content?: Array<{ type?: string; text?: unknown }> + } +} + +type DoneEvent = { + type: "response.done" | "response.completed" +} + +type AudioTranscriptDeltaEvent = { + type: "response.audio_transcript.delta" + delta?: unknown +} + +type ContentPartAddedEvent = { + type: "response.content_part.added" + part?: { + type?: string + text?: unknown + } +} + +function isObject(value: unknown): value is Record { + return typeof value === "object" && value !== null +} + +function isTextDelta(event: unknown): event is TextDeltaEvent { + return ( + isObject(event) && + typeof (event as Record).type === "string" && + (((event as Record).type as string) === "response.text.delta" || + ((event as Record).type as string) === "response.output_text.delta") + ) +} + +function isReasoningDelta(event: unknown): event is ReasoningDeltaEvent { + if (!isObject(event)) return false + const t = (event as Record).type + return ( + t === "response.reasoning.delta" || + t === "response.reasoning_text.delta" || + t === "response.reasoning_summary.delta" || + t === "response.reasoning_summary_text.delta" + ) +} + +function isRefusalDelta(event: unknown): event is RefusalDeltaEvent { + return isObject(event) && (event as Record).type === "response.refusal.delta" +} + +function isOutputItemAdded(event: unknown): event is OutputItemAddedEvent { + return isObject(event) && (event as Record).type === "response.output_item.added" +} + +function isAudioTranscriptDelta(event: unknown): event is AudioTranscriptDeltaEvent { + return isObject(event) && (event as Record).type === "response.audio_transcript.delta" +} + +function isContentPartAdded(event: unknown): event is ContentPartAddedEvent { + return isObject(event) && (event as Record).type === "response.content_part.added" +} + +function isDoneEvent(event: unknown): event is DoneEvent { + if (!isObject(event)) return false + const t = (event as Record).type + return t === "response.done" || t === "response.completed" +} + +function getChoiceDeltaContent(event: unknown): string | undefined { + if (!isObject(event)) return undefined + const choices = (event as Record).choices as unknown + if (!Array.isArray(choices) || choices.length === 0) return undefined + const first = choices[0] as unknown + if (!isObject(first)) return undefined + const delta = (first as Record).delta as unknown + if (!isObject(delta)) return undefined + const content = (delta as Record).content + if (content == null) return undefined + return String(content) +} + +function extractUsage(event: unknown): ResponseUsage | undefined { + if (!isObject(event)) return undefined + const resp = (event as Record).response as unknown + if (isObject(resp) && isObject((resp as Record).usage)) { + return (resp as Record).usage as ResponseUsage + } + const usage = (event as Record).usage as unknown + if (isObject(usage)) { + return usage as ResponseUsage + } + return undefined +} + +function hasAnyUsage(usage: ResponseUsage): boolean { + return Boolean(usage.input_tokens || usage.output_tokens || usage.prompt_tokens || usage.completion_tokens) +} + +function makeUsageChunk(usage: ResponseUsage): ApiStreamChunk { + return { + type: "usage", + 
inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0, + outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0, + cacheWriteTokens: usage.cache_creation_input_tokens ?? undefined, + cacheReadTokens: usage.cache_read_input_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? undefined, + } +} From 08570ea7d2569babd2c90094fec06f7d06964416 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Mon, 25 Aug 2025 16:12:07 +0900 Subject: [PATCH 04/14] chore: update openai package, to use the `verbosity` parameter. --- src/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/package.json b/src/package.json index 0600711e4dc..a660faef14f 100644 --- a/src/package.json +++ b/src/package.json @@ -464,7 +464,7 @@ "node-cache": "^5.1.2", "node-ipc": "^12.0.0", "ollama": "^0.5.17", - "openai": "^5.0.0", + "openai": "^5.15.0", "os-name": "^6.0.0", "p-limit": "^6.2.0", "p-wait-for": "^5.0.2", From bc3661c168921b5bf5aec30632a018ce6ac28a94 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Mon, 25 Aug 2025 16:16:13 +0900 Subject: [PATCH 05/14] chore: update pnpm-lock.yaml --- pnpm-lock.yaml | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index be701e50e7e..eb396327527 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -680,8 +680,8 @@ importers: specifier: ^0.5.17 version: 0.5.17 openai: - specifier: ^5.0.0 - version: 5.5.1(ws@8.18.3)(zod@3.25.61) + specifier: ^5.15.0 + version: 5.15.0(ws@8.18.3)(zod@3.25.61) os-name: specifier: ^6.0.0 version: 6.1.0 @@ -3916,9 +3916,6 @@ packages: '@types/node@20.17.57': resolution: {integrity: sha512-f3T4y6VU4fVQDKVqJV4Uppy8c1p/sVvS3peyqxyWnzkqXFJLRU7Y1Bl7rMS1Qe9z0v4M6McY0Fp9yBsgHJUsWQ==} - '@types/node@20.19.11': - resolution: {integrity: sha512-uug3FEEGv0r+jrecvUUpbY8lLisvIjg6AAic6a2bSP5OEOLeJsDSnvhCDov7ipFFMXS3orMpzlmi0ZcuGkBbow==} - '@types/node@24.2.1': resolution: {integrity: sha512-DRh5K+ka5eJic8CjH7td8QpYEV6Zo10gfRkjHCO3weqZHWDtAaSTFtl4+VMqOJ4N5jcuhZ9/l+yy8rVgw7BQeQ==} @@ -7690,8 +7687,8 @@ packages: resolution: {integrity: sha512-cxN6aIDPz6rm8hbebcP7vrQNhvRcveZoJU72Y7vskh4oIm+BZwBECnx5nTmrlres1Qapvx27Qo1Auukpf8PKXw==} engines: {node: '>=18'} - openai@5.5.1: - resolution: {integrity: sha512-5i19097mGotHA1eFsM6Tjd/tJ8uo9sa5Ysv4Q6bKJ2vtN6rc0MzMrUefXnLXYAJcmMQrC1Efhj0AvfIkXrQamw==} + openai@5.15.0: + resolution: {integrity: sha512-kcUdws8K/A8m02I+IqFBwO51gS+87GP89yWEufGbzEi8anBz4FB/bti2QxaJdGwwY4mwJGzx85XO7TuL/Tpu1w==} hasBin: true peerDependencies: ws: ^8.18.0 @@ -9326,9 +9323,6 @@ packages: undici-types@6.19.8: resolution: {integrity: sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==} - undici-types@6.21.0: - resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==} - undici-types@7.10.0: resolution: {integrity: sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==} @@ -13332,11 +13326,6 @@ snapshots: dependencies: undici-types: 6.19.8 - '@types/node@20.19.11': - dependencies: - undici-types: 6.21.0 - optional: true - '@types/node@24.2.1': dependencies: undici-types: 7.10.0 @@ -13400,7 +13389,7 @@ snapshots: '@types/ws@8.18.1': dependencies: - '@types/node': 20.19.11 + '@types/node': 24.2.1 optional: true '@types/yargs-parser@21.0.3': {} @@ -17755,7 +17744,7 @@ snapshots: is-inside-container: 1.0.0 is-wsl: 3.1.0 - openai@5.5.1(ws@8.18.3)(zod@3.25.61): + openai@5.15.0(ws@8.18.3)(zod@3.25.61): 
optionalDependencies: ws: 8.18.3 zod: 3.25.61 @@ -19659,9 +19648,6 @@ snapshots: undici-types@6.19.8: {} - undici-types@6.21.0: - optional: true - undici-types@7.10.0: {} undici@6.21.3: {} From 825c5025f15fd0d45503180972c1a854f0a26d6f Mon Sep 17 00:00:00 2001 From: Lagyu Date: Mon, 25 Aug 2025 16:31:41 +0900 Subject: [PATCH 06/14] docs: add JSDoc describing the auto url detection logic --- src/api/providers/openai.ts | 131 ++++++++++++++++++++++++++++++++++-- 1 file changed, 127 insertions(+), 4 deletions(-) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index a4b68fd42f9..2a43389911b 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -30,6 +30,45 @@ import { ResponseCreateParamsNonStreaming } from "openai/resources/responses/res // TODO: Rename this to OpenAICompatibleHandler. Also, I think the // `OpenAINativeHandler` can subclass from this, since it's obviously // compatible with the OpenAI API. We can also rename it to `OpenAIHandler`. +/** + * URL auto-detection overview + * + * Decision tree (host and path based): + * 1) Azure AI Inference Service: + * - Detected when host ends with ".services.ai.azure.com" + * - Uses OpenAI Chat Completions API shape with a path override + * (see OPENAI_AZURE_AI_INFERENCE_PATH) when making requests. + * + * 2) Azure OpenAI: + * - Detected when host is "openai.azure.com" or ends with ".openai.azure.com" + * or when options.openAiUseAzure is explicitly true. + * - Within Azure OpenAI, the API "flavor" is chosen by URL path: + * - Responses API: + * * Path contains "/v1/responses" or ends with "/responses" + * * Also auto-detected for portal-style URLs (e.g. "/openai/responses?api-version=2025-04-01-preview") + * which itself is not valid in request, are normalized to "/openai/v1" with apiVersion "preview". + * - Chat Completions API: + * * Path contains "/chat/completions" + * - Default: + * * Falls back to Chat Completions if none of the above match. + * + * 3) Generic OpenAI-compatible endpoints: + * - Anything else (OpenAI, OpenRouter, LM Studio, vLLM, etc.) + * - Flavor is again selected by URL path as above: + * - "/v1/responses" or ending with "/responses" => Responses API + * - "/chat/completions" => Chat Completions + * - otherwise defaults to Chat Completions for backward compatibility. + * + * Examples: + * - https://api.openai.com/v1 -> Chat Completions (default) + * - https://api.openai.com/v1/responses -> Responses API + * - https://api.openai.com/v1/chat/completions -> Chat Completions + * - https://myres.openai.azure.com/openai/v1/responses?api-version=preview + * -> Azure OpenAI + Responses API + * - https://myres.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * -> normalized to base /openai/v1 + apiVersion "preview" (Responses) + * - https://test.services.ai.azure.com -> Azure AI Inference Service (Chat Completions with path override) + */ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI @@ -773,16 +812,55 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } + /** + * Detects Grok xAI endpoints. + * - Returns true when the host contains "x.ai" (e.g., "api.x.ai"). + * - Used to omit stream_options for streaming requests because Grok may not support them. 
+ * + * Examples: + * - https://api.x.ai/v1 -> true + * - https://api.openai.com/v1 -> false + */ private _isGrokXAI(baseUrl?: string): boolean { const urlHost = this._getUrlHost(baseUrl) return urlHost.includes("x.ai") } + /** + * Detects Azure AI Inference Service endpoints (distinct from Azure OpenAI). + * - Returns true when host ends with ".services.ai.azure.com". + * - These endpoints require a special path override when calling the Chat Completions API. + * + * Examples: + * - https://myenv.services.ai.azure.com -> true + * - https://myres.openai.azure.com -> false (this is Azure OpenAI, not AI Inference) + */ private _isAzureAiInference(baseUrl?: string): boolean { const urlHost = this._getUrlHost(baseUrl) return urlHost.endsWith(".services.ai.azure.com") } + /** + * Detects Azure OpenAI "Responses API" URLs by host and path. + * - Host must be "openai.azure.com" or end with ".openai.azure.com" + * - Path may be one of: + * • "/openai/v1/responses" (preferred v1 path) + * • "/openai/responses" (portal/legacy style) + * • any path ending with "/responses" + * - Trailing slashes are trimmed before matching. + * + * This is used to favor the Responses API flavor on Azure OpenAI when the base URL already + * points to a Responses path. + * + * Examples (true): + * - https://myres.openai.azure.com/openai/v1/responses?api-version=preview + * - https://myres.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * - https://openai.azure.com/openai/v1/responses + * + * Examples (false): + * - https://myres.openai.azure.com/openai/v1/chat/completions + * - https://api.openai.com/v1/responses (not an Azure host) + */ private _isAzureOpenAiResponses(baseUrl?: string): boolean { try { if (!baseUrl) return false @@ -801,10 +879,36 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } /** - * Normalize Azure "responses" portal URLs to SDK-friendly base and version. - * - Input (portal sometimes shows): https://{res}.openai.azure.com/openai/responses?api-version=2025-04-01-preview - * - Output: baseURL=https://{res}.openai.azure.com/openai/v1, apiVersionOverride="preview" - * No-op for already-correct or non-Azure URLs. + * Normalizes Azure OpenAI "Responses" portal URLs to an SDK-friendly base and version. + * + * Why: + * - The Azure portal often presents a non-v1 Responses endpoint such as: + * https://{res}.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * which is not the ideal base for SDK clients. We convert it to: + * baseURL = https://{res}.openai.azure.com/openai/v1 + * apiVersionOverride = "preview" + * + * What it does: + * - If the input is an Azure OpenAI host and its path is exactly "/openai/responses" + * with api-version=2025-04-01-preview, we: + * • return { baseURL: "https://{host}/openai/v1", apiVersionOverride: "preview" } + * - If the input is already "/openai/v1/responses", we similarly normalize the base to "/openai/v1" + * and set apiVersionOverride to "preview". + * - Otherwise, returns the original URL unchanged. + * + * Scope: + * - Only applies to Azure OpenAI hosts ("openai.azure.com" or "*.openai.azure.com"). + * - Non-Azure URLs or already SDK-friendly bases are returned as-is. 
+ * + * Examples: + * - In: https://sample.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * Out: baseURL=https://sample.openai.azure.com/openai/v1, apiVersionOverride="preview" + * + * - In: https://sample.openai.azure.com/openai/v1/responses?api-version=preview + * Out: baseURL=https://sample.openai.azure.com/openai/v1, apiVersionOverride="preview" + * + * - In: https://api.openai.com/v1/responses + * Out: baseURL unchanged (non-Azure) */ private _normalizeAzureResponsesBaseUrlAndVersion(inputBaseUrl: string): { baseURL: string @@ -866,6 +970,25 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // --- Responses helpers --- + /** + * Determines which OpenAI-compatible API flavor to use based on the URL path. + * - This is purely path-based and provider-agnostic (works for OpenAI, Azure OpenAI after normalization, etc.). + * + * Rules: + * - If path contains "/v1/responses" OR ends with "/responses" => "responses" + * - Else if path contains "/chat/completions" => "chat" + * - Else default to "chat" for backward compatibility + * + * Notes: + * - Trailing slashes are not required to match; we rely on substring checks. + * - Azure "portal" style URLs are normalized beforehand where applicable. + * + * Examples: + * - https://api.openai.com/v1/responses -> "responses" + * - https://api.openai.com/v1/chat/completions -> "chat" + * - https://myres.openai.azure.com/openai/v1 -> "chat" (default) + * - https://myres.openai.azure.com/openai/v1/responses -> "responses" + */ private _resolveApiFlavor(baseUrl: string): "responses" | "chat" { // Auto-detect by URL path const url = this._safeParseUrl(baseUrl) From cd512544ff7c2eb565e210807507d5a83d538507 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Wed, 27 Aug 2025 12:13:06 +0900 Subject: [PATCH 07/14] fix: omit the conversation in responses api. 
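
When a previous_response_id is available, the Responses API already holds the
prior turns server-side, so resending the full transcript only inflates the
request. createMessage now sends just the latest user turn in that case: a
plain "User: ..." string for text-only turns, or a one-item array when that
turn carries images, with no Developer preface. In addition, max_output_tokens
is now always set on Responses requests, and text.verbosity is sent only when
the user explicitly enables the new verbosity toggle in the OpenAI Compatible
settings.

Illustrative request shapes (field names follow the Responses API; values are
examples, not captured traffic):

    // first turn: full transcript, no previous_response_id
    { model: "gpt-5", input: "Developer: <system>\n\nUser: Hi\n\n", max_output_tokens: 123 }

    // follow-up turn: minimal input plus the continuity id
    { model: "gpt-5", input: "User: Latest", previous_response_id: "prev-1", max_output_tokens: 123 }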
--- src/api/providers/__tests__/openai.spec.ts | 127 ++++++++++++++++++ src/api/providers/openai.ts | 71 ++++++---- .../settings/providers/OpenAICompatible.tsx | 23 ++++ 3 files changed, 193 insertions(+), 28 deletions(-) diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index eeab8315521..ae522c0e81b 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -1860,3 +1860,130 @@ describe("OpenAI Compatible - Responses API parity improvements", () => { expect(args.reasoning.summary).toBeUndefined() }) }) + +describe("OpenAI Compatible - Responses API minimal input parity (new tests)", () => { + beforeEach(() => { + // @ts-ignore - reuse mocks from this spec module + mockCreate.mockClear() + // @ts-ignore - reuse mocks from this spec module + mockResponsesCreate.mockClear() + }) + + it("sends only latest user message when previous_response_id is provided (string input, no Developer preface)", async () => { + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + + const msgs: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: [{ type: "text" as const, text: "First" }] }, + { role: "assistant", content: [{ type: "text" as const, text: "Reply" }] }, + { role: "user", content: [{ type: "text" as const, text: "Latest" }] }, + ] + + const chunks: any[] = [] + for await (const ch of handler.createMessage("System Inst", msgs, { previousResponseId: "prev-1" } as any)) { + chunks.push(ch) + } + + // Ensure Responses API was used with minimal input + // @ts-ignore + expect(mockResponsesCreate).toHaveBeenCalled() + // @ts-ignore + const args = mockResponsesCreate.mock.calls[0][0] + + expect(typeof args.input).toBe("string") + expect(args.input).toBe("User: Latest") + expect(String(args.input)).not.toContain("Developer: System Inst") + }) + + it("uses array input with only latest user content when previous_response_id and last user has images (no Developer preface)", async () => { + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + + const msgs: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: [{ type: "text" as const, text: "Prev" }] }, + { role: "assistant", content: [{ type: "text" as const, text: "Ok" }] }, + { + role: "user", + content: [ + { type: "text" as const, text: "See" }, + { type: "image" as const, source: { media_type: "image/png", data: "IMGDATA" } as any }, + ], + }, + ] + + const iter = handler.createMessage("Sys", msgs, { previousResponseId: "prev-2" } as any) + for await (const _ of iter) { + // consume + } + + // @ts-ignore + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(Array.isArray(args.input)).toBe(true) + + const arr = args.input as any[] + expect(arr.length).toBe(1) + expect(arr[0]?.role).toBe("user") + + const contents = arr[0]?.content || [] + const hasImg = contents.some((p: any) => p?.type === "input_image") + expect(hasImg).toBe(true) + + // No Developer preface should be injected in minimal mode + const hasDev = contents.some( + (p: any) => p?.type === "input_text" && typeof p.text === "string" && p.text.includes("Developer:"), + ) + expect(hasDev).toBe(false) + }) + + it("always includes max_output_tokens for Responses API", async () => { + const handler = new OpenAiHandler({ 
+ openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + includeMaxTokens: false, // should still include based on model info + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 123, // fallback used when modelMaxTokens not set + supportsPromptCache: false, + }, + }) + + for await (const _ of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + // consume + } + + // @ts-ignore + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(args).toHaveProperty("max_output_tokens", 123) + }) + + it("does not include text.verbosity when not provided", async () => { + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + + for await (const _ of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + // consume + } + + // @ts-ignore + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(args).not.toHaveProperty("text") + }) +}) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 2a43389911b..774baa77093 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -182,21 +182,35 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Build Responses payload (align with OpenAI Native Responses API formatting) // Azure- and Responses-compatible multimodal handling: - // - Use array input ONLY when the latest user message contains images - // - Include the most recent assistant message as input_text to preserve continuity - // - Always include a Developer preface + // - Use array input ONLY when the latest user message contains images (initial turn) + // - When previous_response_id is present, send only the latest user turn: + // • Text-only => single string "User: ...", no Developer preface + // • With images => one-item array containing only the latest user content (no Developer preface) const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") const lastUserHasImages = !!lastUserMessage && Array.isArray(lastUserMessage.content) && lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") + // Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input) + const previousId = metadata?.suppressPreviousResponseId + ? undefined + : (metadata?.previousResponseId ?? this.lastResponseId) + + const minimalInputMode = Boolean(previousId) + let inputPayload: unknown - if (lastUserHasImages && lastUserMessage) { - // Select messages to retain context in array mode: - // - The most recent assistant message (text-only, as input_text) - // - All user messages that contain images - // - The latest user message (even if it has no image) + if (minimalInputMode && lastUserMessage) { + // Minimal-mode: only the latest user message (no Developer preface) + if (lastUserHasImages) { + // Single-item array with just the latest user content + inputPayload = this._toResponsesInput([lastUserMessage]) + } else { + // Single message string "User: ..." 
+ inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true) + } + } else if (lastUserHasImages && lastUserMessage) { + // Initial turn with images: include Developer preface and minimal prior context to preserve continuity const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant") const messagesForArray = messages.filter((m) => { @@ -219,15 +233,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } inputPayload = [developerPreface, ...arrayInput] } else { - // Pure text history: use compact transcript (includes both user and assistant turns) + // Pure text history: full compact transcript (includes both user and assistant turns) inputPayload = this._formatResponsesInput(systemPrompt, messages) } const usedArrayInput = Array.isArray(inputPayload) - const previousId = metadata?.suppressPreviousResponseId - ? undefined - : (metadata?.previousResponseId ?? this.lastResponseId) - const basePayload: Record = { model: modelId, input: inputPayload, @@ -262,20 +272,19 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE } - // Verbosity: include via text.verbosity (Responses API expectation per openai-native handler) - const effectiveVerbosity = this.options.verbosity || verbosity - if (effectiveVerbosity) { + // Verbosity: include only when explicitly specified in settings + if (this.options.verbosity) { ;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { - verbosity: effectiveVerbosity as "low" | "medium" | "high", + verbosity: this.options.verbosity as "low" | "medium" | "high", } } - // Add max_output_tokens if requested (Azure Responses naming) - if (this.options.includeMaxTokens === true) { - basePayload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens - } + // Always include max_output_tokens for Responses API to cap output length + const reservedMax = (modelParams as any)?.maxTokens + ;(basePayload as Record).max_output_tokens = + this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens - // Non-streaming path (preserves existing behavior and tests) + // Non-streaming path if (nonStreaming) { try { const response = await ( @@ -314,10 +323,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ).responses.create(withoutVerbosity) yield* this._yieldResponsesResult(response as unknown, modelInfo) } else if (usedArrayInput && this._isInputTextInvalidError(err)) { - // Azure-specific fallback: retry with string transcript when array input is rejected + // Azure-specific fallback: retry with a minimal single-message string when array input is rejected const retryPayload: Record = { ...basePayload, - input: this._formatResponsesInput(systemPrompt, messages), + input: + previousId && lastUserMessage + ? 
this._formatResponsesSingleMessage(lastUserMessage, true) + : this._formatResponsesInput(systemPrompt, messages), } const response = await ( this.client as unknown as { @@ -412,10 +424,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo) } } else if (usedArrayInput && this._isInputTextInvalidError(err)) { - // Azure-specific fallback for streaming: retry with string transcript while keeping stream: true + // Azure-specific fallback for streaming: retry with minimal single-message string while keeping stream: true const retryStreamingPayload: Record = { ...streamingPayload, - input: this._formatResponsesInput(systemPrompt, messages), + input: + previousId && lastUserMessage + ? this._formatResponsesSingleMessage(lastUserMessage, true) + : this._formatResponsesInput(systemPrompt, messages), } const maybeStreamRetry = await ( this.client as unknown as { @@ -661,9 +676,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl payload.temperature = this.options.modelTemperature } - // Verbosity via text.verbosity + // Verbosity via text.verbosity - include only when explicitly specified if (this.options.verbosity) { - payload.text = { verbosity: this.options.verbosity } + payload.text = { verbosity: this.options.verbosity as "low" | "medium" | "high" } } // max_output_tokens diff --git a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx index ee462296b5f..b64ee187a12 100644 --- a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx +++ b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx @@ -22,6 +22,7 @@ import { inputEventTransform, noTransform } from "../transforms" import { ModelPicker } from "../ModelPicker" import { R1FormatSetting } from "../R1FormatSetting" import { ThinkingBudget } from "../ThinkingBudget" +import { Verbosity } from "../Verbosity" type OpenAICompatibleProps = { apiConfiguration: ProviderSettings @@ -40,6 +41,7 @@ export const OpenAICompatible = ({ const [azureApiVersionSelected, setAzureApiVersionSelected] = useState(!!apiConfiguration?.azureApiVersion) const [openAiLegacyFormatSelected, setOpenAiLegacyFormatSelected] = useState(!!apiConfiguration?.openAiLegacyFormat) + const [verbositySelected, setVerbositySelected] = useState(!!apiConfiguration?.verbosity) const [openAiModels, setOpenAiModels] = useState | null>(null) @@ -282,6 +284,27 @@ export const OpenAICompatible = ({ /> )} +
+ { + setVerbositySelected(checked) + if (!checked) { + setApiConfigurationField("verbosity", undefined as any) + } else if (!apiConfiguration.verbosity) { + setApiConfigurationField("verbosity", "medium" as any) + } + }}> + {t("settings:providers.verbosity.label")} + + {verbositySelected && ( + + )} +
{t("settings:providers.customModel.capabilities")} From bf49d7775428a79e4c2566826ebe7131da1b3b6a Mon Sep 17 00:00:00 2001 From: Lagyu Date: Wed, 27 Aug 2025 12:15:34 +0900 Subject: [PATCH 08/14] chore: remove unnecessary type cast --- .../src/components/settings/providers/OpenAICompatible.tsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx index b64ee187a12..19f1fdc32ab 100644 --- a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx +++ b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx @@ -290,9 +290,9 @@ export const OpenAICompatible = ({ onChange={(checked: boolean) => { setVerbositySelected(checked) if (!checked) { - setApiConfigurationField("verbosity", undefined as any) + setApiConfigurationField("verbosity", undefined) } else if (!apiConfiguration.verbosity) { - setApiConfigurationField("verbosity", "medium" as any) + setApiConfigurationField("verbosity", "medium") } }}> {t("settings:providers.verbosity.label")} @@ -300,7 +300,7 @@ export const OpenAICompatible = ({ {verbositySelected && ( )} From 48d1a61fb8ac54eb2a64e38014575639efe3d1da Mon Sep 17 00:00:00 2001 From: Lagyu Date: Fri, 29 Aug 2025 18:02:05 +0900 Subject: [PATCH 09/14] refactor(openai): centralize Responses error handling via _responsesCreateWithRetries; dedupe checks for previous_response_id, verbosity, and Azure input_text invalid in streaming and non-streaming paths --- src/api/providers/openai.ts | 276 +++++++++++++----------------------- 1 file changed, 99 insertions(+), 177 deletions(-) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 774baa77093..4dda6f2afb0 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -286,175 +286,41 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Non-streaming path if (nonStreaming) { - try { - const response = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(basePayload) - yield* this._yieldResponsesResult(response as unknown, modelInfo) - } catch (err: unknown) { - // Retry without previous_response_id if server rejects it (400 "Previous response ... 
not found") - if (previousId && this._isPreviousResponseNotFoundError(err)) { - const { previous_response_id: _omitPrev, ...withoutPrev } = basePayload as { - previous_response_id?: unknown - [key: string]: unknown - } - // Clear stored continuity to avoid reusing a bad id - this.lastResponseId = undefined - const response = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(withoutPrev) - yield* this._yieldResponsesResult(response as unknown, modelInfo) - } - // Graceful downgrade if verbosity is rejected by server (400 unknown/unsupported parameter) - else if ("text" in basePayload && this._isVerbosityUnsupportedError(err)) { - // Remove text.verbosity and retry once - const { text: _omit, ...withoutVerbosity } = basePayload as { text?: unknown } & Record< - string, - unknown - > - const response = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(withoutVerbosity) - yield* this._yieldResponsesResult(response as unknown, modelInfo) - } else if (usedArrayInput && this._isInputTextInvalidError(err)) { - // Azure-specific fallback: retry with a minimal single-message string when array input is rejected - const retryPayload: Record = { - ...basePayload, - input: - previousId && lastUserMessage - ? this._formatResponsesSingleMessage(lastUserMessage, true) - : this._formatResponsesInput(systemPrompt, messages), - } - const response = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(retryPayload) - yield* this._yieldResponsesResult(response as unknown, modelInfo) - } else { - throw err - } - } + const response = await this._responsesCreateWithRetries(basePayload, { + usedArrayInput, + lastUserMessage, + previousId, + systemPrompt, + messages, + }) + yield* this._yieldResponsesResult(response as unknown, modelInfo) return } // Streaming path (auto-fallback to non-streaming result if provider ignores stream flag) const streamingPayload: Record = { ...basePayload, stream: true } - try { - const maybeStream = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(streamingPayload) - - const isAsyncIterable = (obj: unknown): obj is AsyncIterable => - typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" - - if (isAsyncIterable(maybeStream)) { - for await (const chunk of handleResponsesStream(maybeStream, { - onResponseId: (id) => { - this.lastResponseId = id - }, - })) { - yield chunk - } - } else { - // Some providers may ignore the stream flag and return a complete response - yield* this._yieldResponsesResult(maybeStream as unknown, modelInfo) - } - } catch (err: unknown) { - // Retry without previous_response_id if server rejects it (400 "Previous response ... 
not found") - if (previousId && this._isPreviousResponseNotFoundError(err)) { - const { previous_response_id: _omitPrev, ...withoutPrev } = streamingPayload as { - previous_response_id?: unknown - [key: string]: unknown - } - this.lastResponseId = undefined - const maybeStreamRetry = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(withoutPrev) - - const isAsyncIterable = (obj: unknown): obj is AsyncIterable => - typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" - - if (isAsyncIterable(maybeStreamRetry)) { - for await (const chunk of handleResponsesStream(maybeStreamRetry, { - onResponseId: (id) => { - this.lastResponseId = id - }, - })) { - yield chunk - } - } else { - yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo) - } - } - // Graceful verbosity removal on 400 - else if ("text" in streamingPayload && this._isVerbosityUnsupportedError(err)) { - const { text: _omit, ...withoutVerbosity } = streamingPayload as { text?: unknown } & Record< - string, - unknown - > - const maybeStreamRetry = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(withoutVerbosity) - - const isAsyncIterable = (obj: unknown): obj is AsyncIterable => - typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" - - if (isAsyncIterable(maybeStreamRetry)) { - for await (const chunk of handleResponsesStream(maybeStreamRetry, { - onResponseId: (id) => { - this.lastResponseId = id - }, - })) { - yield chunk - } - } else { - yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo) - } - } else if (usedArrayInput && this._isInputTextInvalidError(err)) { - // Azure-specific fallback for streaming: retry with minimal single-message string while keeping stream: true - const retryStreamingPayload: Record = { - ...streamingPayload, - input: - previousId && lastUserMessage - ? 
this._formatResponsesSingleMessage(lastUserMessage, true) - : this._formatResponsesInput(systemPrompt, messages), - } - const maybeStreamRetry = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(retryStreamingPayload) + const maybeStream = await this._responsesCreateWithRetries(streamingPayload, { + usedArrayInput, + lastUserMessage, + previousId, + systemPrompt, + messages, + }) - const isAsyncIterable = (obj: unknown): obj is AsyncIterable => - typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" + const isAsyncIterable = (obj: unknown): obj is AsyncIterable => + typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" - if (isAsyncIterable(maybeStreamRetry)) { - for await (const chunk of handleResponsesStream(maybeStreamRetry, { - onResponseId: (id) => { - this.lastResponseId = id - }, - })) { - yield chunk - } - } else { - yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo) - } - } else { - throw err + if (isAsyncIterable(maybeStream)) { + for await (const chunk of handleResponsesStream(maybeStream, { + onResponseId: (id) => { + this.lastResponseId = id + }, + })) { + yield chunk } + } else { + // Some providers may ignore the stream flag and return a complete response + yield* this._yieldResponsesResult(maybeStream as unknown, modelInfo) } return } @@ -686,25 +552,22 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl payload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens } + const response = await this._responsesCreateWithRetries(payload as unknown as Record, { + usedArrayInput: false, + lastUserMessage: undefined, + previousId: undefined, + systemPrompt: "", + messages: [], + }) try { - const response = await this.client.responses.create(payload) - try { - const respId = (response as { id?: unknown } | undefined)?.id - if (typeof respId === "string" && respId.length > 0) { - this.lastResponseId = respId - } - } catch { - // ignore - } - return this._extractResponsesText(response) ?? "" - } catch (err: unknown) { - if (payload.text && this._isVerbosityUnsupportedError(err)) { - const { text: _omit, ...withoutVerbosity } = payload - const response = await this.client.responses.create(withoutVerbosity) - return this._extractResponsesText(response) ?? "" + const respId = (response as { id?: unknown } | undefined)?.id + if (typeof respId === "string" && respId.length > 0) { + this.lastResponseId = respId } - throw err + } catch { + // ignore } + return this._extractResponsesText(response) ?? "" } const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { @@ -1095,6 +958,65 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const msgRaw = (anyErr.message ?? anyErr.error?.message ?? "").toString().toLowerCase() return status === 400 && msgRaw.includes("invalid value") && msgRaw.includes("input_text") } + + /** + * Centralized Responses.create with one-shot retries for common provider errors: + * - 400 "Previous response ... not found" -> drop previous_response_id and retry + * - 400 unknown/unsupported "text.verbosity" -> remove text and retry + * - 400 invalid value for input_text (Azure) -> rebuild single-message string input and retry + * Returns either an AsyncIterable (streaming) or a full response object (non-streaming). 
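+	 *
+	 * Example (illustrative; the payload mirrors what createMessage builds):
+	 *
+	 *   const result = await this._responsesCreateWithRetries(
+	 *   	{ model: modelId, input, stream: true },
+	 *   	{ usedArrayInput, lastUserMessage, previousId, systemPrompt, messages },
+	 *   )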
+ */ + private async _responsesCreateWithRetries( + payload: Record, + opts: { + usedArrayInput: boolean + lastUserMessage?: Anthropic.Messages.MessageParam + previousId?: string + systemPrompt: string + messages: Anthropic.Messages.MessageParam[] + }, + ): Promise { + const create = (body: Record) => + ( + this.client as unknown as { responses: { create: (b: Record) => Promise } } + ).responses.create(body) + + try { + return await create(payload) + } catch (err: unknown) { + // Retry without previous_response_id if server rejects it + if (opts.previousId && this._isPreviousResponseNotFoundError(err)) { + const { previous_response_id: _omitPrev, ...withoutPrev } = payload as { + previous_response_id?: unknown + [key: string]: unknown + } + this.lastResponseId = undefined + return await create(withoutPrev) + } + + // Graceful downgrade if verbosity is rejected by server + if ("text" in payload && this._isVerbosityUnsupportedError(err)) { + const { text: _omit, ...withoutVerbosity } = payload as { text?: unknown } & Record + return await create(withoutVerbosity) + } + + // Azure-specific fallback when array input is rejected + if (opts.usedArrayInput && this._isInputTextInvalidError(err)) { + const fallbackInput = + opts.previousId && opts.lastUserMessage + ? this._formatResponsesSingleMessage(opts.lastUserMessage, true) + : this._formatResponsesInput(opts.systemPrompt, opts.messages) + + const retryPayload: Record = { + ...payload, + input: fallbackInput, + } + return await create(retryPayload) + } + + throw err + } + } private async *_yieldResponsesResult(response: any, modelInfo: ModelInfo): ApiStream { // Capture response id for continuity when present try { From eb25c45e0d0929091f02aade0332c7a95f110930 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Fri, 5 Sep 2025 15:54:14 +0900 Subject: [PATCH 10/14] chore: remove unnecessary type cast --- src/api/providers/__tests__/openai.spec.ts | 73 ++++++++++++++-------- src/api/providers/openai.ts | 50 ++++++++------- src/api/transform/responses-stream.ts | 12 ++-- 3 files changed, 81 insertions(+), 54 deletions(-) diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index ae522c0e81b..cd2a64a87eb 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -7,6 +7,20 @@ import OpenAI from "openai" import { Package } from "../../../shared/package" import axios from "axios" +type ErrorWithStatus = Error & { status?: number } + +function getMockCallsOf(fn: unknown): any[] { + const isObj = (v: unknown): v is Record => typeof v === "object" && v !== null + if (isObj(fn) || typeof fn === "function") { + const rec = fn as Record + const mock = rec["mock"] + if (isObj(mock)) { + const calls = mock["calls"] + if (Array.isArray(calls)) return calls + } + } + return [] +} const mockCreate = vitest.fn() const mockResponsesCreate = vitest.fn() @@ -424,9 +438,9 @@ describe("OpenAiHandler", () => { }) it("should handle rate limiting", async () => { - const rateLimitError = new Error("Rate limit exceeded") + const rateLimitError: ErrorWithStatus = new Error("Rate limit exceeded") rateLimitError.name = "Error" - ;(rateLimitError as any).status = 429 + rateLimitError.status = 429 mockCreate.mockRejectedValueOnce(rateLimitError) const stream = handler.createMessage("system prompt", testMessages) @@ -1198,9 +1212,9 @@ describe("OpenAI Compatible - Responses API", () => { it("Verbosity (Responses): include when set; if server rejects, retry without it (warn once)", async () 
=> { // First call throws 400 for 'verbosity', second succeeds - mockResponsesCreate.mockImplementationOnce((_opts: any) => { - const err = new Error("Unsupported parameter: 'verbosity'") - ;(err as any).status = 400 + mockResponsesCreate.mockImplementationOnce((_opts: unknown) => { + const err: ErrorWithStatus = new Error("Unsupported parameter: 'verbosity'") + err.status = 400 throw err }) @@ -1295,10 +1309,13 @@ describe("OpenAI Compatible - Responses API", () => { // Ensure SDK constructor was called with normalized baseURL and 'preview' apiVersion (per requirement) // Note: AzureOpenAI and OpenAI share same mock constructor; inspect last call - const ctorCalls = vi.mocked(OpenAI as unknown as any).mock.calls as any[] - const lastCtorArgs = ctorCalls[ctorCalls.length - 1]?.[0] || {} - expect(lastCtorArgs.baseURL).toBe("https://sample-name.openai.azure.com/openai/v1") - expect(lastCtorArgs.apiVersion).toBe("preview") + const ctorCalls = getMockCallsOf(OpenAI) + const lastCall = ctorCalls[ctorCalls.length - 1] + const lastArg0 = Array.isArray(lastCall) ? lastCall[0] : undefined + const lastCtorArgs = + typeof lastArg0 === "object" && lastArg0 !== null ? (lastArg0 as Record) : {} + expect(lastCtorArgs["baseURL"]).toBe("https://sample-name.openai.azure.com/openai/v1") + expect(lastCtorArgs["apiVersion"]).toBe("preview") }) it("streams Responses API when provider returns AsyncIterable", async () => { @@ -1461,7 +1478,7 @@ describe("OpenAI Compatible - Responses API (multimodal)", () => { { type: "image" as const, // Minimal Anthropic-style inline image (base64) block - source: { media_type: "image/png", data: "BASE64DATA" } as any, + source: { type: "base64" as const, media_type: "image/png", data: "BASE64DATA" }, }, ], }, @@ -1478,7 +1495,7 @@ describe("OpenAI Compatible - Responses API (multimodal)", () => { // Input should be an array (structured input mode) expect(Array.isArray(args.input)).toBe(true) - const arr = args.input as any[] + const arr = Array.isArray(args.input) ? args.input : [] // First element should be Developer preface as input_text expect(arr[0]?.role).toBe("user") @@ -1537,7 +1554,7 @@ describe("OpenAI Compatible - Responses API (multimodal)", () => { { type: "text" as const, text: "Look at this" }, { type: "image" as const, - source: { media_type: "image/jpeg", data: "IMGDATA" } as any, + source: { type: "base64" as const, media_type: "image/jpeg", data: "IMGDATA" }, }, ], }, @@ -1648,7 +1665,7 @@ describe("OpenAI Compatible - Responses API conversation continuity", () => { for await (const _ of handler.createMessage( "sys", [{ role: "user", content: [{ type: "text" as const, text: "Turn 2" }] }], - { suppressPreviousResponseId: true } as any, + { taskId: "test", suppressPreviousResponseId: true }, )) { } @@ -1668,9 +1685,9 @@ describe("OpenAI Compatible - Responses API parity improvements", () => { it("retries without previous_response_id when server returns 400 'Previous response ... 
not found' (non-streaming)", async () => { // First call throws 400 for previous_response_id, second succeeds mockResponsesCreate - .mockImplementationOnce((_opts: any) => { - const err = new Error("Previous response rid-bad not found") - ;(err as any).status = 400 + .mockImplementationOnce((_opts: unknown) => { + const err: ErrorWithStatus = new Error("Previous response rid-bad not found") + err.status = 400 throw err }) .mockImplementationOnce(async (_opts: any) => { @@ -1688,7 +1705,7 @@ describe("OpenAI Compatible - Responses API parity improvements", () => { for await (const ch of h.createMessage( "sys", [{ role: "user", content: [{ type: "text" as const, text: "Turn" }] }], - { previousResponseId: "rid-bad" } as any, + { taskId: "test", previousResponseId: "rid-bad" }, )) { chunks.push(ch) } @@ -1709,9 +1726,9 @@ describe("OpenAI Compatible - Responses API parity improvements", () => { it("retries without previous_response_id when server returns 400 (streaming)", async () => { // First call throws, second returns a stream mockResponsesCreate - .mockImplementationOnce((_opts: any) => { - const err = new Error("Previous response not found") - ;(err as any).status = 400 + .mockImplementationOnce((_opts: unknown) => { + const err: ErrorWithStatus = new Error("Previous response not found") + err.status = 400 throw err }) .mockImplementationOnce(async (_opts: any) => { @@ -1734,7 +1751,7 @@ describe("OpenAI Compatible - Responses API parity improvements", () => { for await (const ch of h.createMessage( "sys", [{ role: "user", content: [{ type: "text" as const, text: "Hi" }] }], - { previousResponseId: "bad-id" } as any, + { taskId: "test", previousResponseId: "bad-id" }, )) { out.push(ch) } @@ -1884,7 +1901,10 @@ describe("OpenAI Compatible - Responses API minimal input parity (new tests)", ( ] const chunks: any[] = [] - for await (const ch of handler.createMessage("System Inst", msgs, { previousResponseId: "prev-1" } as any)) { + for await (const ch of handler.createMessage("System Inst", msgs, { + taskId: "test", + previousResponseId: "prev-1", + })) { chunks.push(ch) } @@ -1914,12 +1934,15 @@ describe("OpenAI Compatible - Responses API minimal input parity (new tests)", ( role: "user", content: [ { type: "text" as const, text: "See" }, - { type: "image" as const, source: { media_type: "image/png", data: "IMGDATA" } as any }, + { + type: "image" as const, + source: { type: "base64" as const, media_type: "image/png", data: "IMGDATA" }, + }, ], }, ] - const iter = handler.createMessage("Sys", msgs, { previousResponseId: "prev-2" } as any) + const iter = handler.createMessage("Sys", msgs, { taskId: "test", previousResponseId: "prev-2" }) for await (const _ of iter) { // consume } @@ -1928,7 +1951,7 @@ describe("OpenAI Compatible - Responses API minimal input parity (new tests)", ( const args = mockResponsesCreate.mock.calls.pop()?.[0] expect(Array.isArray(args.input)).toBe(true) - const arr = args.input as any[] + const arr = Array.isArray(args.input) ? 
args.input : [] expect(arr.length).toBe(1) expect(arr[0]?.role).toBe("user") diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 4dda6f2afb0..2b12015aaf1 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -25,7 +25,6 @@ import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { getApiRequestTimeout } from "./utils/timeout-config" -import { ResponseCreateParamsNonStreaming } from "openai/resources/responses/responses" // TODO: Rename this to OpenAICompatibleHandler. Also, I think the // `OpenAINativeHandler` can subclass from this, since it's obviously @@ -151,19 +150,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { // Gather model params (centralized: temperature, max tokens, reasoning, verbosity) - const modelParams = this.getModel() - const { - info: modelInfo, - reasoning, - reasoningEffort, - verbosity, - } = modelParams as unknown as { - id: string - info: ModelInfo - reasoning?: { reasoning_effort?: "low" | "medium" | "high" } - reasoningEffort?: "minimal" | "low" | "medium" | "high" - verbosity?: "low" | "medium" | "high" - } + const { info: modelInfo } = this.getModel() + const openAiParams = getModelParams({ + format: "openai", + modelId: this.options.openAiModelId ?? "", + model: modelInfo, + settings: this.options, + }) + const { reasoning, reasoningEffort, verbosity } = openAiParams const modelUrl = this.options.openAiBaseUrl ?? "" const modelId = this.options.openAiModelId ?? "" @@ -280,7 +274,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } // Always include max_output_tokens for Responses API to cap output length - const reservedMax = (modelParams as any)?.maxTokens + const reservedMax = openAiParams.maxTokens ;(basePayload as Record).max_output_tokens = this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens @@ -293,7 +287,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl systemPrompt, messages, }) - yield* this._yieldResponsesResult(response as unknown, modelInfo) + yield* this._yieldResponsesResult(response, modelInfo) return } @@ -320,7 +314,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } else { // Some providers may ignore the stream flag and return a complete response - yield* this._yieldResponsesResult(maybeStream as unknown, modelInfo) + yield* this._yieldResponsesResult(maybeStream, modelInfo) } return } @@ -521,7 +515,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } as Anthropic.Messages.MessageParam, /*includeRole*/ true, ) - const payload: ResponseCreateParamsNonStreaming = { + const payload: Record = { model: model.id, input: formattedInput, } @@ -552,7 +546,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl payload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens } - const response = await this._responsesCreateWithRetries(payload as unknown as Record, { + const response = await this._responsesCreateWithRetries(payload, { usedArrayInput: false, lastUserMessage: undefined, previousId: undefined, @@ -976,10 +970,20 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl messages: Anthropic.Messages.MessageParam[] }, ): Promise { - const 
create = (body: Record) => - ( - this.client as unknown as { responses: { create: (b: Record) => Promise } } - ).responses.create(body) + const create = (body: Record) => { + const hasResponsesCreate = ( + obj: unknown, + ): obj is { responses: { create: (b: Record) => Promise } } => { + if (obj == null || typeof obj !== "object") return false + const responses = (obj as Record).responses + if (responses == null || typeof responses !== "object") return false + return typeof (responses as Record).create === "function" + } + if (!hasResponsesCreate(this.client)) { + throw new Error("Responses API not available on client") + } + return this.client.responses.create(body) + } try { return await create(payload) diff --git a/src/api/transform/responses-stream.ts b/src/api/transform/responses-stream.ts index 8b6eb3fa7ec..f0152c9ec36 100644 --- a/src/api/transform/responses-stream.ts +++ b/src/api/transform/responses-stream.ts @@ -18,7 +18,7 @@ export async function* handleResponsesStream( for await (const event of stream) { // Surface response.id to callers when available (for conversation continuity) if (isObject(event)) { - const resp = (event as Record).response as unknown + const resp = (event as Record).response if (isObject(resp)) { const rid = (resp as Record).id if (typeof rid === "string") { @@ -224,11 +224,11 @@ function isDoneEvent(event: unknown): event is DoneEvent { function getChoiceDeltaContent(event: unknown): string | undefined { if (!isObject(event)) return undefined - const choices = (event as Record).choices as unknown + const choices = (event as Record).choices if (!Array.isArray(choices) || choices.length === 0) return undefined - const first = choices[0] as unknown + const first = choices[0] if (!isObject(first)) return undefined - const delta = (first as Record).delta as unknown + const delta = (first as Record).delta if (!isObject(delta)) return undefined const content = (delta as Record).content if (content == null) return undefined @@ -237,11 +237,11 @@ function getChoiceDeltaContent(event: unknown): string | undefined { function extractUsage(event: unknown): ResponseUsage | undefined { if (!isObject(event)) return undefined - const resp = (event as Record).response as unknown + const resp = (event as Record).response if (isObject(resp) && isObject((resp as Record).usage)) { return (resp as Record).usage as ResponseUsage } - const usage = (event as Record).usage as unknown + const usage = (event as Record).usage if (isObject(usage)) { return usage as ResponseUsage } From 1144bf97152367a1eea336dc6f7a5a007f75dc59 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Fri, 5 Sep 2025 16:44:46 +0900 Subject: [PATCH 11/14] refactor(openai): extract Responses API handling into helper and delegate from createMessage - Move Responses API logic to private _handleResponsesFlavor - Preserve streaming, retries, conversation continuity, reasoning/verbosity, and usage - All existing tests pass --- src/api/providers/openai.ts | 300 +++++++++++++++++++----------------- 1 file changed, 156 insertions(+), 144 deletions(-) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 16c5316b4be..bf83af08a7d 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -174,150 +174,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // If Responses API is selected, use the Responses payload and endpoint if (flavor === "responses") { - const nonStreaming = !(this.options.openAiStreamingEnabled ?? 
true) - - // Build Responses payload (align with OpenAI Native Responses API formatting) - // Azure- and Responses-compatible multimodal handling: - // - Use array input ONLY when the latest user message contains images (initial turn) - // - When previous_response_id is present, send only the latest user turn: - // • Text-only => single string "User: ...", no Developer preface - // • With images => one-item array containing only the latest user content (no Developer preface) - const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") - const lastUserHasImages = - !!lastUserMessage && - Array.isArray(lastUserMessage.content) && - lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") - - // Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input) - const previousId = metadata?.suppressPreviousResponseId - ? undefined - : (metadata?.previousResponseId ?? this.lastResponseId) - - const minimalInputMode = Boolean(previousId) - - let inputPayload: unknown - if (minimalInputMode && lastUserMessage) { - // Minimal-mode: only the latest user message (no Developer preface) - if (lastUserHasImages) { - // Single-item array with just the latest user content - inputPayload = this._toResponsesInput([lastUserMessage]) - } else { - // Single message string "User: ..." - inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true) - } - } else if (lastUserHasImages && lastUserMessage) { - // Initial turn with images: include Developer preface and minimal prior context to preserve continuity - const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant") - - const messagesForArray = messages.filter((m) => { - if (m.role === "assistant") { - return lastAssistantMessage ? m === lastAssistantMessage : false - } - if (m.role === "user") { - const hasImage = - Array.isArray(m.content) && - m.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") - return hasImage || m === lastUserMessage - } - return false - }) - - const arrayInput = this._toResponsesInput(messagesForArray) - const developerPreface = { - role: "user" as const, - content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }], - } - inputPayload = [developerPreface, ...arrayInput] - } else { - // Pure text history: full compact transcript (includes both user and assistant turns) - inputPayload = this._formatResponsesInput(systemPrompt, messages) - } - const usedArrayInput = Array.isArray(inputPayload) - - const basePayload: Record = { - model: modelId, - input: inputPayload, - ...(previousId ? { previous_response_id: previousId } : {}), - } - - // Reasoning effort (Responses expects: reasoning: { effort, summary? }) - // Parity with native: support "minimal" and include summary: "auto" unless explicitly disabled - if (this.options.enableReasoningEffort && (this.options.reasoningEffort || reasoningEffort)) { - const effort = (this.options.reasoningEffort || reasoningEffort) as - | "minimal" - | "low" - | "medium" - | "high" - | undefined - if (effort) { - ;( - basePayload as { - reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } - } - ).reasoning = { - effort, - ...(this.options.enableGpt5ReasoningSummary !== false ? 
{ summary: "auto" as const } : {}), - } - } - } - - // Temperature (only include when explicitly set by the user) - if (this.options.modelTemperature !== undefined) { - basePayload.temperature = this.options.modelTemperature - } else if (deepseekReasoner) { - basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE - } - - // Verbosity: include only when explicitly specified in settings - if (this.options.verbosity) { - ;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { - verbosity: this.options.verbosity as "low" | "medium" | "high", - } - } - - // Always include max_output_tokens for Responses API to cap output length - const reservedMax = openAiParams.maxTokens - ;(basePayload as Record).max_output_tokens = - this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens - - // Non-streaming path - if (nonStreaming) { - const response = await this._responsesCreateWithRetries(basePayload, { - usedArrayInput, - lastUserMessage, - previousId, - systemPrompt, - messages, - }) - yield* this._yieldResponsesResult(response, modelInfo) - return - } - - // Streaming path (auto-fallback to non-streaming result if provider ignores stream flag) - const streamingPayload: Record = { ...basePayload, stream: true } - const maybeStream = await this._responsesCreateWithRetries(streamingPayload, { - usedArrayInput, - lastUserMessage, - previousId, - systemPrompt, - messages, - }) - - const isAsyncIterable = (obj: unknown): obj is AsyncIterable => - typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" - - if (isAsyncIterable(maybeStream)) { - for await (const chunk of handleResponsesStream(maybeStream, { - onResponseId: (id) => { - this.lastResponseId = id - }, - })) { - yield chunk - } - } else { - // Some providers may ignore the stream flag and return a complete response - yield* this._yieldResponsesResult(maybeStream, modelInfo) - } + yield* this._handleResponsesFlavor(systemPrompt, messages, metadata, modelInfo, openAiParams) return } @@ -869,6 +726,161 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // --- Responses helpers --- + private async *_handleResponsesFlavor( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata: ApiHandlerCreateMessageMetadata | undefined, + modelInfo: ModelInfo, + openAiParams: any, + ): ApiStream { + const modelId = this.options.openAiModelId ?? "" + const nonStreaming = !(this.options.openAiStreamingEnabled ?? true) + + // Build Responses payload (align with OpenAI Native Responses API formatting) + // Azure- and Responses-compatible multimodal handling: + // - Use array input ONLY when the latest user message contains images (initial turn) + // - When previous_response_id is present, send only the latest user turn: + // • Text-only => single string "User: ...", no Developer preface + // • With images => one-item array containing only the latest user content (no Developer preface) + const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") + const lastUserHasImages = + !!lastUserMessage && + Array.isArray(lastUserMessage.content) && + lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") + + // Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input) + const previousId = metadata?.suppressPreviousResponseId + ? undefined + : (metadata?.previousResponseId ?? 
this.lastResponseId) + + const minimalInputMode = Boolean(previousId) + + let inputPayload: unknown + if (minimalInputMode && lastUserMessage) { + // Minimal-mode: only the latest user message (no Developer preface) + if (lastUserHasImages) { + // Single-item array with just the latest user content + inputPayload = this._toResponsesInput([lastUserMessage]) + } else { + // Single message string "User: ..." + inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true) + } + } else if (lastUserHasImages && lastUserMessage) { + // Initial turn with images: include Developer preface and minimal prior context to preserve continuity + const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant") + + const messagesForArray = messages.filter((m) => { + if (m.role === "assistant") { + return lastAssistantMessage ? m === lastAssistantMessage : false + } + if (m.role === "user") { + const hasImage = + Array.isArray(m.content) && + m.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") + return hasImage || m === lastUserMessage + } + return false + }) + + const arrayInput = this._toResponsesInput(messagesForArray) + const developerPreface = { + role: "user" as const, + content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }], + } + inputPayload = [developerPreface, ...arrayInput] + } else { + // Pure text history: full compact transcript (includes both user and assistant turns) + inputPayload = this._formatResponsesInput(systemPrompt, messages) + } + const usedArrayInput = Array.isArray(inputPayload) + + const basePayload: Record = { + model: modelId, + input: inputPayload, + ...(previousId ? { previous_response_id: previousId } : {}), + } + + // Reasoning effort (Responses expects: reasoning: { effort, summary? }) + // Parity with native: support "minimal" and include summary: "auto" unless explicitly disabled + if (this.options.enableReasoningEffort && (this.options.reasoningEffort || openAiParams?.reasoningEffort)) { + const effort = (this.options.reasoningEffort || openAiParams?.reasoningEffort) as + | "minimal" + | "low" + | "medium" + | "high" + | undefined + if (effort) { + ;( + basePayload as { + reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } + } + ).reasoning = { + effort, + ...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}), + } + } + } + + // Temperature (only include when explicitly set by the user) + const deepseekReasoner = modelId.includes("deepseek-reasoner") || (this.options.openAiR1FormatEnabled ?? 
false) + if (this.options.modelTemperature !== undefined) { + basePayload.temperature = this.options.modelTemperature + } else if (deepseekReasoner) { + basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE + } + + // Verbosity: include only when explicitly specified in settings + if (this.options.verbosity) { + ;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { + verbosity: this.options.verbosity as "low" | "medium" | "high", + } + } + + // Always include max_output_tokens for Responses API to cap output length + const reservedMax = openAiParams?.maxTokens + ;(basePayload as Record).max_output_tokens = + this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens + + // Non-streaming path + if (nonStreaming) { + const response = await this._responsesCreateWithRetries(basePayload, { + usedArrayInput, + lastUserMessage, + previousId, + systemPrompt, + messages, + }) + yield* this._yieldResponsesResult(response, modelInfo) + return + } + + // Streaming path (auto-fallback to non-streaming result if provider ignores stream flag) + const streamingPayload: Record = { ...basePayload, stream: true } + const maybeStream = await this._responsesCreateWithRetries(streamingPayload, { + usedArrayInput, + lastUserMessage, + previousId, + systemPrompt, + messages, + }) + + const isAsyncIterable = (obj: unknown): obj is AsyncIterable => + typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" + + if (isAsyncIterable(maybeStream)) { + for await (const chunk of handleResponsesStream(maybeStream, { + onResponseId: (id) => { + this.lastResponseId = id + }, + })) { + yield chunk + } + } else { + // Some providers may ignore the stream flag and return a complete response + yield* this._yieldResponsesResult(maybeStream, modelInfo) + } + } + /** * Determines which OpenAI-compatible API flavor to use based on the URL path. * - This is purely path-based and provider-agnostic (works for OpenAI, Azure OpenAI after normalization, etc.). 
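
[Editor's note, not part of the patch] The three input modes that the extracted _handleResponsesFlavor helper assembles are easy to lose inside the diff above. The sketch below is illustrative only: the model id, the previous response id, and the exact newline joining of the transcript string are assumptions, not values taken from the patch.

// Illustrative Responses API payload shapes (assumed example values).

// 1) Continuation turn (previous_response_id known), latest user turn is text-only:
//    input collapses to a single "User: ..." string, no Developer preface.
const continuationTurn = {
	model: "gpt-5-mini",
	previous_response_id: "resp_abc123",
	input: "User: And what about streaming?",
}

// 2) Initial turn whose latest user message carries an image:
//    structured array input, with a Developer preface carrying the system prompt.
const initialImageTurn = {
	model: "gpt-5-mini",
	input: [
		{ role: "user", content: [{ type: "input_text", text: "Developer: You are Roo." }] },
		{
			role: "user",
			content: [
				{ type: "input_text", text: "Look at this" },
				{ type: "input_image", image_url: "data:image/png;base64,BASE64DATA" },
			],
		},
	],
}

// 3) Plain text history with no continuity id: one compact transcript string.
const textOnlyHistory = {
	model: "gpt-5-mini",
	input: "Developer: You are Roo.\nUser: Hi\nAssistant: Hello!\nUser: Continue",
}

console.log(continuationTurn, initialImageTurn, textOnlyHistory)

In each case the patch then layers max_output_tokens, reasoning effort, and text.verbosity on top of these base shapes, as shown in the diff.
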
From 43eaa3c27f4acc5c1282e50965c937bc257f2ab1 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Fri, 5 Sep 2025 18:28:32 +0900 Subject: [PATCH 12/14] fix(openai): Responses API parity with native structured input, continuity (previous_response_id/store), temp/verbosity gating, and image support (input_image/output_text) --- src/api/providers/openai.ts | 180 +++++++++++++++++------------------- 1 file changed, 84 insertions(+), 96 deletions(-) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index bf83af08a7d..389d375a114 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -376,47 +376,55 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Use Responses API when selected (non-streaming convenience method) if (flavor === "responses") { - // Build a single-turn formatted string input (Developer/User style) for Responses API - const formattedInput = this._formatResponsesSingleMessage( - { - role: "user", - content: [{ type: "text", text: prompt }], - } as Anthropic.Messages.MessageParam, - /*includeRole*/ true, - ) + // Build structured single-turn input const payload: Record = { model: model.id, - input: formattedInput, + input: [ + { + role: "user", + content: [{ type: "input_text", text: prompt }], + }, + ], + stream: false, + store: false, } - // Reasoning effort (Responses) + // Reasoning effort (support "minimal"; include summary: "auto" unless disabled) const effort = (this.options.reasoningEffort || model.reasoningEffort) as | "minimal" | "low" | "medium" | "high" | undefined - if (this.options.enableReasoningEffort && effort && effort !== "minimal") { - payload.reasoning = { effort } + if (this.options.enableReasoningEffort && effort) { + ;( + payload as { reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } } + ).reasoning = { + effort, + ...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}), + } } - // Temperature if set - if (this.options.modelTemperature !== undefined) { - payload.temperature = this.options.modelTemperature + // Temperature if supported and set + if (modelInfo.supportsTemperature !== false && this.options.modelTemperature !== undefined) { + ;(payload as Record).temperature = this.options.modelTemperature } - // Verbosity via text.verbosity - include only when explicitly specified - if (this.options.verbosity) { - payload.text = { verbosity: this.options.verbosity as "low" | "medium" | "high" } + // Verbosity via text.verbosity - include only when supported + if (this.options.verbosity && modelInfo.supportsVerbosity) { + ;(payload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { + verbosity: this.options.verbosity as "low" | "medium" | "high", + } } // max_output_tokens if (this.options.includeMaxTokens === true) { - payload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens + ;(payload as Record).max_output_tokens = + this.options.modelMaxTokens || modelInfo.maxTokens } const response = await this._responsesCreateWithRetries(payload, { - usedArrayInput: false, + usedArrayInput: true, lastUserMessage: undefined, previousId: undefined, systemPrompt: "", @@ -736,72 +744,29 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const modelId = this.options.openAiModelId ?? "" const nonStreaming = !(this.options.openAiStreamingEnabled ?? 
true) - // Build Responses payload (align with OpenAI Native Responses API formatting) - // Azure- and Responses-compatible multimodal handling: - // - Use array input ONLY when the latest user message contains images (initial turn) - // - When previous_response_id is present, send only the latest user turn: - // • Text-only => single string "User: ...", no Developer preface - // • With images => one-item array containing only the latest user content (no Developer preface) - const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") - const lastUserHasImages = - !!lastUserMessage && - Array.isArray(lastUserMessage.content) && - lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") - - // Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input) + // Determine conversation continuity id (skip when explicitly suppressed) const previousId = metadata?.suppressPreviousResponseId ? undefined : (metadata?.previousResponseId ?? this.lastResponseId) - const minimalInputMode = Boolean(previousId) - - let inputPayload: unknown - if (minimalInputMode && lastUserMessage) { - // Minimal-mode: only the latest user message (no Developer preface) - if (lastUserHasImages) { - // Single-item array with just the latest user content - inputPayload = this._toResponsesInput([lastUserMessage]) - } else { - // Single message string "User: ..." - inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true) - } - } else if (lastUserHasImages && lastUserMessage) { - // Initial turn with images: include Developer preface and minimal prior context to preserve continuity - const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant") - - const messagesForArray = messages.filter((m) => { - if (m.role === "assistant") { - return lastAssistantMessage ? m === lastAssistantMessage : false - } - if (m.role === "user") { - const hasImage = - Array.isArray(m.content) && - m.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") - return hasImage || m === lastUserMessage - } - return false - }) + // Prepare structured input for Responses API + const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") + const minimalInputMode = Boolean(previousId && lastUserMessage) - const arrayInput = this._toResponsesInput(messagesForArray) - const developerPreface = { - role: "user" as const, - content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }], - } - inputPayload = [developerPreface, ...arrayInput] - } else { - // Pure text history: full compact transcript (includes both user and assistant turns) - inputPayload = this._formatResponsesInput(systemPrompt, messages) - } - const usedArrayInput = Array.isArray(inputPayload) + const inputPayload = minimalInputMode + ? this._toResponsesInput([lastUserMessage as Anthropic.Messages.MessageParam]) + : this._toResponsesInput(messages) + // Build base payload: use top-level instructions; default to storing unless explicitly disabled const basePayload: Record = { model: modelId, input: inputPayload, ...(previousId ? { previous_response_id: previousId } : {}), + instructions: systemPrompt, + store: metadata?.store !== false, } - // Reasoning effort (Responses expects: reasoning: { effort, summary? 
}) - // Parity with native: support "minimal" and include summary: "auto" unless explicitly disabled + // Reasoning effort (support "minimal"; include summary: "auto" unless disabled) if (this.options.enableReasoningEffort && (this.options.reasoningEffort || openAiParams?.reasoningEffort)) { const effort = (this.options.reasoningEffort || openAiParams?.reasoningEffort) as | "minimal" @@ -811,9 +776,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl | undefined if (effort) { ;( - basePayload as { - reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } - } + basePayload as { reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } } ).reasoning = { effort, ...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}), @@ -821,16 +784,18 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } - // Temperature (only include when explicitly set by the user) + // Temperature: include only if model supports it const deepseekReasoner = modelId.includes("deepseek-reasoner") || (this.options.openAiR1FormatEnabled ?? false) - if (this.options.modelTemperature !== undefined) { - basePayload.temperature = this.options.modelTemperature - } else if (deepseekReasoner) { - basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE + if (modelInfo.supportsTemperature !== false) { + if (this.options.modelTemperature !== undefined) { + ;(basePayload as Record).temperature = this.options.modelTemperature + } else if (deepseekReasoner) { + ;(basePayload as Record).temperature = DEEP_SEEK_DEFAULT_TEMPERATURE + } } - // Verbosity: include only when explicitly specified in settings - if (this.options.verbosity) { + // Verbosity: include only when model supports it + if (this.options.verbosity && modelInfo.supportsVerbosity) { ;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { verbosity: this.options.verbosity as "low" | "medium" | "high", } @@ -844,7 +809,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Non-streaming path if (nonStreaming) { const response = await this._responsesCreateWithRetries(basePayload, { - usedArrayInput, + usedArrayInput: Array.isArray(inputPayload), lastUserMessage, previousId, systemPrompt, @@ -857,7 +822,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Streaming path (auto-fallback to non-streaming result if provider ignores stream flag) const streamingPayload: Record = { ...basePayload, stream: true } const maybeStream = await this._responsesCreateWithRetries(streamingPayload, { - usedArrayInput, + usedArrayInput: Array.isArray(inputPayload), lastUserMessage, previousId, systemPrompt, @@ -925,30 +890,53 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl private _toResponsesInput(anthropicMessages: Anthropic.Messages.MessageParam[]): Array<{ role: "user" | "assistant" - content: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }> + content: Array< + | { type: "input_text"; text: string } + | { type: "input_image"; image_url: string } + | { type: "output_text"; text: string } + > }> { const input: Array<{ role: "user" | "assistant" - content: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }> + content: Array< + | { type: "input_text"; text: string } + | { type: "input_image"; image_url: string } + | { type: "output_text"; text: string 
} + > }> = [] for (const msg of anthropicMessages) { const role = msg.role === "assistant" ? "assistant" : "user" - const parts: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }> = [] + const parts: Array< + | { type: "input_text"; text: string } + | { type: "input_image"; image_url: string } + | { type: "output_text"; text: string } + > = [] if (typeof msg.content === "string") { if (msg.content.length > 0) { - parts.push({ type: "input_text", text: msg.content }) + if (role === "assistant") { + parts.push({ type: "output_text", text: msg.content }) + } else { + parts.push({ type: "input_text", text: msg.content }) + } } - } else { + } else if (Array.isArray(msg.content)) { for (const block of msg.content) { if (block.type === "text") { - parts.push({ type: "input_text", text: block.text }) + if (role === "assistant") { + parts.push({ type: "output_text", text: block.text }) + } else { + parts.push({ type: "input_text", text: block.text }) + } } else if (block.type === "image") { - parts.push({ - type: "input_image", - image_url: `data:${block.source.media_type};base64,${block.source.data}`, - }) + // Images are treated as user input; ignore images on assistant turns + if (role === "user") { + parts.push({ + type: "input_image", + image_url: `data:${block.source.media_type};base64,${block.source.data}`, + }) + } } // tool_use/tool_result are omitted in this minimal mapping (can be added as needed) } From 0126f3a1d051993a9a999ba7940f5fad2c615c18 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Fri, 5 Sep 2025 18:42:39 +0900 Subject: [PATCH 13/14] test(openai): align Responses API payload shape with tests string transcript for text-only, array for multimodal; retry-on-verbosity; continuity handling --- src/api/providers/openai.ts | 51 ++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 389d375a114..b9b5b7da586 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -749,13 +749,50 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ? undefined : (metadata?.previousResponseId ?? this.lastResponseId) - // Prepare structured input for Responses API + // Prepare Responses API input per test expectations: + // - Non-minimal text-only => single string with Developer/User lines + // - Minimal (previous_response_id) => single string "User: ..." 
when last user has no images + // - Image cases => structured array; inject Developer preface as first item (non-minimal only) const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") - const minimalInputMode = Boolean(previousId && lastUserMessage) + const lastUserHasImages = + !!lastUserMessage && + Array.isArray(lastUserMessage.content) && + lastUserMessage.content.some((b: any) => (b as any)?.type === "image") + const minimalInputMode = Boolean(previousId) + + let inputPayload: unknown + if (minimalInputMode && lastUserMessage) { + // Minimal mode: only latest user turn + if (lastUserHasImages) { + inputPayload = this._toResponsesInput([lastUserMessage]) + } else { + inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true) + } + } else if (lastUserHasImages && lastUserMessage) { + // Initial turn with images: include Developer preface and minimal context + const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant") + const messagesForArray = messages.filter((m) => { + if (m.role === "assistant") { + return lastAssistantMessage ? m === lastAssistantMessage : false + } + if (m.role === "user") { + const hasImage = + Array.isArray(m.content) && m.content.some((b: any) => (b as any)?.type === "image") + return hasImage || m === lastUserMessage + } + return false + }) - const inputPayload = minimalInputMode - ? this._toResponsesInput([lastUserMessage as Anthropic.Messages.MessageParam]) - : this._toResponsesInput(messages) + const arrayInput = this._toResponsesInput(messagesForArray) + const developerPreface = { + role: "user" as const, + content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }], + } + inputPayload = [developerPreface, ...arrayInput] + } else { + // Pure text history: compact transcript string + inputPayload = this._formatResponsesInput(systemPrompt, messages) + } // Build base payload: use top-level instructions; default to storing unless explicitly disabled const basePayload: Record = { @@ -794,8 +831,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } - // Verbosity: include only when model supports it - if (this.options.verbosity && modelInfo.supportsVerbosity) { + // Verbosity: include when provided; retry logic removes it on 400 + if (this.options.verbosity) { ;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { verbosity: this.options.verbosity as "low" | "medium" | "high", } From 848a0edb576b5ebcba6f121d9df43df1937923b5 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Fri, 5 Sep 2025 21:06:16 +0900 Subject: [PATCH 14/14] test(openai): add regression for Responses continuity when prior stream fails before id (store: true default) --- src/api/providers/__tests__/openai.spec.ts | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index cd2a64a87eb..0affafdabff 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -1673,6 +1673,63 @@ describe("OpenAI Compatible - Responses API conversation continuity", () => { const args = mockResponsesCreate.mock.calls[1][0] expect(args).not.toHaveProperty("previous_response_id") }) + it("does not include previous_response_id when prior stream fails before id; defaults to store:true", async () => { + // First call: stream throws before emitting any response.id + mockResponsesCreate + .mockImplementationOnce(async (_opts: any) => { + 
return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Partial " } + throw new Error("stream interrupted") + }, + } + }) + // Second call: normal stream + .mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "OK" } + yield { + type: "response.completed", + response: { usage: { input_tokens: 1, output_tokens: 1 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + // First call fails mid-stream, so no response.id is captured + const first = handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ]) + + await expect(async () => { + for await (const _ of first) { + // drain until error + } + }).rejects.toThrow("stream interrupted") + + // Second call should not include previous_response_id and should default to store:true + const chunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + chunks.push(ch) + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const secondArgs = mockResponsesCreate.mock.calls[1][0] + expect(secondArgs).not.toHaveProperty("previous_response_id") + expect(secondArgs).toHaveProperty("store", true) + expect(typeof secondArgs.input).toBe("string") + expect(secondArgs.input).toContain("Developer: You are Roo.") + expect(secondArgs.input).toContain("User: Hi") + }) }) // --- New: Responses API parity improvements tests ---
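
[Editor's note, not part of the patch] The retries these regression tests exercise are centralized in _responsesCreateWithRetries, introduced earlier in the series. The following is a rough standalone sketch of that one-shot retry policy, with simplified error matching and a hypothetical helper name; it is not the provider's actual implementation.

// Standalone sketch of the one-shot retry policy (simplified; illustrative only).
type CreateFn = (body: Record<string, unknown>) => Promise<unknown>

async function createWithOneRetry(create: CreateFn, payload: Record<string, unknown>): Promise<unknown> {
	try {
		return await create(payload)
	} catch (err) {
		const msg = err instanceof Error ? err.message.toLowerCase() : ""
		// "Previous response ... not found": drop previous_response_id and retry once.
		if ("previous_response_id" in payload && msg.includes("previous response") && msg.includes("not found")) {
			const { previous_response_id: _drop, ...rest } = payload
			return await create(rest)
		}
		// Unsupported verbosity: drop the text block and retry once.
		if ("text" in payload && msg.includes("verbosity")) {
			const { text: _omit, ...rest } = payload
			return await create(rest)
		}
		// Anything else propagates to the caller unchanged.
		throw err
	}
}

The real code in the patch additionally checks the 400 status before retrying and, for Azure, rebuilds a single-message string input when structured array input is rejected.
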