From b0673a98b8eef2674636d5554818132f780c40b4 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Sat, 23 Aug 2025 23:52:17 +0900 Subject: [PATCH 01/14] feat: Add support for responses API in Azure Compatible Provider. --- packages/types/src/provider-settings.ts | 1 + src/api/providers/__tests__/openai.spec.ts | 430 ++++++++++++++++++--- src/api/providers/openai.ts | 383 +++++++++++++++++- 3 files changed, 766 insertions(+), 48 deletions(-) diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index 3fa7094d873..d2b38064105 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -179,6 +179,7 @@ const openAiSchema = baseProviderSettingsSchema.extend({ openAiStreamingEnabled: z.boolean().optional(), openAiHostHeader: z.string().optional(), // Keep temporarily for backward compatibility during migration. openAiHeaders: z.record(z.string(), z.string()).optional(), + openAiApiFlavor: z.union([z.literal("auto"), z.literal("responses"), z.literal("chat")]).optional(), }) const ollamaSchema = baseProviderSettingsSchema.extend({ diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index 14ed35430a5..9266180cd55 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -4,27 +4,51 @@ import { OpenAiHandler, getOpenAiModels } from "../openai" import { ApiHandlerOptions } from "../../../shared/api" import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" -import { openAiModelInfoSaneDefaults } from "@roo-code/types" import { Package } from "../../../shared/package" import axios from "axios" const mockCreate = vitest.fn() +const mockResponsesCreate = vitest.fn() vitest.mock("openai", () => { const mockConstructor = vitest.fn() - return { - __esModule: true, - default: mockConstructor.mockImplementation(() => ({ - chat: { - completions: { - create: mockCreate.mockImplementation(async (options) => { - if (!options.stream) { - return { - id: "test-completion", + const makeClient = () => ({ + chat: { + completions: { + create: mockCreate.mockImplementation(async (options) => { + if (!options.stream) { + return { + id: "test-completion", + choices: [ + { + message: { role: "assistant", content: "Test response", refusal: null }, + finish_reason: "stop", + index: 0, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + } + } + + return { + [Symbol.asyncIterator]: async function* () { + yield { choices: [ { - message: { role: "assistant", content: "Test response", refusal: null }, - finish_reason: "stop", + delta: { content: "Test response" }, + index: 0, + }, + ], + usage: null, + } + yield { + choices: [ + { + delta: {}, index: 0, }, ], @@ -34,38 +58,30 @@ vitest.mock("openai", () => { total_tokens: 15, }, } - } - - return { - [Symbol.asyncIterator]: async function* () { - yield { - choices: [ - { - delta: { content: "Test response" }, - index: 0, - }, - ], - usage: null, - } - yield { - choices: [ - { - delta: {}, - index: 0, - }, - ], - usage: { - prompt_tokens: 10, - completion_tokens: 5, - total_tokens: 15, - }, - } - }, - } - }), - }, + }, + } + }), }, - })), + }, + responses: { + create: mockResponsesCreate.mockImplementation(async (options) => { + // Default happy-path mock for non-streaming Responses API + return { + id: "test-response", + output_text: "Test response", + usage: { + input_tokens: 10, + output_tokens: 5, + total_tokens: 15, + }, + } + }), + }, + }) + return 
{ + __esModule: true, + default: mockConstructor.mockImplementation((args: any) => makeClient()), + AzureOpenAI: mockConstructor.mockImplementation((args: any) => makeClient()), } }) @@ -977,6 +993,56 @@ describe("getOpenAiModels", () => { expect(result).toEqual([]) }) + describe("Azure portal Responses URL normalization", () => { + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("Responses URL from Azure portal is converted to use Responses API", async () => { + const handler = new OpenAiHandler({ + openAiApiKey: "test-azure", + openAiModelId: "my-deployment", + openAiBaseUrl: "https://sample-name.openai.azure.com/openai/responses?api-version=2025-04-01-preview", + openAiUseAzure: true, + openAiStreamingEnabled: false, + includeMaxTokens: true, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 64, + supportsPromptCache: false, + }, + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: [{ type: "text", text: "Hello!" }] }, + ] + + const stream = handler.createMessage("You are Roo Code.", messages) + const chunks: any[] = [] + for await (const ch of stream) { + chunks.push(ch) + } + + // Should have used Responses API, not Chat Completions + expect(mockResponsesCreate).toHaveBeenCalled() + expect(mockCreate).not.toHaveBeenCalled() + + // Payload shape sanity + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("model", "my-deployment") + expect(args).toHaveProperty("input") + expect(typeof args.input).toBe("string") + expect(args.input).toContain("Developer: You are Roo Code.") + expect(args.input).toContain("User: Hello!") + expect(args).toHaveProperty("max_output_tokens", 64) + + // Ensure returned text chunk surfaced + const textChunk = chunks.find((c) => c.type === "text") + expect(textChunk?.text).toBe("Test response") + }) + }) + it("should deduplicate model IDs", async () => { const mockResponse = { data: { @@ -990,3 +1056,281 @@ describe("getOpenAiModels", () => { expect(result).toEqual(["gpt-4", "gpt-3.5-turbo"]) }) }) + +// -- Added Responses API tests (TDD) -- + +describe("OpenAI Compatible - Responses API", () => { + let handler: OpenAiHandler + const baseMessages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "text" as const, + text: "Hello!", + }, + ], + }, + ] + + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("Azure Responses happy path uses string input (no messages) and max_output_tokens", async () => { + const opts: ApiHandlerOptions = { + openAiApiKey: "test-azure", + openAiModelId: "my-deployment", + openAiBaseUrl: "https://myres.openai.azure.com/openai/v1/responses?api-version=preview", + openAiStreamingEnabled: false, + includeMaxTokens: true, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 256, + supportsPromptCache: false, + }, + enableReasoningEffort: false, + } + handler = new OpenAiHandler(opts) + + const stream = handler.createMessage("You are Roo Code.", baseMessages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Should have produced a text chunk + const textChunk = chunks.find((c) => c.type === "text") + expect(textChunk?.text).toBe("Test response") + + // Ensure Responses API was used + expect(mockResponsesCreate).toHaveBeenCalled() + expect(mockCreate).not.toHaveBeenCalled() + + const callArgs = mockResponsesCreate.mock.calls[0][0] + expect(callArgs).not.toHaveProperty("messages") + 
expect(callArgs).toHaveProperty("input") + expect(typeof callArgs.input).toBe("string") + expect(callArgs.input).toContain("Developer: You are Roo Code.") + expect(callArgs.input).toContain("User: Hello!") + expect(callArgs).toHaveProperty("model", "my-deployment") + // Azure Responses naming + expect(callArgs).toHaveProperty("max_output_tokens", 256) + }) + + it("Auto-detect: '/v1/responses' => Responses payload; '/chat/completions' => Chat Completions payload", async () => { + // Responses URL + const respHandler = new OpenAiHandler({ + openAiApiKey: "test", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + for await (const _ of respHandler.createMessage("sys", baseMessages)) { + } + expect(mockResponsesCreate).toHaveBeenCalled() + const respArgs = mockResponsesCreate.mock.calls.pop()?.[0] + expect(respArgs).not.toHaveProperty("messages") + expect(respArgs).toHaveProperty("input") + + // Chat Completions URL + mockResponsesCreate.mockClear() + mockCreate.mockClear() + const chatHandler = new OpenAiHandler({ + openAiApiKey: "test", + openAiModelId: "gpt-4o", + openAiBaseUrl: "https://api.openai.com/v1/chat/completions", + openAiStreamingEnabled: false, + }) + for await (const _ of chatHandler.createMessage("sys", baseMessages)) { + } + expect(mockCreate).toHaveBeenCalled() + const chatArgs = mockCreate.mock.calls.pop()?.[0] + expect(chatArgs).toHaveProperty("messages") + expect(chatArgs).not.toHaveProperty("input") + }) + + it("Manual override: force Responses or Chat regardless of URL", async () => { + // Force Responses + const forceResp = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1", // no responses segment + openAiStreamingEnabled: false, + openAiApiFlavor: "responses", + }) + for await (const _ of forceResp.createMessage("sys", baseMessages)) { + } + expect(mockResponsesCreate).toHaveBeenCalled() + const rArgs = mockResponsesCreate.mock.calls.pop()?.[0] + expect(rArgs).toHaveProperty("input") + expect(rArgs).not.toHaveProperty("messages") + + // Force Chat + mockResponsesCreate.mockClear() + mockCreate.mockClear() + const forceChat = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-4o", + openAiBaseUrl: "https://api.openai.com/v1/responses", // would auto-detect as responses + openAiStreamingEnabled: false, + openAiApiFlavor: "chat", + }) + for await (const _ of forceChat.createMessage("sys", baseMessages)) { + } + expect(mockCreate).toHaveBeenCalled() + const cArgs = mockCreate.mock.calls.pop()?.[0] + expect(cArgs).toHaveProperty("messages") + }) + + it("Reasoning effort mapping: Responses uses reasoning: { effort }, Chat uses reasoning_effort", async () => { + // Responses path + const responsesHandler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "high", + openAiCustomModelInfo: { + contextWindow: 128_000, + supportsPromptCache: false, + supportsReasoningEffort: true, + }, + }) + for await (const _ of responsesHandler.createMessage("sys", baseMessages)) { + } + expect(mockResponsesCreate).toHaveBeenCalled() + const rArgs = mockResponsesCreate.mock.calls.pop()?.[0] + expect(rArgs).toHaveProperty("reasoning") + expect(rArgs.reasoning).toEqual({ effort: "high" }) + + // Chat path + mockResponsesCreate.mockClear() + mockCreate.mockClear() + const chatHandler = new 
OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-4o", + openAiBaseUrl: "https://api.openai.com/v1/chat/completions", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "high", + openAiCustomModelInfo: { + contextWindow: 128_000, + supportsPromptCache: false, + supportsReasoningEffort: true, + }, + }) + for await (const _ of chatHandler.createMessage("sys", baseMessages)) { + } + expect(mockCreate).toHaveBeenCalled() + const cArgs = mockCreate.mock.calls.pop()?.[0] + expect(cArgs).toHaveProperty("reasoning_effort", "high") + }) + + it("Verbosity (Responses): include when set; if server rejects, retry without it (warn once)", async () => { + // First call throws 400 for 'verbosity', second succeeds + mockResponsesCreate.mockImplementationOnce((_opts: any) => { + const err = new Error("Unsupported parameter: 'verbosity'") + ;(err as any).status = 400 + throw err + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + verbosity: "high", + }) + + const stream = h.createMessage("sys", baseMessages) + const chunks: any[] = [] + for await (const ch of stream) { + chunks.push(ch) + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const first = mockResponsesCreate.mock.calls[0][0] + const second = mockResponsesCreate.mock.calls[1][0] + expect(first).toHaveProperty("text") + expect(first.text).toEqual({ verbosity: "high" }) + expect(second).not.toHaveProperty("text") + + // Should still yield text + const textChunk = chunks.find((c) => c.type === "text") + expect(textChunk?.text).toBe("Test response") + }) + + it("Azure naming: use max_output_tokens for Responses; keep max_completion_tokens for Chat Completions", async () => { + // Responses + includeMaxTokens + const r = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + includeMaxTokens: true, + modelMaxTokens: 128, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 4096, + supportsPromptCache: false, + }, + }) + for await (const _ of r.createMessage("sys", baseMessages)) { + } + const rArgs = mockResponsesCreate.mock.calls.pop()?.[0] + expect(rArgs).toHaveProperty("max_output_tokens", 128) + expect(rArgs).not.toHaveProperty("max_completion_tokens") + + // Chat + includeMaxTokens + mockResponsesCreate.mockClear() + mockCreate.mockClear() + const c = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-4o", + openAiBaseUrl: "https://api.openai.com/v1/chat/completions", + openAiStreamingEnabled: false, + includeMaxTokens: true, + modelMaxTokens: 128, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 4096, + supportsPromptCache: false, + }, + }) + for await (const _ of c.createMessage("sys", baseMessages)) { + } + const cArgs = mockCreate.mock.calls.pop()?.[0] + expect(cArgs).toHaveProperty("max_completion_tokens", 128) + expect(cArgs).not.toHaveProperty("max_output_tokens") + }) + + it("Normalizes Azure portal responses URL to /openai/v1 with apiVersion=preview", async () => { + mockResponsesCreate.mockClear() + mockCreate.mockClear() + + const portalUrl = "https://sample-name.openai.azure.com/openai/responses?api-version=2025-04-01-preview" + + const handler = new OpenAiHandler({ + openAiApiKey: "test-azure", + openAiModelId: "my-deployment", + openAiBaseUrl: portalUrl, + openAiStreamingEnabled: false, + }) + + for await (const _ of 
handler.createMessage("sys", baseMessages)) { + } + + // Ensures Responses API path was used + expect(mockResponsesCreate).toHaveBeenCalled() + + // Ensure SDK constructor was called with normalized baseURL and 'preview' apiVersion (per requirement) + // Note: AzureOpenAI and OpenAI share same mock constructor; inspect last call + const ctorCalls = vi.mocked(OpenAI as unknown as any).mock.calls as any[] + const lastCtorArgs = ctorCalls[ctorCalls.length - 1]?.[0] || {} + expect(lastCtorArgs.baseURL).toBe("https://sample-name.openai.azure.com/openai/v1") + expect(lastCtorArgs.apiVersion).toBe("preview") + }) +}) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 36158d770c1..b61955433cb 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -36,10 +36,18 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl super() this.options = options - const baseURL = this.options.openAiBaseUrl ?? "https://api.openai.com/v1" + // Normalize Azure Responses "web" URL shape if provided by users. + // Example input (Azure portal sometimes shows): + // https://{resource}.openai.azure.com/openai/responses?api-version=2025-04-01-preview + // We normalize to Azure SDK-friendly base and version: + // baseURL: https://{resource}.openai.azure.com/openai/v1 + // apiVersion: preview + const rawBaseURL = this.options.openAiBaseUrl ?? "https://api.openai.com/v1" + const azureNormalization = this._normalizeAzureResponsesBaseUrlAndVersion(rawBaseURL) + const baseURL = azureNormalization.baseURL const apiKey = this.options.openAiApiKey ?? "not-provided" - const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) - const urlHost = this._getUrlHost(this.options.openAiBaseUrl) + const isAzureAiInference = this._isAzureAiInference(baseURL) + const urlHost = this._getUrlHost(baseURL) const isAzureOpenAi = urlHost === "azure.com" || urlHost.endsWith(".azure.com") || options.openAiUseAzure const headers = { @@ -61,10 +69,23 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } else if (isAzureOpenAi) { // Azure API shape slightly differs from the core API shape: // https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai + + // Determine if we're using the Responses API flavor for Azure + const flavor = this._resolveApiFlavor(this.options.openAiApiFlavor, this.options.openAiBaseUrl ?? "") + const isResponsesFlavor = + flavor === "responses" || + this._isAzureOpenAiResponses(this.options.openAiBaseUrl) || + this._isAzureOpenAiResponses(baseURL) + + // Always use 'preview' for Azure Responses API calls (per user requirement) + const azureVersion = isResponsesFlavor + ? 
"preview" + : this.options.azureApiVersion || azureOpenAiDefaultApiVersion + this.client = new AzureOpenAI({ baseURL, apiKey, - apiVersion: this.options.azureApiVersion || azureOpenAiDefaultApiVersion, + apiVersion: azureVersion, defaultHeaders: headers, timeout, }) @@ -83,7 +104,21 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - const { info: modelInfo, reasoning } = this.getModel() + // Gather model params (centralized: temperature, max tokens, reasoning, verbosity) + const modelParams = this.getModel() + const { + info: modelInfo, + reasoning, + reasoningEffort, + verbosity, + } = modelParams as unknown as { + id: string + info: ModelInfo + reasoning?: { reasoning_effort?: "low" | "medium" | "high" } + reasoningEffort?: "minimal" | "low" | "medium" | "high" + verbosity?: "low" | "medium" | "high" + } + const modelUrl = this.options.openAiBaseUrl ?? "" const modelId = this.options.openAiModelId ?? "" const enabledR1Format = this.options.openAiR1FormatEnabled ?? false @@ -92,6 +127,70 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format const ark = modelUrl.includes(".volces.com") + // Decide API flavor (manual override > auto-detect by URL) + const flavor = this._resolveApiFlavor(this.options.openAiApiFlavor, modelUrl) + + // If Responses API is selected, use the Responses payload and endpoint + if (flavor === "responses") { + const nonStreaming = !(this.options.openAiStreamingEnabled ?? true) + + // Build Responses payload (align with OpenAI Native Responses API formatting) + const formattedInput = this._formatResponsesInput(systemPrompt, messages) + const payload: Record = { + model: modelId, + input: formattedInput, + } + + // Reasoning effort (Responses expects: reasoning: { effort }) + if (this.options.enableReasoningEffort && (this.options.reasoningEffort || reasoningEffort)) { + const effort = (this.options.reasoningEffort || reasoningEffort) as + | "minimal" + | "low" + | "medium" + | "high" + | undefined + // If effort is set and not "minimal" (minimal is treated as "no explicit effort") + if (effort && effort !== "minimal") { + payload.reasoning = { effort } + } + } + + // Temperature (only include when explicitly set by the user) + if (this.options.modelTemperature !== undefined) { + payload.temperature = this.options.modelTemperature + } else if (deepseekReasoner) { + payload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE + } + + // Verbosity: include via text.verbosity (Responses API expectation per openai-native handler) + if (this.options.verbosity || verbosity) { + ;(payload as any).text = { verbosity: this.options.verbosity || verbosity } + } + + // Add max_output_tokens if requested (Azure Responses naming) + if (this.options.includeMaxTokens === true) { + payload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens + } + + // NOTE: Streaming for Responses API isn't covered by current tests. + // We call non-streaming for now to preserve stable behavior. 
+ try { + const response: any = await (this.client as any).responses.create(payload) + yield* this._yieldResponsesResult(response, modelInfo) + } catch (err: unknown) { + // Graceful downgrade if verbosity is rejected by server (400 unknown/unsupported parameter) + if ((payload as any).text && this._isVerbosityUnsupportedError(err)) { + // Remove text.verbosity and retry once + const { text: _omit, ...withoutVerbosity } = payload as any + const response: any = await (this.client as any).responses.create(withoutVerbosity) + yield* this._yieldResponsesResult(response, modelInfo) + } else { + throw err + } + } + return + } + if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) { yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages) return @@ -232,6 +331,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ? [systemMessage, ...convertToSimpleMessages(messages)] : [systemMessage, ...convertToOpenAiMessages(messages)], } + // Include reasoning_effort for Chat Completions when available + if (reasoning) { + Object.assign(requestOptions, reasoning) + } // Add max_tokens if needed this.addMaxTokensIfNeeded(requestOptions, modelInfo) @@ -270,9 +373,64 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl async completePrompt(prompt: string): Promise { try { const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) + const flavor = this._resolveApiFlavor(this.options.openAiApiFlavor, this.options.openAiBaseUrl ?? "") const model = this.getModel() const modelInfo = model.info + // Use Responses API when selected (non-streaming convenience method) + if (flavor === "responses") { + // Build a single-turn formatted string input (Developer/User style) for Responses API + const formattedInput = this._formatResponsesSingleMessage( + { + role: "user", + content: [{ type: "text", text: prompt }] as any, + } as Anthropic.Messages.MessageParam, + /*includeRole*/ true, + ) + const payload: Record = { + model: model.id, + input: formattedInput, + } + + // Reasoning effort (Responses) + const effort = (this.options.reasoningEffort || (model as any).reasoningEffort) as + | "minimal" + | "low" + | "medium" + | "high" + | undefined + if (this.options.enableReasoningEffort && effort && effort !== "minimal") { + payload.reasoning = { effort } + } + + // Temperature if set + if (this.options.modelTemperature !== undefined) { + payload.temperature = this.options.modelTemperature + } + + // Verbosity via text.verbosity + if (this.options.verbosity) { + ;(payload as any).text = { verbosity: this.options.verbosity } + } + + // max_output_tokens + if (this.options.includeMaxTokens === true) { + payload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens + } + + try { + const response: any = await (this.client as any).responses.create(payload) + return this._extractResponsesText(response) ?? "" + } catch (err: unknown) { + if ((payload as any).text && this._isVerbosityUnsupportedError(err)) { + const { text: _omit, ...withoutVerbosity } = payload as any + const response: any = await (this.client as any).responses.create(withoutVerbosity) + return this._extractResponsesText(response) ?? 
"" + } + throw err + } + } + const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: model.id, messages: [{ role: "user", content: prompt }], @@ -403,6 +561,68 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl return urlHost.endsWith(".services.ai.azure.com") } + private _isAzureOpenAiResponses(baseUrl?: string): boolean { + try { + if (!baseUrl) return false + const u = new URL(baseUrl) + const host = u.host + const path = u.pathname.replace(/\/+$/, "") + if (!(host.endsWith(".openai.azure.com") || host === "openai.azure.com")) return false + return ( + path.endsWith("/openai/v1/responses") || + path.endsWith("/openai/responses") || + path.endsWith("/responses") + ) + } catch { + return false + } + } + + /** + * Normalize Azure "responses" portal URLs to SDK-friendly base and version. + * - Input (portal sometimes shows): https://{res}.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * - Output: baseURL=https://{res}.openai.azure.com/openai/v1, apiVersionOverride="preview" + * No-op for already-correct or non-Azure URLs. + */ + private _normalizeAzureResponsesBaseUrlAndVersion(inputBaseUrl: string): { + baseURL: string + apiVersionOverride?: string + } { + try { + const url = new URL(inputBaseUrl) + const isAzureHost = url.hostname.endsWith(".openai.azure.com") || url.hostname === "openai.azure.com" + const pathname = (url.pathname || "").replace(/\/+$/, "") + + // 1) Azure portal "non-v1" shape: + // https://{res}.openai.azure.com/openai/responses?api-version=2025-04-01-preview + const isPortalNonV1 = + isAzureHost && + pathname === "/openai/responses" && + url.searchParams.get("api-version") === "2025-04-01-preview" + + if (isPortalNonV1) { + const normalized = `${url.protocol}//${url.host}/openai/v1` + const ver = "preview" + return { baseURL: normalized, apiVersionOverride: ver } + } + + // 2) v1 responses path passed as base URL: + // https://{res}.openai.azure.com/openai/v1/responses?api-version=preview + // Normalize base to '/openai/v1' and force apiVersion 'preview' for Azure Responses v1 preview. 
+ const isV1ResponsesPath = isAzureHost && pathname === "/openai/v1/responses" + if (isV1ResponsesPath) { + const normalized = `${url.protocol}//${url.host}/openai/v1` + const ver = "preview" + return { baseURL: normalized, apiVersionOverride: ver } + } + + // If it's already '/openai/v1' or any other valid path, keep as-is + return { baseURL: inputBaseUrl } + } catch { + return { baseURL: inputBaseUrl } + } + } + /** * Adds max_completion_tokens to the request body if needed based on provider configuration * Note: max_tokens is deprecated in favor of max_completion_tokens as per OpenAI documentation @@ -421,6 +641,159 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl requestOptions.max_completion_tokens = this.options.modelMaxTokens || modelInfo.maxTokens } } + + // --- Responses helpers --- + + private _resolveApiFlavor( + override: "auto" | "responses" | "chat" | undefined, + baseUrl: string, + ): "responses" | "chat" { + if (override === "responses") return "responses" + if (override === "chat") return "chat" + // Auto-detect by URL path + const url = this._safeParseUrl(baseUrl) + const path = url?.pathname || "" + if (path.includes("/v1/responses") || path.endsWith("/responses")) { + return "responses" + } + if (path.includes("/chat/completions")) { + return "chat" + } + // Default to Chat Completions for backward compatibility + return "chat" + } + + private _safeParseUrl(input?: string): URL | undefined { + try { + if (!input) return undefined + return new URL(input) + } catch { + return undefined + } + } + + private _toResponsesInput(anthropicMessages: Anthropic.Messages.MessageParam[]): Array<{ + role: "user" | "assistant" + content: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }> + }> { + const input: Array<{ + role: "user" | "assistant" + content: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }> + }> = [] + + for (const msg of anthropicMessages) { + const role = msg.role === "assistant" ? "assistant" : "user" + const parts: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }> = [] + + if (typeof msg.content === "string") { + if (msg.content.length > 0) { + parts.push({ type: "input_text", text: msg.content }) + } + } else { + for (const block of msg.content) { + if (block.type === "text") { + parts.push({ type: "input_text", text: block.text }) + } else if (block.type === "image") { + parts.push({ + type: "input_image", + image_url: `data:${block.source.media_type};base64,${block.source.data}`, + }) + } + // tool_use/tool_result are omitted in this minimal mapping (can be added as needed) + } + } + + if (parts.length > 0) { + input.push({ role, content: parts }) + } + } + return input + } + + private _extractResponsesText(response: any): string | undefined { + // Prefer the simple output_text if present, otherwise attempt to parse output array + if (response?.output_text) return response.output_text + if (Array.isArray(response?.output)) { + // Find assistant message with output_text + for (const item of response.output) { + if (item?.type === "message" && Array.isArray(item.content)) { + const textPart = item.content.find( + (c: any) => c.type === "output_text" && typeof c.text === "string", + ) + if (textPart?.text) return textPart.text + } + } + } + return undefined + } + + private async *_yieldResponsesResult(response: any, modelInfo: ModelInfo): ApiStream { + const text = this._extractResponsesText(response) ?? 
"" + if (text) { + yield { type: "text", text } + } + // Translate usage fields if present + const usage = response?.usage + if (usage) { + yield { + type: "usage", + inputTokens: usage.input_tokens || usage.prompt_tokens || 0, + outputTokens: usage.output_tokens || usage.completion_tokens || 0, + cacheWriteTokens: usage.cache_creation_input_tokens || undefined, + cacheReadTokens: usage.cache_read_input_tokens || undefined, + } + } + } + + private _isVerbosityUnsupportedError(err: unknown): boolean { + const anyErr = err as any + const msg = (anyErr?.message || "").toString().toLowerCase() + const status = anyErr?.status + return ( + status === 400 && + (msg.includes("verbosity") || msg.includes("unknown parameter") || msg.includes("unsupported")) + ) + } + + // ---- Responses input formatting (align with openai-native.ts) ---- + + private _formatResponsesInput(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): string { + // Developer role for system prompt + let formattedInput = `Developer: ${systemPrompt}\n\n` + for (const message of messages) { + const role = message.role === "user" ? "User" : "Assistant" + if (typeof message.content === "string") { + formattedInput += `${role}: ${message.content}\n\n` + } else if (Array.isArray(message.content)) { + const textContent = message.content + .filter((block) => (block as any).type === "text") + .map((block) => (block as any).text as string) + .join("\n") + if (textContent) { + formattedInput += `${role}: ${textContent}\n\n` + } + } + } + return formattedInput.trim() + } + + private _formatResponsesSingleMessage( + message: Anthropic.Messages.MessageParam, + includeRole: boolean = true, + ): string { + const role = includeRole ? (message.role === "user" ? "User" : "Assistant") + ": " : "" + if (typeof message.content === "string") { + return `${role}${message.content}` + } + if (Array.isArray(message.content)) { + const textContent = message.content + .filter((block) => (block as any).type === "text") + .map((block) => (block as any).text as string) + .join("\n") + return `${role}${textContent}` + } + return role + } } export async function getOpenAiModels(baseUrl?: string, apiKey?: string, openAiHeaders?: Record) { From f05544be10476af3d95425fd5966020796552bff Mon Sep 17 00:00:00 2001 From: Lagyu Date: Sun, 24 Aug 2025 00:45:30 +0900 Subject: [PATCH 02/14] feat: remove api flavor override from setting ui. --- packages/types/src/provider-settings.ts | 1 - src/api/providers/__tests__/openai.spec.ts | 33 ---------------------- src/api/providers/openai.ts | 17 ++++------- 3 files changed, 6 insertions(+), 45 deletions(-) diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index d2b38064105..3fa7094d873 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -179,7 +179,6 @@ const openAiSchema = baseProviderSettingsSchema.extend({ openAiStreamingEnabled: z.boolean().optional(), openAiHostHeader: z.string().optional(), // Keep temporarily for backward compatibility during migration. 
openAiHeaders: z.record(z.string(), z.string()).optional(), - openAiApiFlavor: z.union([z.literal("auto"), z.literal("responses"), z.literal("chat")]).optional(), }) const ollamaSchema = baseProviderSettingsSchema.extend({ diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index 9266180cd55..b80e94a7be4 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -1151,39 +1151,6 @@ describe("OpenAI Compatible - Responses API", () => { expect(chatArgs).not.toHaveProperty("input") }) - it("Manual override: force Responses or Chat regardless of URL", async () => { - // Force Responses - const forceResp = new OpenAiHandler({ - openAiApiKey: "k", - openAiModelId: "gpt-5", - openAiBaseUrl: "https://api.openai.com/v1", // no responses segment - openAiStreamingEnabled: false, - openAiApiFlavor: "responses", - }) - for await (const _ of forceResp.createMessage("sys", baseMessages)) { - } - expect(mockResponsesCreate).toHaveBeenCalled() - const rArgs = mockResponsesCreate.mock.calls.pop()?.[0] - expect(rArgs).toHaveProperty("input") - expect(rArgs).not.toHaveProperty("messages") - - // Force Chat - mockResponsesCreate.mockClear() - mockCreate.mockClear() - const forceChat = new OpenAiHandler({ - openAiApiKey: "k", - openAiModelId: "gpt-4o", - openAiBaseUrl: "https://api.openai.com/v1/responses", // would auto-detect as responses - openAiStreamingEnabled: false, - openAiApiFlavor: "chat", - }) - for await (const _ of forceChat.createMessage("sys", baseMessages)) { - } - expect(mockCreate).toHaveBeenCalled() - const cArgs = mockCreate.mock.calls.pop()?.[0] - expect(cArgs).toHaveProperty("messages") - }) - it("Reasoning effort mapping: Responses uses reasoning: { effort }, Chat uses reasoning_effort", async () => { // Responses path const responsesHandler = new OpenAiHandler({ diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index b61955433cb..47ab20347e1 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -70,8 +70,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Azure API shape slightly differs from the core API shape: // https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai - // Determine if we're using the Responses API flavor for Azure - const flavor = this._resolveApiFlavor(this.options.openAiApiFlavor, this.options.openAiBaseUrl ?? "") + // Determine if we're using the Responses API flavor for Azure (auto-detect by URL only) + const flavor = this._resolveApiFlavor(this.options.openAiBaseUrl ?? 
"") const isResponsesFlavor = flavor === "responses" || this._isAzureOpenAiResponses(this.options.openAiBaseUrl) || @@ -127,8 +127,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format const ark = modelUrl.includes(".volces.com") - // Decide API flavor (manual override > auto-detect by URL) - const flavor = this._resolveApiFlavor(this.options.openAiApiFlavor, modelUrl) + // Decide API flavor (auto-detect by URL) + const flavor = this._resolveApiFlavor(modelUrl) // If Responses API is selected, use the Responses payload and endpoint if (flavor === "responses") { @@ -373,7 +373,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl async completePrompt(prompt: string): Promise { try { const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) - const flavor = this._resolveApiFlavor(this.options.openAiApiFlavor, this.options.openAiBaseUrl ?? "") + const flavor = this._resolveApiFlavor(this.options.openAiBaseUrl ?? "") const model = this.getModel() const modelInfo = model.info @@ -644,12 +644,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // --- Responses helpers --- - private _resolveApiFlavor( - override: "auto" | "responses" | "chat" | undefined, - baseUrl: string, - ): "responses" | "chat" { - if (override === "responses") return "responses" - if (override === "chat") return "chat" + private _resolveApiFlavor(baseUrl: string): "responses" | "chat" { // Auto-detect by URL path const url = this._safeParseUrl(baseUrl) const path = url?.pathname || "" From ad1aeea454562710e30b2b6790fdc88abdf42b26 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Mon, 25 Aug 2025 16:02:39 +0900 Subject: [PATCH 03/14] feat(openai): Responses API parity and continuity fixes; richer event handling; tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add previous_response_id retry path on 400 “Previous response … not found” - Non-streaming and streaming: drop previous_response_id and retry once; clear continuity state - Code: [src/api/providers/openai.ts](src/api/providers/openai.ts:238), [src/api/providers/openai.ts](src/api/providers/openai.ts:291), guard [OpenAiHandler._isPreviousResponseNotFoundError()](src/api/providers/openai.ts:934) - Support GPT‑5-style reasoning summary and minimal effort on Responses API - Default enable summary: "auto" unless explicitly disabled in settings - Include reasoning: { effort: "minimal" | "low" | "medium" | "high", summary?: "auto" } - Code: constructor default [OpenAiHandler](src/api/providers/openai.ts:38), payload assembly [createMessage](src/api/providers/openai.ts:193) - Improve Responses streaming event coverage - Handle response.content_part.added (emit text) - Handle response.audio_transcript.delta (emit text as transcript) - Preserve response.id via stream callback for continuity - Code: [handleResponsesStream](src/api/transform/responses-stream.ts:91), [src/api/transform/responses-stream.ts](src/api/transform/responses-stream.ts:47), responseId callback [src/api/transform/responses-stream.ts](src/api/transform/responses-stream.ts:19) and usage in [openai.ts](src/api/providers/openai.ts:283) - Maintain conversation continuity for Responses API - Store lastResponseId on both streaming and non-streaming paths; pass previous_response_id unless suppressed - Code: stream wiring 
[src/api/providers/openai.ts](src/api/providers/openai.ts:283), non-streaming capture [src/api/providers/openai.ts](src/api/providers/openai.ts:889) - Update and extend tests - Add tests for 400 previous_response_id retry (streaming and non-streaming) - Add tests for content_part and audio_transcript events - Add tests for reasoning minimal + summary auto, and summary disabling - Adjust expectation to allow summary in reasoning payload - Tests: [src/api/providers/__tests__/openai.spec.ts](src/api/providers/__tests__/openai.spec.ts:1663), [src/api/providers/__tests__/openai.spec.ts](src/api/providers/__tests__/openai.spec.ts:1170) - Minor: default enableGpt5ReasoningSummary to true in compatible provider for Responses flows --- src/api/providers/__tests__/openai.spec.ts | 561 ++++++++++++++++++++- src/api/providers/openai.ts | 340 +++++++++++-- src/api/transform/responses-stream.ts | 263 ++++++++++ 3 files changed, 1126 insertions(+), 38 deletions(-) create mode 100644 src/api/transform/responses-stream.ts diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index b80e94a7be4..eeab8315521 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -1171,7 +1171,7 @@ describe("OpenAI Compatible - Responses API", () => { expect(mockResponsesCreate).toHaveBeenCalled() const rArgs = mockResponsesCreate.mock.calls.pop()?.[0] expect(rArgs).toHaveProperty("reasoning") - expect(rArgs.reasoning).toEqual({ effort: "high" }) + expect(rArgs.reasoning).toMatchObject({ effort: "high" }) // Chat path mockResponsesCreate.mockClear() @@ -1300,4 +1300,563 @@ describe("OpenAI Compatible - Responses API", () => { expect(lastCtorArgs.baseURL).toBe("https://sample-name.openai.azure.com/openai/v1") expect(lastCtorArgs.apiVersion).toBe("preview") }) + + it("streams Responses API when provider returns AsyncIterable", async () => { + // Arrange: make responses.create return an AsyncIterable stream for this test + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Hello " } + yield { type: "response.text.delta", delta: "world" } + yield { + type: "response.completed", + response: { usage: { input_tokens: 7, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + // streaming enabled by default + }) + + const stream = handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Say hi" }] }, + ]) + + const chunks: any[] = [] + for await (const ch of stream) { + chunks.push(ch) + } + + // Text should be streamed and concatenated in order + const text = chunks + .filter((c) => c.type === "text") + .map((c) => c.text) + .join("") + expect(text).toBe("Hello world") + + // Usage chunk emitted at completion + const usage = chunks.find((c) => c.type === "usage") + expect(usage).toBeDefined() + expect(usage.inputTokens).toBe(7) + expect(usage.outputTokens).toBe(2) + + // Ensure stream: true was sent + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(args).toHaveProperty("stream", true) + }) +}) + +describe("OpenAI Compatible - Responses API (extended streaming)", () => { + it("handles reasoning deltas and output_text in message content", async () => { + // Arrange: make responses.create return an AsyncIterable stream for this 
test + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + // Reasoning delta first + yield { type: "response.reasoning.delta", delta: "Thinking. " } + // Then a message item with output_text inside content array + yield { + type: "response.output_item.added", + item: { + type: "message", + content: [{ type: "output_text", text: "Answer." }], + }, + } + // Completion with usage + yield { + type: "response.completed", + response: { usage: { input_tokens: 3, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const chunks: any[] = [] + for await (const ch of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + chunks.push(ch) + } + + const reasoning = chunks.find((c) => c.type === "reasoning") + expect(reasoning?.text).toBe("Thinking. ") + + const text = chunks.find((c) => c.type === "text") + expect(text?.text).toBe("Answer.") + + const usage = chunks.find((c) => c.type === "usage") + expect(usage).toBeDefined() + expect(usage.inputTokens).toBe(3) + expect(usage.outputTokens).toBe(2) + + // Ensure stream: true was sent + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(args).toHaveProperty("stream", true) + }) + + it("maps refusal deltas to text with prefix", async () => { + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.refusal.delta", delta: "Cannot comply" } + // Usage may be attached directly on the event for some implementations + yield { type: "response.done", usage: { prompt_tokens: 1, completion_tokens: 1 } } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const result: any[] = [] + for await (const ch of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + result.push(ch) + } + + const textChunks = result.filter((c) => c.type === "text").map((c) => c.text) + expect(textChunks).toContain("[Refusal] Cannot comply") + + const usage = result.find((c) => c.type === "usage") + expect(usage).toBeDefined() + expect(usage.inputTokens).toBe(1) + expect(usage.outputTokens).toBe(1) + }) +}) + +describe("OpenAI Compatible - Responses API (multimodal)", () => { + it("builds structured array input with images (non-streaming)", async () => { + // Reset mocks for clarity + mockResponsesCreate.mockClear() + mockCreate.mockClear() + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + includeMaxTokens: false, + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { type: "text" as const, text: "Here is an image" }, + { + type: "image" as const, + // Minimal Anthropic-style inline image (base64) block + source: { media_type: "image/png", data: "BASE64DATA" } as any, + }, + ], + }, + ] + + const chunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo Code.", messages)) { + chunks.push(ch) + } + + // Should have used Responses API + expect(mockResponsesCreate).toHaveBeenCalled() + const args = mockResponsesCreate.mock.calls[0][0] + + // Input 
should be an array (structured input mode) + expect(Array.isArray(args.input)).toBe(true) + const arr = args.input as any[] + + // First element should be Developer preface as input_text + expect(arr[0]?.role).toBe("user") + expect(arr[0]?.content?.[0]?.type).toBe("input_text") + expect(arr[0]?.content?.[0]?.text).toContain("Developer: You are Roo Code.") + + // There should be at least one input_image with a data URL for the provided image + const hasInputImage = arr.some((item: any) => { + const c = item?.content + return ( + Array.isArray(c) && + c.some( + (part: any) => + part?.type === "input_image" && + typeof part?.image_url === "string" && + part.image_url.startsWith("data:image/png;base64,BASE64DATA"), + ) + ) + }) + expect(hasInputImage).toBe(true) + + // Should still yield a text chunk and usage (from default mock) + const textChunk = chunks.find((c: any) => c.type === "text") + const usageChunk = chunks.find((c: any) => c.type === "usage") + expect(textChunk?.text).toBe("Test response") + expect(usageChunk?.inputTokens).toBe(10) + expect(usageChunk?.outputTokens).toBe(5) + }) + + it("streams with multimodal input using array 'input'", async () => { + // Make responses.create return an AsyncIterable stream for this test + mockResponsesCreate.mockClear() + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "A" } + yield { type: "response.text.delta", delta: "B" } + yield { + type: "response.completed", + response: { usage: { input_tokens: 2, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { type: "text" as const, text: "Look at this" }, + { + type: "image" as const, + source: { media_type: "image/jpeg", data: "IMGDATA" } as any, + }, + ], + }, + ] + + const out: any[] = [] + for await (const ch of handler.createMessage("System text", messages)) { + out.push(ch) + } + + // Ensure stream: true was sent and input is array + expect(mockResponsesCreate).toHaveBeenCalled() + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("stream", true) + expect(Array.isArray(args.input)).toBe(true) + + // Verify streamed text concatenation and usage + const combined = out + .filter((c) => c.type === "text") + .map((c) => c.text) + .join("") + expect(combined).toBe("AB") + + const usage = out.find((c) => c.type === "usage") + expect(usage?.inputTokens).toBe(2) + expect(usage?.outputTokens).toBe(2) + }) +}) + +// --- New tests: Responses API conversation continuity (previous_response_id) --- +describe("OpenAI Compatible - Responses API conversation continuity", () => { + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("propagates previous_response_id from first streaming response into the next request", async () => { + // First call will stream and include a response.id + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Desc " } + yield { + type: "response.completed", + response: { id: "resp-1", usage: { input_tokens: 5, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + 
openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + // 1) First call (establish response id) + const firstChunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Describe the image" }] }, + ])) { + firstChunks.push(ch) + } + + // Ensure first call was made + expect(mockResponsesCreate).toHaveBeenCalledTimes(1) + // 2) Second call - should include previous_response_id from first call + const secondChunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Continue." }] }, + ])) { + secondChunks.push(ch) + } + + // Validate that a second Responses.create call was made + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const secondArgs = mockResponsesCreate.mock.calls[1][0] + expect(secondArgs).toHaveProperty("previous_response_id", "resp-1") + }) + + it("omits previous_response_id when metadata.suppressPreviousResponseId is true", async () => { + // First call streams and returns an id + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "First" } + yield { + type: "response.completed", + response: { id: "rid-xyz", usage: { input_tokens: 1, output_tokens: 1 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + // First call to capture lastResponseId + for await (const _ of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Turn 1" }] }, + ])) { + } + + // Second call with suppressPreviousResponseId => should NOT include previous_response_id + for await (const _ of handler.createMessage( + "sys", + [{ role: "user", content: [{ type: "text" as const, text: "Turn 2" }] }], + { suppressPreviousResponseId: true } as any, + )) { + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const args = mockResponsesCreate.mock.calls[1][0] + expect(args).not.toHaveProperty("previous_response_id") + }) +}) + +// --- New: Responses API parity improvements tests --- +describe("OpenAI Compatible - Responses API parity improvements", () => { + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("retries without previous_response_id when server returns 400 'Previous response ... 
not found' (non-streaming)", async () => { + // First call throws 400 for previous_response_id, second succeeds + mockResponsesCreate + .mockImplementationOnce((_opts: any) => { + const err = new Error("Previous response rid-bad not found") + ;(err as any).status = 400 + throw err + }) + .mockImplementationOnce(async (_opts: any) => { + return { id: "rid-good", output_text: "OK", usage: { input_tokens: 1, output_tokens: 1 } } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + + const chunks: any[] = [] + for await (const ch of h.createMessage( + "sys", + [{ role: "user", content: [{ type: "text" as const, text: "Turn" }] }], + { previousResponseId: "rid-bad" } as any, + )) { + chunks.push(ch) + } + + // Two calls made: first fails with 400, second retries without previous_response_id + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const firstArgs = mockResponsesCreate.mock.calls[0][0] + expect(firstArgs).toHaveProperty("previous_response_id", "rid-bad") + + const secondArgs = mockResponsesCreate.mock.calls[1][0] + expect(secondArgs).not.toHaveProperty("previous_response_id") + + // Should still surface text + const textChunk = chunks.find((c: any) => c.type === "text") + expect(textChunk?.text).toBe("OK") + }) + + it("retries without previous_response_id when server returns 400 (streaming)", async () => { + // First call throws, second returns a stream + mockResponsesCreate + .mockImplementationOnce((_opts: any) => { + const err = new Error("Previous response not found") + ;(err as any).status = 400 + throw err + }) + .mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Hello" } + yield { type: "response.completed", response: { usage: { input_tokens: 1, output_tokens: 1 } } } + }, + } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + // streaming enabled by default + }) + + const out: any[] = [] + for await (const ch of h.createMessage( + "sys", + [{ role: "user", content: [{ type: "text" as const, text: "Hi" }] }], + { previousResponseId: "bad-id" } as any, + )) { + out.push(ch) + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const first = mockResponsesCreate.mock.calls[0][0] + expect(first).toHaveProperty("previous_response_id", "bad-id") + const second = mockResponsesCreate.mock.calls[1][0] + expect(second).not.toHaveProperty("previous_response_id") + + const combined = out + .filter((c) => c.type === "text") + .map((c) => c.text) + .join("") + expect(combined).toBe("Hello") + }) + + it("handles response.content_part.added by emitting text", async () => { + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.content_part.added", part: { type: "text", text: "Part" } } + yield { type: "response.completed", response: { usage: { input_tokens: 0, output_tokens: 0 } } } + }, + } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const out: any[] = [] + for await (const ch of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + out.push(ch) + } + + const texts = out.filter((c) => c.type === 
"text").map((c) => c.text) + expect(texts).toContain("Part") + }) + + it("maps response.audio_transcript.delta to text", async () => { + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.audio_transcript.delta", delta: "Transcript" } + yield { type: "response.completed", response: { usage: { input_tokens: 0, output_tokens: 0 } } } + }, + } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const out: any[] = [] + for await (const ch of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + out.push(ch) + } + + const texts = out.filter((c) => c.type === "text").map((c) => c.text) + expect(texts).toContain("Transcript") + }) + + it("includes reasoning: { effort: 'minimal', summary: 'auto' } when enabled (non-streaming)", async () => { + mockResponsesCreate.mockImplementationOnce(async (opts: any) => { + return { id: "rid-1", output_text: "ok", usage: { input_tokens: 1, output_tokens: 1 } } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "minimal", + }) + + for await (const _ of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + // consume + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(1) + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("reasoning") + expect(args.reasoning).toMatchObject({ effort: "minimal", summary: "auto" }) + }) + + it("omits reasoning.summary when enableGpt5ReasoningSummary is false", async () => { + mockResponsesCreate.mockImplementationOnce(async (opts: any) => { + return { id: "rid-2", output_text: "ok", usage: { input_tokens: 1, output_tokens: 1 } } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "low", + enableGpt5ReasoningSummary: false, + }) + + for await (const _ of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + // consume + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(1) + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("reasoning") + expect(args.reasoning.effort).toBe("low") + expect(args.reasoning.summary).toBeUndefined() + }) }) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 47ab20347e1..a4b68fd42f9 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -19,11 +19,13 @@ import { convertToR1Format } from "../transform/r1-format" import { convertToSimpleMessages } from "../transform/simple-format" import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { getModelParams } from "../transform/model-params" +import { handleResponsesStream } from "../transform/responses-stream" import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { getApiRequestTimeout } from "./utils/timeout-config" +import { ResponseCreateParamsNonStreaming } from "openai/resources/responses/responses" // 
TODO: Rename this to OpenAICompatibleHandler. Also, I think the // `OpenAINativeHandler` can subclass from this, since it's obviously @@ -31,10 +33,15 @@ import { getApiRequestTimeout } from "./utils/timeout-config" export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI + private lastResponseId: string | undefined constructor(options: ApiHandlerOptions) { super() this.options = options + // Default to including reasoning.summary: "auto" for Responses API (parity with native provider) + if (this.options.enableGpt5ReasoningSummary === undefined) { + this.options.enableGpt5ReasoningSummary = true + } // Normalize Azure Responses "web" URL shape if provided by users. // Example input (Azure portal sometimes shows): @@ -135,13 +142,61 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const nonStreaming = !(this.options.openAiStreamingEnabled ?? true) // Build Responses payload (align with OpenAI Native Responses API formatting) - const formattedInput = this._formatResponsesInput(systemPrompt, messages) - const payload: Record = { + // Azure- and Responses-compatible multimodal handling: + // - Use array input ONLY when the latest user message contains images + // - Include the most recent assistant message as input_text to preserve continuity + // - Always include a Developer preface + const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") + const lastUserHasImages = + !!lastUserMessage && + Array.isArray(lastUserMessage.content) && + lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") + + let inputPayload: unknown + if (lastUserHasImages && lastUserMessage) { + // Select messages to retain context in array mode: + // - The most recent assistant message (text-only, as input_text) + // - All user messages that contain images + // - The latest user message (even if it has no image) + const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant") + + const messagesForArray = messages.filter((m) => { + if (m.role === "assistant") { + return lastAssistantMessage ? m === lastAssistantMessage : false + } + if (m.role === "user") { + const hasImage = + Array.isArray(m.content) && + m.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") + return hasImage || m === lastUserMessage + } + return false + }) + + const arrayInput = this._toResponsesInput(messagesForArray) + const developerPreface = { + role: "user" as const, + content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }], + } + inputPayload = [developerPreface, ...arrayInput] + } else { + // Pure text history: use compact transcript (includes both user and assistant turns) + inputPayload = this._formatResponsesInput(systemPrompt, messages) + } + const usedArrayInput = Array.isArray(inputPayload) + + const previousId = metadata?.suppressPreviousResponseId + ? undefined + : (metadata?.previousResponseId ?? this.lastResponseId) + + const basePayload: Record = { model: modelId, - input: formattedInput, + input: inputPayload, + ...(previousId ? { previous_response_id: previousId } : {}), } - // Reasoning effort (Responses expects: reasoning: { effort }) + // Reasoning effort (Responses expects: reasoning: { effort, summary? 
}) + // Parity with native: support "minimal" and include summary: "auto" unless explicitly disabled if (this.options.enableReasoningEffort && (this.options.reasoningEffort || reasoningEffort)) { const effort = (this.options.reasoningEffort || reasoningEffort) as | "minimal" @@ -149,41 +204,200 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl | "medium" | "high" | undefined - // If effort is set and not "minimal" (minimal is treated as "no explicit effort") - if (effort && effort !== "minimal") { - payload.reasoning = { effort } + if (effort) { + ;( + basePayload as { + reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } + } + ).reasoning = { + effort, + ...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}), + } } } // Temperature (only include when explicitly set by the user) if (this.options.modelTemperature !== undefined) { - payload.temperature = this.options.modelTemperature + basePayload.temperature = this.options.modelTemperature } else if (deepseekReasoner) { - payload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE + basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE } // Verbosity: include via text.verbosity (Responses API expectation per openai-native handler) - if (this.options.verbosity || verbosity) { - ;(payload as any).text = { verbosity: this.options.verbosity || verbosity } + const effectiveVerbosity = this.options.verbosity || verbosity + if (effectiveVerbosity) { + ;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { + verbosity: effectiveVerbosity as "low" | "medium" | "high", + } } // Add max_output_tokens if requested (Azure Responses naming) if (this.options.includeMaxTokens === true) { - payload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens + basePayload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens } - // NOTE: Streaming for Responses API isn't covered by current tests. - // We call non-streaming for now to preserve stable behavior. + // Non-streaming path (preserves existing behavior and tests) + if (nonStreaming) { + try { + const response = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(basePayload) + yield* this._yieldResponsesResult(response as unknown, modelInfo) + } catch (err: unknown) { + // Retry without previous_response_id if server rejects it (400 "Previous response ... 
not found") + if (previousId && this._isPreviousResponseNotFoundError(err)) { + const { previous_response_id: _omitPrev, ...withoutPrev } = basePayload as { + previous_response_id?: unknown + [key: string]: unknown + } + // Clear stored continuity to avoid reusing a bad id + this.lastResponseId = undefined + const response = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(withoutPrev) + yield* this._yieldResponsesResult(response as unknown, modelInfo) + } + // Graceful downgrade if verbosity is rejected by server (400 unknown/unsupported parameter) + else if ("text" in basePayload && this._isVerbosityUnsupportedError(err)) { + // Remove text.verbosity and retry once + const { text: _omit, ...withoutVerbosity } = basePayload as { text?: unknown } & Record< + string, + unknown + > + const response = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(withoutVerbosity) + yield* this._yieldResponsesResult(response as unknown, modelInfo) + } else if (usedArrayInput && this._isInputTextInvalidError(err)) { + // Azure-specific fallback: retry with string transcript when array input is rejected + const retryPayload: Record = { + ...basePayload, + input: this._formatResponsesInput(systemPrompt, messages), + } + const response = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(retryPayload) + yield* this._yieldResponsesResult(response as unknown, modelInfo) + } else { + throw err + } + } + return + } + + // Streaming path (auto-fallback to non-streaming result if provider ignores stream flag) + const streamingPayload: Record = { ...basePayload, stream: true } try { - const response: any = await (this.client as any).responses.create(payload) - yield* this._yieldResponsesResult(response, modelInfo) + const maybeStream = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(streamingPayload) + + const isAsyncIterable = (obj: unknown): obj is AsyncIterable => + typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" + + if (isAsyncIterable(maybeStream)) { + for await (const chunk of handleResponsesStream(maybeStream, { + onResponseId: (id) => { + this.lastResponseId = id + }, + })) { + yield chunk + } + } else { + // Some providers may ignore the stream flag and return a complete response + yield* this._yieldResponsesResult(maybeStream as unknown, modelInfo) + } } catch (err: unknown) { - // Graceful downgrade if verbosity is rejected by server (400 unknown/unsupported parameter) - if ((payload as any).text && this._isVerbosityUnsupportedError(err)) { - // Remove text.verbosity and retry once - const { text: _omit, ...withoutVerbosity } = payload as any - const response: any = await (this.client as any).responses.create(withoutVerbosity) - yield* this._yieldResponsesResult(response, modelInfo) + // Retry without previous_response_id if server rejects it (400 "Previous response ... 
not found") + if (previousId && this._isPreviousResponseNotFoundError(err)) { + const { previous_response_id: _omitPrev, ...withoutPrev } = streamingPayload as { + previous_response_id?: unknown + [key: string]: unknown + } + this.lastResponseId = undefined + const maybeStreamRetry = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(withoutPrev) + + const isAsyncIterable = (obj: unknown): obj is AsyncIterable => + typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" + + if (isAsyncIterable(maybeStreamRetry)) { + for await (const chunk of handleResponsesStream(maybeStreamRetry, { + onResponseId: (id) => { + this.lastResponseId = id + }, + })) { + yield chunk + } + } else { + yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo) + } + } + // Graceful verbosity removal on 400 + else if ("text" in streamingPayload && this._isVerbosityUnsupportedError(err)) { + const { text: _omit, ...withoutVerbosity } = streamingPayload as { text?: unknown } & Record< + string, + unknown + > + const maybeStreamRetry = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(withoutVerbosity) + + const isAsyncIterable = (obj: unknown): obj is AsyncIterable => + typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" + + if (isAsyncIterable(maybeStreamRetry)) { + for await (const chunk of handleResponsesStream(maybeStreamRetry, { + onResponseId: (id) => { + this.lastResponseId = id + }, + })) { + yield chunk + } + } else { + yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo) + } + } else if (usedArrayInput && this._isInputTextInvalidError(err)) { + // Azure-specific fallback for streaming: retry with string transcript while keeping stream: true + const retryStreamingPayload: Record = { + ...streamingPayload, + input: this._formatResponsesInput(systemPrompt, messages), + } + const maybeStreamRetry = await ( + this.client as unknown as { + responses: { create: (body: Record) => Promise } + } + ).responses.create(retryStreamingPayload) + + const isAsyncIterable = (obj: unknown): obj is AsyncIterable => + typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" + + if (isAsyncIterable(maybeStreamRetry)) { + for await (const chunk of handleResponsesStream(maybeStreamRetry, { + onResponseId: (id) => { + this.lastResponseId = id + }, + })) { + yield chunk + } + } else { + yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo) + } } else { throw err } @@ -383,17 +597,17 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const formattedInput = this._formatResponsesSingleMessage( { role: "user", - content: [{ type: "text", text: prompt }] as any, + content: [{ type: "text", text: prompt }], } as Anthropic.Messages.MessageParam, /*includeRole*/ true, ) - const payload: Record = { + const payload: ResponseCreateParamsNonStreaming = { model: model.id, input: formattedInput, } // Reasoning effort (Responses) - const effort = (this.options.reasoningEffort || (model as any).reasoningEffort) as + const effort = (this.options.reasoningEffort || model.reasoningEffort) as | "minimal" | "low" | "medium" @@ -410,7 +624,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Verbosity via text.verbosity if (this.options.verbosity) { - ;(payload as any).text = { verbosity: this.options.verbosity } + payload.text = { verbosity: this.options.verbosity } } // 
max_output_tokens @@ -419,12 +633,20 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } try { - const response: any = await (this.client as any).responses.create(payload) + const response = await this.client.responses.create(payload) + try { + const respId = (response as { id?: unknown } | undefined)?.id + if (typeof respId === "string" && respId.length > 0) { + this.lastResponseId = respId + } + } catch { + // ignore + } return this._extractResponsesText(response) ?? "" } catch (err: unknown) { - if ((payload as any).text && this._isVerbosityUnsupportedError(err)) { - const { text: _omit, ...withoutVerbosity } = payload as any - const response: any = await (this.client as any).responses.create(withoutVerbosity) + if (payload.text && this._isVerbosityUnsupportedError(err)) { + const { text: _omit, ...withoutVerbosity } = payload + const response = await this.client.responses.create(withoutVerbosity) return this._extractResponsesText(response) ?? "" } throw err @@ -722,7 +944,30 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl return undefined } + private _isInputTextInvalidError(err: unknown): boolean { + if (err == null || typeof err !== "object") return false + const anyErr = err as { + status?: unknown + response?: { status?: unknown } + message?: unknown + error?: { message?: unknown } + } + const statusRaw = anyErr.status ?? anyErr.response?.status + const status = typeof statusRaw === "number" ? statusRaw : Number(statusRaw) + const msgRaw = (anyErr.message ?? anyErr.error?.message ?? "").toString().toLowerCase() + return status === 400 && msgRaw.includes("invalid value") && msgRaw.includes("input_text") + } private async *_yieldResponsesResult(response: any, modelInfo: ModelInfo): ApiStream { + // Capture response id for continuity when present + try { + const respId = (response as { id?: unknown } | undefined)?.id + if (typeof respId === "string" && respId.length > 0) { + this.lastResponseId = respId + } + } catch { + // ignore + } + const text = this._extractResponsesText(response) ?? "" if (text) { yield { type: "text", text } @@ -741,15 +986,36 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } private _isVerbosityUnsupportedError(err: unknown): boolean { - const anyErr = err as any - const msg = (anyErr?.message || "").toString().toLowerCase() - const status = anyErr?.status + if (err == null || typeof err !== "object") return false + + // you had hasOwnProperty("message") twice — likely a typo + if (!("message" in err)) return false + + const msg = String((err as { message?: unknown }).message ?? "").toLowerCase() + + const rawStatus = "status" in err ? (err as { status?: unknown }).status : undefined + const status = typeof rawStatus === "number" ? rawStatus : Number(rawStatus) + return ( status === 400 && (msg.includes("verbosity") || msg.includes("unknown parameter") || msg.includes("unsupported")) ) } + private _isPreviousResponseNotFoundError(err: unknown): boolean { + if (err == null || typeof err !== "object") return false + const anyErr = err as { + status?: unknown + response?: { status?: unknown } + message?: unknown + error?: { message?: unknown } + } + const statusRaw = anyErr.status ?? anyErr.response?.status + const status = typeof statusRaw === "number" ? statusRaw : Number(statusRaw) + const msg = (anyErr.message ?? anyErr.error?.message ?? 
"").toString().toLowerCase() + return status === 400 && (msg.includes("previous response") || msg.includes("not found")) + } + // ---- Responses input formatting (align with openai-native.ts) ---- private _formatResponsesInput(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): string { @@ -761,8 +1027,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl formattedInput += `${role}: ${message.content}\n\n` } else if (Array.isArray(message.content)) { const textContent = message.content - .filter((block) => (block as any).type === "text") - .map((block) => (block as any).text as string) + .filter((block) => block.type === "text") + .map((block) => block.text) .join("\n") if (textContent) { formattedInput += `${role}: ${textContent}\n\n` @@ -782,8 +1048,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } if (Array.isArray(message.content)) { const textContent = message.content - .filter((block) => (block as any).type === "text") - .map((block) => (block as any).text as string) + .filter((block) => block.type === "text") + .map((block) => block.text) .join("\n") return `${role}${textContent}` } diff --git a/src/api/transform/responses-stream.ts b/src/api/transform/responses-stream.ts new file mode 100644 index 00000000000..8b6eb3fa7ec --- /dev/null +++ b/src/api/transform/responses-stream.ts @@ -0,0 +1,263 @@ +import type { ApiStreamChunk } from "./stream" + +/** + * Minimal, typed streaming handler for OpenAI/Azure Responses API streams. + * Consumes an AsyncIterable of events and yields ApiStreamChunk items. + * + * Notes: + * - We intentionally handle only the core, stable event shapes that we already + * use in openai-native, to keep the surface area small and predictable. + * - If the event format changes, extend the type guards below conservatively. 
+ */ +export async function* handleResponsesStream( + stream: AsyncIterable, + options?: { onResponseId?: (id: string) => void }, +): AsyncGenerator { + let lastUsage: ResponseUsage | undefined + + for await (const event of stream) { + // Surface response.id to callers when available (for conversation continuity) + if (isObject(event)) { + const resp = (event as Record).response as unknown + if (isObject(resp)) { + const rid = (resp as Record).id + if (typeof rid === "string") { + options?.onResponseId?.(rid) + } + } + } + // 1) Streaming text deltas + if (isTextDelta(event)) { + const e = event as TextDeltaEvent + if (e.delta != null) { + yield { type: "text", text: String(e.delta) } + } + continue + } + + // 2) Streaming reasoning deltas + if (isReasoningDelta(event)) { + const e = event as ReasoningDeltaEvent + if (e.delta != null) { + yield { type: "reasoning", text: String(e.delta) } + } + continue + } + + // 2.1) Audio transcript deltas (map to text) + if (isAudioTranscriptDelta(event)) { + const e = event as AudioTranscriptDeltaEvent + if (e.delta != null) { + yield { type: "text", text: String(e.delta) } + } + continue + } + + // 3) Refusal deltas (map to text with prefix, matching native handler behavior) + if (isRefusalDelta(event)) { + const e = event as RefusalDeltaEvent + if (e.delta != null) { + yield { type: "text", text: `[Refusal] ${String(e.delta)}` } + } + continue + } + + // 4) Output-item added (alternative carrier for text/reasoning) + if (isOutputItemAdded(event)) { + const item = (event as OutputItemAddedEvent).item + if (item) { + if (item.type === "text" && typeof item.text === "string") { + yield { type: "text", text: item.text } + } else if (item.type === "reasoning" && typeof item.text === "string") { + yield { type: "reasoning", text: item.text } + } else if (item.type === "message" && Array.isArray(item.content)) { + for (const content of item.content) { + // Some servers use "text"; others use "output_text" + if ( + (content?.type === "text" || content?.type === "output_text") && + typeof content?.text === "string" + ) { + yield { type: "text", text: content.text } + } + } + } else if (typeof item.text === "string") { + // Fallback: emit item.text even if item.type is unknown (matches native handler tolerance) + yield { type: "text", text: item.text } + } + } + continue + } + + // 4.1) Content part added (SDK alternative format) + if (isContentPartAdded(event)) { + const part = (event as ContentPartAddedEvent).part + if (part && part.type === "text" && typeof part.text === "string") { + yield { type: "text", text: part.text } + } + continue + } + + // 5) Fallback: some implementations (or older shapes) supply choices[0].delta.content + const content = getChoiceDeltaContent(event) + if (content) { + yield { type: "text", text: content } + } + + // 6) Track usage whenever present + const usage = extractUsage(event) + if (usage) { + lastUsage = usage + } + + // 7) Completion/done events - emit usage if we have it + if (isDoneEvent(event)) { + const u = lastUsage + if (u && hasAnyUsage(u)) { + yield makeUsageChunk(u) + } + } + } +} + +/** Types, guards, and helpers */ + +type ResponseUsage = { + input_tokens?: number + output_tokens?: number + prompt_tokens?: number + completion_tokens?: number + cache_creation_input_tokens?: number + cache_read_input_tokens?: number + prompt_tokens_details?: { cached_tokens?: number } +} + +type TextDeltaEvent = { + type: "response.text.delta" | "response.output_text.delta" + delta?: unknown +} + +type ReasoningDeltaEvent = { + 
type: + | "response.reasoning.delta" + | "response.reasoning_text.delta" + | "response.reasoning_summary.delta" + | "response.reasoning_summary_text.delta" + delta?: unknown +} + +type RefusalDeltaEvent = { + type: "response.refusal.delta" + delta?: unknown +} + +type OutputItemAddedEvent = { + type: "response.output_item.added" + item?: { + type?: string + text?: unknown + content?: Array<{ type?: string; text?: unknown }> + } +} + +type DoneEvent = { + type: "response.done" | "response.completed" +} + +type AudioTranscriptDeltaEvent = { + type: "response.audio_transcript.delta" + delta?: unknown +} + +type ContentPartAddedEvent = { + type: "response.content_part.added" + part?: { + type?: string + text?: unknown + } +} + +function isObject(value: unknown): value is Record { + return typeof value === "object" && value !== null +} + +function isTextDelta(event: unknown): event is TextDeltaEvent { + return ( + isObject(event) && + typeof (event as Record).type === "string" && + (((event as Record).type as string) === "response.text.delta" || + ((event as Record).type as string) === "response.output_text.delta") + ) +} + +function isReasoningDelta(event: unknown): event is ReasoningDeltaEvent { + if (!isObject(event)) return false + const t = (event as Record).type + return ( + t === "response.reasoning.delta" || + t === "response.reasoning_text.delta" || + t === "response.reasoning_summary.delta" || + t === "response.reasoning_summary_text.delta" + ) +} + +function isRefusalDelta(event: unknown): event is RefusalDeltaEvent { + return isObject(event) && (event as Record).type === "response.refusal.delta" +} + +function isOutputItemAdded(event: unknown): event is OutputItemAddedEvent { + return isObject(event) && (event as Record).type === "response.output_item.added" +} + +function isAudioTranscriptDelta(event: unknown): event is AudioTranscriptDeltaEvent { + return isObject(event) && (event as Record).type === "response.audio_transcript.delta" +} + +function isContentPartAdded(event: unknown): event is ContentPartAddedEvent { + return isObject(event) && (event as Record).type === "response.content_part.added" +} + +function isDoneEvent(event: unknown): event is DoneEvent { + if (!isObject(event)) return false + const t = (event as Record).type + return t === "response.done" || t === "response.completed" +} + +function getChoiceDeltaContent(event: unknown): string | undefined { + if (!isObject(event)) return undefined + const choices = (event as Record).choices as unknown + if (!Array.isArray(choices) || choices.length === 0) return undefined + const first = choices[0] as unknown + if (!isObject(first)) return undefined + const delta = (first as Record).delta as unknown + if (!isObject(delta)) return undefined + const content = (delta as Record).content + if (content == null) return undefined + return String(content) +} + +function extractUsage(event: unknown): ResponseUsage | undefined { + if (!isObject(event)) return undefined + const resp = (event as Record).response as unknown + if (isObject(resp) && isObject((resp as Record).usage)) { + return (resp as Record).usage as ResponseUsage + } + const usage = (event as Record).usage as unknown + if (isObject(usage)) { + return usage as ResponseUsage + } + return undefined +} + +function hasAnyUsage(usage: ResponseUsage): boolean { + return Boolean(usage.input_tokens || usage.output_tokens || usage.prompt_tokens || usage.completion_tokens) +} + +function makeUsageChunk(usage: ResponseUsage): ApiStreamChunk { + return { + type: "usage", + 
inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0, + outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0, + cacheWriteTokens: usage.cache_creation_input_tokens ?? undefined, + cacheReadTokens: usage.cache_read_input_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? undefined, + } +} From 08570ea7d2569babd2c90094fec06f7d06964416 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Mon, 25 Aug 2025 16:12:07 +0900 Subject: [PATCH 04/14] chore: update openai package, to use the `verbosity` parameter. --- src/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/package.json b/src/package.json index 0600711e4dc..a660faef14f 100644 --- a/src/package.json +++ b/src/package.json @@ -464,7 +464,7 @@ "node-cache": "^5.1.2", "node-ipc": "^12.0.0", "ollama": "^0.5.17", - "openai": "^5.0.0", + "openai": "^5.15.0", "os-name": "^6.0.0", "p-limit": "^6.2.0", "p-wait-for": "^5.0.2", From bc3661c168921b5bf5aec30632a018ce6ac28a94 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Mon, 25 Aug 2025 16:16:13 +0900 Subject: [PATCH 05/14] chore: update pnpm-lock.yaml --- pnpm-lock.yaml | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index be701e50e7e..eb396327527 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -680,8 +680,8 @@ importers: specifier: ^0.5.17 version: 0.5.17 openai: - specifier: ^5.0.0 - version: 5.5.1(ws@8.18.3)(zod@3.25.61) + specifier: ^5.15.0 + version: 5.15.0(ws@8.18.3)(zod@3.25.61) os-name: specifier: ^6.0.0 version: 6.1.0 @@ -3916,9 +3916,6 @@ packages: '@types/node@20.17.57': resolution: {integrity: sha512-f3T4y6VU4fVQDKVqJV4Uppy8c1p/sVvS3peyqxyWnzkqXFJLRU7Y1Bl7rMS1Qe9z0v4M6McY0Fp9yBsgHJUsWQ==} - '@types/node@20.19.11': - resolution: {integrity: sha512-uug3FEEGv0r+jrecvUUpbY8lLisvIjg6AAic6a2bSP5OEOLeJsDSnvhCDov7ipFFMXS3orMpzlmi0ZcuGkBbow==} - '@types/node@24.2.1': resolution: {integrity: sha512-DRh5K+ka5eJic8CjH7td8QpYEV6Zo10gfRkjHCO3weqZHWDtAaSTFtl4+VMqOJ4N5jcuhZ9/l+yy8rVgw7BQeQ==} @@ -7690,8 +7687,8 @@ packages: resolution: {integrity: sha512-cxN6aIDPz6rm8hbebcP7vrQNhvRcveZoJU72Y7vskh4oIm+BZwBECnx5nTmrlres1Qapvx27Qo1Auukpf8PKXw==} engines: {node: '>=18'} - openai@5.5.1: - resolution: {integrity: sha512-5i19097mGotHA1eFsM6Tjd/tJ8uo9sa5Ysv4Q6bKJ2vtN6rc0MzMrUefXnLXYAJcmMQrC1Efhj0AvfIkXrQamw==} + openai@5.15.0: + resolution: {integrity: sha512-kcUdws8K/A8m02I+IqFBwO51gS+87GP89yWEufGbzEi8anBz4FB/bti2QxaJdGwwY4mwJGzx85XO7TuL/Tpu1w==} hasBin: true peerDependencies: ws: ^8.18.0 @@ -9326,9 +9323,6 @@ packages: undici-types@6.19.8: resolution: {integrity: sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==} - undici-types@6.21.0: - resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==} - undici-types@7.10.0: resolution: {integrity: sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==} @@ -13332,11 +13326,6 @@ snapshots: dependencies: undici-types: 6.19.8 - '@types/node@20.19.11': - dependencies: - undici-types: 6.21.0 - optional: true - '@types/node@24.2.1': dependencies: undici-types: 7.10.0 @@ -13400,7 +13389,7 @@ snapshots: '@types/ws@8.18.1': dependencies: - '@types/node': 20.19.11 + '@types/node': 24.2.1 optional: true '@types/yargs-parser@21.0.3': {} @@ -17755,7 +17744,7 @@ snapshots: is-inside-container: 1.0.0 is-wsl: 3.1.0 - openai@5.5.1(ws@8.18.3)(zod@3.25.61): + openai@5.15.0(ws@8.18.3)(zod@3.25.61): 
optionalDependencies: ws: 8.18.3 zod: 3.25.61 @@ -19659,9 +19648,6 @@ snapshots: undici-types@6.19.8: {} - undici-types@6.21.0: - optional: true - undici-types@7.10.0: {} undici@6.21.3: {} From 825c5025f15fd0d45503180972c1a854f0a26d6f Mon Sep 17 00:00:00 2001 From: Lagyu Date: Mon, 25 Aug 2025 16:31:41 +0900 Subject: [PATCH 06/14] docs: add JSDoc describing the auto url detection logic --- src/api/providers/openai.ts | 131 ++++++++++++++++++++++++++++++++++-- 1 file changed, 127 insertions(+), 4 deletions(-) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index a4b68fd42f9..2a43389911b 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -30,6 +30,45 @@ import { ResponseCreateParamsNonStreaming } from "openai/resources/responses/res // TODO: Rename this to OpenAICompatibleHandler. Also, I think the // `OpenAINativeHandler` can subclass from this, since it's obviously // compatible with the OpenAI API. We can also rename it to `OpenAIHandler`. +/** + * URL auto-detection overview + * + * Decision tree (host and path based): + * 1) Azure AI Inference Service: + * - Detected when host ends with ".services.ai.azure.com" + * - Uses OpenAI Chat Completions API shape with a path override + * (see OPENAI_AZURE_AI_INFERENCE_PATH) when making requests. + * + * 2) Azure OpenAI: + * - Detected when host is "openai.azure.com" or ends with ".openai.azure.com" + * or when options.openAiUseAzure is explicitly true. + * - Within Azure OpenAI, the API "flavor" is chosen by URL path: + * - Responses API: + * * Path contains "/v1/responses" or ends with "/responses" + * * Also auto-detected for portal-style URLs (e.g. "/openai/responses?api-version=2025-04-01-preview") + * which itself is not valid in request, are normalized to "/openai/v1" with apiVersion "preview". + * - Chat Completions API: + * * Path contains "/chat/completions" + * - Default: + * * Falls back to Chat Completions if none of the above match. + * + * 3) Generic OpenAI-compatible endpoints: + * - Anything else (OpenAI, OpenRouter, LM Studio, vLLM, etc.) + * - Flavor is again selected by URL path as above: + * - "/v1/responses" or ending with "/responses" => Responses API + * - "/chat/completions" => Chat Completions + * - otherwise defaults to Chat Completions for backward compatibility. + * + * Examples: + * - https://api.openai.com/v1 -> Chat Completions (default) + * - https://api.openai.com/v1/responses -> Responses API + * - https://api.openai.com/v1/chat/completions -> Chat Completions + * - https://myres.openai.azure.com/openai/v1/responses?api-version=preview + * -> Azure OpenAI + Responses API + * - https://myres.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * -> normalized to base /openai/v1 + apiVersion "preview" (Responses) + * - https://test.services.ai.azure.com -> Azure AI Inference Service (Chat Completions with path override) + */ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI @@ -773,16 +812,55 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } + /** + * Detects Grok xAI endpoints. + * - Returns true when the host contains "x.ai" (e.g., "api.x.ai"). + * - Used to omit stream_options for streaming requests because Grok may not support them. 
+ * + * Examples: + * - https://api.x.ai/v1 -> true + * - https://api.openai.com/v1 -> false + */ private _isGrokXAI(baseUrl?: string): boolean { const urlHost = this._getUrlHost(baseUrl) return urlHost.includes("x.ai") } + /** + * Detects Azure AI Inference Service endpoints (distinct from Azure OpenAI). + * - Returns true when host ends with ".services.ai.azure.com". + * - These endpoints require a special path override when calling the Chat Completions API. + * + * Examples: + * - https://myenv.services.ai.azure.com -> true + * - https://myres.openai.azure.com -> false (this is Azure OpenAI, not AI Inference) + */ private _isAzureAiInference(baseUrl?: string): boolean { const urlHost = this._getUrlHost(baseUrl) return urlHost.endsWith(".services.ai.azure.com") } + /** + * Detects Azure OpenAI "Responses API" URLs by host and path. + * - Host must be "openai.azure.com" or end with ".openai.azure.com" + * - Path may be one of: + * • "/openai/v1/responses" (preferred v1 path) + * • "/openai/responses" (portal/legacy style) + * • any path ending with "/responses" + * - Trailing slashes are trimmed before matching. + * + * This is used to favor the Responses API flavor on Azure OpenAI when the base URL already + * points to a Responses path. + * + * Examples (true): + * - https://myres.openai.azure.com/openai/v1/responses?api-version=preview + * - https://myres.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * - https://openai.azure.com/openai/v1/responses + * + * Examples (false): + * - https://myres.openai.azure.com/openai/v1/chat/completions + * - https://api.openai.com/v1/responses (not an Azure host) + */ private _isAzureOpenAiResponses(baseUrl?: string): boolean { try { if (!baseUrl) return false @@ -801,10 +879,36 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } /** - * Normalize Azure "responses" portal URLs to SDK-friendly base and version. - * - Input (portal sometimes shows): https://{res}.openai.azure.com/openai/responses?api-version=2025-04-01-preview - * - Output: baseURL=https://{res}.openai.azure.com/openai/v1, apiVersionOverride="preview" - * No-op for already-correct or non-Azure URLs. + * Normalizes Azure OpenAI "Responses" portal URLs to an SDK-friendly base and version. + * + * Why: + * - The Azure portal often presents a non-v1 Responses endpoint such as: + * https://{res}.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * which is not the ideal base for SDK clients. We convert it to: + * baseURL = https://{res}.openai.azure.com/openai/v1 + * apiVersionOverride = "preview" + * + * What it does: + * - If the input is an Azure OpenAI host and its path is exactly "/openai/responses" + * with api-version=2025-04-01-preview, we: + * • return { baseURL: "https://{host}/openai/v1", apiVersionOverride: "preview" } + * - If the input is already "/openai/v1/responses", we similarly normalize the base to "/openai/v1" + * and set apiVersionOverride to "preview". + * - Otherwise, returns the original URL unchanged. + * + * Scope: + * - Only applies to Azure OpenAI hosts ("openai.azure.com" or "*.openai.azure.com"). + * - Non-Azure URLs or already SDK-friendly bases are returned as-is. 
+ * + * Examples: + * - In: https://sample.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * Out: baseURL=https://sample.openai.azure.com/openai/v1, apiVersionOverride="preview" + * + * - In: https://sample.openai.azure.com/openai/v1/responses?api-version=preview + * Out: baseURL=https://sample.openai.azure.com/openai/v1, apiVersionOverride="preview" + * + * - In: https://api.openai.com/v1/responses + * Out: baseURL unchanged (non-Azure) */ private _normalizeAzureResponsesBaseUrlAndVersion(inputBaseUrl: string): { baseURL: string @@ -866,6 +970,25 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // --- Responses helpers --- + /** + * Determines which OpenAI-compatible API flavor to use based on the URL path. + * - This is purely path-based and provider-agnostic (works for OpenAI, Azure OpenAI after normalization, etc.). + * + * Rules: + * - If path contains "/v1/responses" OR ends with "/responses" => "responses" + * - Else if path contains "/chat/completions" => "chat" + * - Else default to "chat" for backward compatibility + * + * Notes: + * - Trailing slashes are not required to match; we rely on substring checks. + * - Azure "portal" style URLs are normalized beforehand where applicable. + * + * Examples: + * - https://api.openai.com/v1/responses -> "responses" + * - https://api.openai.com/v1/chat/completions -> "chat" + * - https://myres.openai.azure.com/openai/v1 -> "chat" (default) + * - https://myres.openai.azure.com/openai/v1/responses -> "responses" + */ private _resolveApiFlavor(baseUrl: string): "responses" | "chat" { // Auto-detect by URL path const url = this._safeParseUrl(baseUrl) From cd512544ff7c2eb565e210807507d5a83d538507 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Wed, 27 Aug 2025 12:13:06 +0900 Subject: [PATCH 07/14] fix: omit the conversation in responses api. 
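
When a previous_response_id is available, the Responses API already holds the
prior turns server-side, so resending the full transcript only inflates the
request. createMessage now sends just the latest user turn in that case: a
plain "User: ..." string for text-only turns, or a one-item array when that
turn carries images, with no Developer preface. In addition, max_output_tokens
is now always set on Responses requests, and text.verbosity is sent only when
the user explicitly enables the new verbosity toggle in the OpenAI Compatible
settings.

Illustrative request shapes (field names follow the Responses API; values are
examples, not captured traffic):

    // first turn: full transcript, no previous_response_id
    { model: "gpt-5", input: "Developer: <system>\n\nUser: Hi\n\n", max_output_tokens: 123 }

    // follow-up turn: minimal input plus the continuity id
    { model: "gpt-5", input: "User: Latest", previous_response_id: "prev-1", max_output_tokens: 123 }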
--- src/api/providers/__tests__/openai.spec.ts | 127 ++++++++++++++++++ src/api/providers/openai.ts | 71 ++++++---- .../settings/providers/OpenAICompatible.tsx | 23 ++++ 3 files changed, 193 insertions(+), 28 deletions(-) diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index eeab8315521..ae522c0e81b 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -1860,3 +1860,130 @@ describe("OpenAI Compatible - Responses API parity improvements", () => { expect(args.reasoning.summary).toBeUndefined() }) }) + +describe("OpenAI Compatible - Responses API minimal input parity (new tests)", () => { + beforeEach(() => { + // @ts-ignore - reuse mocks from this spec module + mockCreate.mockClear() + // @ts-ignore - reuse mocks from this spec module + mockResponsesCreate.mockClear() + }) + + it("sends only latest user message when previous_response_id is provided (string input, no Developer preface)", async () => { + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + + const msgs: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: [{ type: "text" as const, text: "First" }] }, + { role: "assistant", content: [{ type: "text" as const, text: "Reply" }] }, + { role: "user", content: [{ type: "text" as const, text: "Latest" }] }, + ] + + const chunks: any[] = [] + for await (const ch of handler.createMessage("System Inst", msgs, { previousResponseId: "prev-1" } as any)) { + chunks.push(ch) + } + + // Ensure Responses API was used with minimal input + // @ts-ignore + expect(mockResponsesCreate).toHaveBeenCalled() + // @ts-ignore + const args = mockResponsesCreate.mock.calls[0][0] + + expect(typeof args.input).toBe("string") + expect(args.input).toBe("User: Latest") + expect(String(args.input)).not.toContain("Developer: System Inst") + }) + + it("uses array input with only latest user content when previous_response_id and last user has images (no Developer preface)", async () => { + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + + const msgs: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: [{ type: "text" as const, text: "Prev" }] }, + { role: "assistant", content: [{ type: "text" as const, text: "Ok" }] }, + { + role: "user", + content: [ + { type: "text" as const, text: "See" }, + { type: "image" as const, source: { media_type: "image/png", data: "IMGDATA" } as any }, + ], + }, + ] + + const iter = handler.createMessage("Sys", msgs, { previousResponseId: "prev-2" } as any) + for await (const _ of iter) { + // consume + } + + // @ts-ignore + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(Array.isArray(args.input)).toBe(true) + + const arr = args.input as any[] + expect(arr.length).toBe(1) + expect(arr[0]?.role).toBe("user") + + const contents = arr[0]?.content || [] + const hasImg = contents.some((p: any) => p?.type === "input_image") + expect(hasImg).toBe(true) + + // No Developer preface should be injected in minimal mode + const hasDev = contents.some( + (p: any) => p?.type === "input_text" && typeof p.text === "string" && p.text.includes("Developer:"), + ) + expect(hasDev).toBe(false) + }) + + it("always includes max_output_tokens for Responses API", async () => { + const handler = new OpenAiHandler({ 
+ openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + includeMaxTokens: false, // should still include based on model info + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 123, // fallback used when modelMaxTokens not set + supportsPromptCache: false, + }, + }) + + for await (const _ of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + // consume + } + + // @ts-ignore + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(args).toHaveProperty("max_output_tokens", 123) + }) + + it("does not include text.verbosity when not provided", async () => { + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + + for await (const _ of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + // consume + } + + // @ts-ignore + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(args).not.toHaveProperty("text") + }) +}) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 2a43389911b..774baa77093 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -182,21 +182,35 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Build Responses payload (align with OpenAI Native Responses API formatting) // Azure- and Responses-compatible multimodal handling: - // - Use array input ONLY when the latest user message contains images - // - Include the most recent assistant message as input_text to preserve continuity - // - Always include a Developer preface + // - Use array input ONLY when the latest user message contains images (initial turn) + // - When previous_response_id is present, send only the latest user turn: + // • Text-only => single string "User: ...", no Developer preface + // • With images => one-item array containing only the latest user content (no Developer preface) const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") const lastUserHasImages = !!lastUserMessage && Array.isArray(lastUserMessage.content) && lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") + // Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input) + const previousId = metadata?.suppressPreviousResponseId + ? undefined + : (metadata?.previousResponseId ?? this.lastResponseId) + + const minimalInputMode = Boolean(previousId) + let inputPayload: unknown - if (lastUserHasImages && lastUserMessage) { - // Select messages to retain context in array mode: - // - The most recent assistant message (text-only, as input_text) - // - All user messages that contain images - // - The latest user message (even if it has no image) + if (minimalInputMode && lastUserMessage) { + // Minimal-mode: only the latest user message (no Developer preface) + if (lastUserHasImages) { + // Single-item array with just the latest user content + inputPayload = this._toResponsesInput([lastUserMessage]) + } else { + // Single message string "User: ..." 
+ inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true) + } + } else if (lastUserHasImages && lastUserMessage) { + // Initial turn with images: include Developer preface and minimal prior context to preserve continuity const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant") const messagesForArray = messages.filter((m) => { @@ -219,15 +233,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } inputPayload = [developerPreface, ...arrayInput] } else { - // Pure text history: use compact transcript (includes both user and assistant turns) + // Pure text history: full compact transcript (includes both user and assistant turns) inputPayload = this._formatResponsesInput(systemPrompt, messages) } const usedArrayInput = Array.isArray(inputPayload) - const previousId = metadata?.suppressPreviousResponseId - ? undefined - : (metadata?.previousResponseId ?? this.lastResponseId) - const basePayload: Record = { model: modelId, input: inputPayload, @@ -262,20 +272,19 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE } - // Verbosity: include via text.verbosity (Responses API expectation per openai-native handler) - const effectiveVerbosity = this.options.verbosity || verbosity - if (effectiveVerbosity) { + // Verbosity: include only when explicitly specified in settings + if (this.options.verbosity) { ;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { - verbosity: effectiveVerbosity as "low" | "medium" | "high", + verbosity: this.options.verbosity as "low" | "medium" | "high", } } - // Add max_output_tokens if requested (Azure Responses naming) - if (this.options.includeMaxTokens === true) { - basePayload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens - } + // Always include max_output_tokens for Responses API to cap output length + const reservedMax = (modelParams as any)?.maxTokens + ;(basePayload as Record).max_output_tokens = + this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens - // Non-streaming path (preserves existing behavior and tests) + // Non-streaming path if (nonStreaming) { try { const response = await ( @@ -314,10 +323,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ).responses.create(withoutVerbosity) yield* this._yieldResponsesResult(response as unknown, modelInfo) } else if (usedArrayInput && this._isInputTextInvalidError(err)) { - // Azure-specific fallback: retry with string transcript when array input is rejected + // Azure-specific fallback: retry with a minimal single-message string when array input is rejected const retryPayload: Record = { ...basePayload, - input: this._formatResponsesInput(systemPrompt, messages), + input: + previousId && lastUserMessage + ? 
this._formatResponsesSingleMessage(lastUserMessage, true) + : this._formatResponsesInput(systemPrompt, messages), } const response = await ( this.client as unknown as { @@ -412,10 +424,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo) } } else if (usedArrayInput && this._isInputTextInvalidError(err)) { - // Azure-specific fallback for streaming: retry with string transcript while keeping stream: true + // Azure-specific fallback for streaming: retry with minimal single-message string while keeping stream: true const retryStreamingPayload: Record = { ...streamingPayload, - input: this._formatResponsesInput(systemPrompt, messages), + input: + previousId && lastUserMessage + ? this._formatResponsesSingleMessage(lastUserMessage, true) + : this._formatResponsesInput(systemPrompt, messages), } const maybeStreamRetry = await ( this.client as unknown as { @@ -661,9 +676,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl payload.temperature = this.options.modelTemperature } - // Verbosity via text.verbosity + // Verbosity via text.verbosity - include only when explicitly specified if (this.options.verbosity) { - payload.text = { verbosity: this.options.verbosity } + payload.text = { verbosity: this.options.verbosity as "low" | "medium" | "high" } } // max_output_tokens diff --git a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx index ee462296b5f..b64ee187a12 100644 --- a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx +++ b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx @@ -22,6 +22,7 @@ import { inputEventTransform, noTransform } from "../transforms" import { ModelPicker } from "../ModelPicker" import { R1FormatSetting } from "../R1FormatSetting" import { ThinkingBudget } from "../ThinkingBudget" +import { Verbosity } from "../Verbosity" type OpenAICompatibleProps = { apiConfiguration: ProviderSettings @@ -40,6 +41,7 @@ export const OpenAICompatible = ({ const [azureApiVersionSelected, setAzureApiVersionSelected] = useState(!!apiConfiguration?.azureApiVersion) const [openAiLegacyFormatSelected, setOpenAiLegacyFormatSelected] = useState(!!apiConfiguration?.openAiLegacyFormat) + const [verbositySelected, setVerbositySelected] = useState(!!apiConfiguration?.verbosity) const [openAiModels, setOpenAiModels] = useState | null>(null) @@ -282,6 +284,27 @@ export const OpenAICompatible = ({ /> )} +
+ { + setVerbositySelected(checked) + if (!checked) { + setApiConfigurationField("verbosity", undefined as any) + } else if (!apiConfiguration.verbosity) { + setApiConfigurationField("verbosity", "medium" as any) + } + }}> + {t("settings:providers.verbosity.label")} + + {verbositySelected && ( + + )} +
{t("settings:providers.customModel.capabilities")} From bf49d7775428a79e4c2566826ebe7131da1b3b6a Mon Sep 17 00:00:00 2001 From: Lagyu Date: Wed, 27 Aug 2025 12:15:34 +0900 Subject: [PATCH 08/14] chore: remove unnecessary type cast --- .../src/components/settings/providers/OpenAICompatible.tsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx index b64ee187a12..19f1fdc32ab 100644 --- a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx +++ b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx @@ -290,9 +290,9 @@ export const OpenAICompatible = ({ onChange={(checked: boolean) => { setVerbositySelected(checked) if (!checked) { - setApiConfigurationField("verbosity", undefined as any) + setApiConfigurationField("verbosity", undefined) } else if (!apiConfiguration.verbosity) { - setApiConfigurationField("verbosity", "medium" as any) + setApiConfigurationField("verbosity", "medium") } }}> {t("settings:providers.verbosity.label")} @@ -300,7 +300,7 @@ export const OpenAICompatible = ({ {verbositySelected && ( )} From 48d1a61fb8ac54eb2a64e38014575639efe3d1da Mon Sep 17 00:00:00 2001 From: Lagyu Date: Fri, 29 Aug 2025 18:02:05 +0900 Subject: [PATCH 09/14] refactor(openai): centralize Responses error handling via _responsesCreateWithRetries; dedupe checks for previous_response_id, verbosity, and Azure input_text invalid in streaming and non-streaming paths --- src/api/providers/openai.ts | 276 +++++++++++++----------------------- 1 file changed, 99 insertions(+), 177 deletions(-) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 774baa77093..4dda6f2afb0 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -286,175 +286,41 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Non-streaming path if (nonStreaming) { - try { - const response = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(basePayload) - yield* this._yieldResponsesResult(response as unknown, modelInfo) - } catch (err: unknown) { - // Retry without previous_response_id if server rejects it (400 "Previous response ... 
not found") - if (previousId && this._isPreviousResponseNotFoundError(err)) { - const { previous_response_id: _omitPrev, ...withoutPrev } = basePayload as { - previous_response_id?: unknown - [key: string]: unknown - } - // Clear stored continuity to avoid reusing a bad id - this.lastResponseId = undefined - const response = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(withoutPrev) - yield* this._yieldResponsesResult(response as unknown, modelInfo) - } - // Graceful downgrade if verbosity is rejected by server (400 unknown/unsupported parameter) - else if ("text" in basePayload && this._isVerbosityUnsupportedError(err)) { - // Remove text.verbosity and retry once - const { text: _omit, ...withoutVerbosity } = basePayload as { text?: unknown } & Record< - string, - unknown - > - const response = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(withoutVerbosity) - yield* this._yieldResponsesResult(response as unknown, modelInfo) - } else if (usedArrayInput && this._isInputTextInvalidError(err)) { - // Azure-specific fallback: retry with a minimal single-message string when array input is rejected - const retryPayload: Record = { - ...basePayload, - input: - previousId && lastUserMessage - ? this._formatResponsesSingleMessage(lastUserMessage, true) - : this._formatResponsesInput(systemPrompt, messages), - } - const response = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(retryPayload) - yield* this._yieldResponsesResult(response as unknown, modelInfo) - } else { - throw err - } - } + const response = await this._responsesCreateWithRetries(basePayload, { + usedArrayInput, + lastUserMessage, + previousId, + systemPrompt, + messages, + }) + yield* this._yieldResponsesResult(response as unknown, modelInfo) return } // Streaming path (auto-fallback to non-streaming result if provider ignores stream flag) const streamingPayload: Record = { ...basePayload, stream: true } - try { - const maybeStream = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(streamingPayload) - - const isAsyncIterable = (obj: unknown): obj is AsyncIterable => - typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" - - if (isAsyncIterable(maybeStream)) { - for await (const chunk of handleResponsesStream(maybeStream, { - onResponseId: (id) => { - this.lastResponseId = id - }, - })) { - yield chunk - } - } else { - // Some providers may ignore the stream flag and return a complete response - yield* this._yieldResponsesResult(maybeStream as unknown, modelInfo) - } - } catch (err: unknown) { - // Retry without previous_response_id if server rejects it (400 "Previous response ... 
not found") - if (previousId && this._isPreviousResponseNotFoundError(err)) { - const { previous_response_id: _omitPrev, ...withoutPrev } = streamingPayload as { - previous_response_id?: unknown - [key: string]: unknown - } - this.lastResponseId = undefined - const maybeStreamRetry = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(withoutPrev) - - const isAsyncIterable = (obj: unknown): obj is AsyncIterable => - typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" - - if (isAsyncIterable(maybeStreamRetry)) { - for await (const chunk of handleResponsesStream(maybeStreamRetry, { - onResponseId: (id) => { - this.lastResponseId = id - }, - })) { - yield chunk - } - } else { - yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo) - } - } - // Graceful verbosity removal on 400 - else if ("text" in streamingPayload && this._isVerbosityUnsupportedError(err)) { - const { text: _omit, ...withoutVerbosity } = streamingPayload as { text?: unknown } & Record< - string, - unknown - > - const maybeStreamRetry = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(withoutVerbosity) - - const isAsyncIterable = (obj: unknown): obj is AsyncIterable => - typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" - - if (isAsyncIterable(maybeStreamRetry)) { - for await (const chunk of handleResponsesStream(maybeStreamRetry, { - onResponseId: (id) => { - this.lastResponseId = id - }, - })) { - yield chunk - } - } else { - yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo) - } - } else if (usedArrayInput && this._isInputTextInvalidError(err)) { - // Azure-specific fallback for streaming: retry with minimal single-message string while keeping stream: true - const retryStreamingPayload: Record = { - ...streamingPayload, - input: - previousId && lastUserMessage - ? 
this._formatResponsesSingleMessage(lastUserMessage, true) - : this._formatResponsesInput(systemPrompt, messages), - } - const maybeStreamRetry = await ( - this.client as unknown as { - responses: { create: (body: Record) => Promise } - } - ).responses.create(retryStreamingPayload) + const maybeStream = await this._responsesCreateWithRetries(streamingPayload, { + usedArrayInput, + lastUserMessage, + previousId, + systemPrompt, + messages, + }) - const isAsyncIterable = (obj: unknown): obj is AsyncIterable => - typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" + const isAsyncIterable = (obj: unknown): obj is AsyncIterable => + typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" - if (isAsyncIterable(maybeStreamRetry)) { - for await (const chunk of handleResponsesStream(maybeStreamRetry, { - onResponseId: (id) => { - this.lastResponseId = id - }, - })) { - yield chunk - } - } else { - yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo) - } - } else { - throw err + if (isAsyncIterable(maybeStream)) { + for await (const chunk of handleResponsesStream(maybeStream, { + onResponseId: (id) => { + this.lastResponseId = id + }, + })) { + yield chunk } + } else { + // Some providers may ignore the stream flag and return a complete response + yield* this._yieldResponsesResult(maybeStream as unknown, modelInfo) } return } @@ -686,25 +552,22 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl payload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens } + const response = await this._responsesCreateWithRetries(payload as unknown as Record, { + usedArrayInput: false, + lastUserMessage: undefined, + previousId: undefined, + systemPrompt: "", + messages: [], + }) try { - const response = await this.client.responses.create(payload) - try { - const respId = (response as { id?: unknown } | undefined)?.id - if (typeof respId === "string" && respId.length > 0) { - this.lastResponseId = respId - } - } catch { - // ignore - } - return this._extractResponsesText(response) ?? "" - } catch (err: unknown) { - if (payload.text && this._isVerbosityUnsupportedError(err)) { - const { text: _omit, ...withoutVerbosity } = payload - const response = await this.client.responses.create(withoutVerbosity) - return this._extractResponsesText(response) ?? "" + const respId = (response as { id?: unknown } | undefined)?.id + if (typeof respId === "string" && respId.length > 0) { + this.lastResponseId = respId } - throw err + } catch { + // ignore } + return this._extractResponsesText(response) ?? "" } const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { @@ -1095,6 +958,65 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const msgRaw = (anyErr.message ?? anyErr.error?.message ?? "").toString().toLowerCase() return status === 400 && msgRaw.includes("invalid value") && msgRaw.includes("input_text") } + + /** + * Centralized Responses.create with one-shot retries for common provider errors: + * - 400 "Previous response ... not found" -> drop previous_response_id and retry + * - 400 unknown/unsupported "text.verbosity" -> remove text and retry + * - 400 invalid value for input_text (Azure) -> rebuild single-message string input and retry + * Returns either an AsyncIterable (streaming) or a full response object (non-streaming). 
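+	 *
+	 * Example (illustrative; the payload mirrors what createMessage builds):
+	 *
+	 *   const result = await this._responsesCreateWithRetries(
+	 *   	{ model: modelId, input, stream: true },
+	 *   	{ usedArrayInput, lastUserMessage, previousId, systemPrompt, messages },
+	 *   )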
+ */ + private async _responsesCreateWithRetries( + payload: Record, + opts: { + usedArrayInput: boolean + lastUserMessage?: Anthropic.Messages.MessageParam + previousId?: string + systemPrompt: string + messages: Anthropic.Messages.MessageParam[] + }, + ): Promise { + const create = (body: Record) => + ( + this.client as unknown as { responses: { create: (b: Record) => Promise } } + ).responses.create(body) + + try { + return await create(payload) + } catch (err: unknown) { + // Retry without previous_response_id if server rejects it + if (opts.previousId && this._isPreviousResponseNotFoundError(err)) { + const { previous_response_id: _omitPrev, ...withoutPrev } = payload as { + previous_response_id?: unknown + [key: string]: unknown + } + this.lastResponseId = undefined + return await create(withoutPrev) + } + + // Graceful downgrade if verbosity is rejected by server + if ("text" in payload && this._isVerbosityUnsupportedError(err)) { + const { text: _omit, ...withoutVerbosity } = payload as { text?: unknown } & Record + return await create(withoutVerbosity) + } + + // Azure-specific fallback when array input is rejected + if (opts.usedArrayInput && this._isInputTextInvalidError(err)) { + const fallbackInput = + opts.previousId && opts.lastUserMessage + ? this._formatResponsesSingleMessage(opts.lastUserMessage, true) + : this._formatResponsesInput(opts.systemPrompt, opts.messages) + + const retryPayload: Record = { + ...payload, + input: fallbackInput, + } + return await create(retryPayload) + } + + throw err + } + } private async *_yieldResponsesResult(response: any, modelInfo: ModelInfo): ApiStream { // Capture response id for continuity when present try { From eb25c45e0d0929091f02aade0332c7a95f110930 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Fri, 5 Sep 2025 15:54:14 +0900 Subject: [PATCH 10/14] chore: remove unnecessary type cast --- src/api/providers/__tests__/openai.spec.ts | 73 ++++++++++++++-------- src/api/providers/openai.ts | 50 ++++++++------- src/api/transform/responses-stream.ts | 12 ++-- 3 files changed, 81 insertions(+), 54 deletions(-) diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index ae522c0e81b..cd2a64a87eb 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -7,6 +7,20 @@ import OpenAI from "openai" import { Package } from "../../../shared/package" import axios from "axios" +type ErrorWithStatus = Error & { status?: number } + +function getMockCallsOf(fn: unknown): any[] { + const isObj = (v: unknown): v is Record => typeof v === "object" && v !== null + if (isObj(fn) || typeof fn === "function") { + const rec = fn as Record + const mock = rec["mock"] + if (isObj(mock)) { + const calls = mock["calls"] + if (Array.isArray(calls)) return calls + } + } + return [] +} const mockCreate = vitest.fn() const mockResponsesCreate = vitest.fn() @@ -424,9 +438,9 @@ describe("OpenAiHandler", () => { }) it("should handle rate limiting", async () => { - const rateLimitError = new Error("Rate limit exceeded") + const rateLimitError: ErrorWithStatus = new Error("Rate limit exceeded") rateLimitError.name = "Error" - ;(rateLimitError as any).status = 429 + rateLimitError.status = 429 mockCreate.mockRejectedValueOnce(rateLimitError) const stream = handler.createMessage("system prompt", testMessages) @@ -1198,9 +1212,9 @@ describe("OpenAI Compatible - Responses API", () => { it("Verbosity (Responses): include when set; if server rejects, retry without it (warn once)", async () 
=> { // First call throws 400 for 'verbosity', second succeeds - mockResponsesCreate.mockImplementationOnce((_opts: any) => { - const err = new Error("Unsupported parameter: 'verbosity'") - ;(err as any).status = 400 + mockResponsesCreate.mockImplementationOnce((_opts: unknown) => { + const err: ErrorWithStatus = new Error("Unsupported parameter: 'verbosity'") + err.status = 400 throw err }) @@ -1295,10 +1309,13 @@ describe("OpenAI Compatible - Responses API", () => { // Ensure SDK constructor was called with normalized baseURL and 'preview' apiVersion (per requirement) // Note: AzureOpenAI and OpenAI share same mock constructor; inspect last call - const ctorCalls = vi.mocked(OpenAI as unknown as any).mock.calls as any[] - const lastCtorArgs = ctorCalls[ctorCalls.length - 1]?.[0] || {} - expect(lastCtorArgs.baseURL).toBe("https://sample-name.openai.azure.com/openai/v1") - expect(lastCtorArgs.apiVersion).toBe("preview") + const ctorCalls = getMockCallsOf(OpenAI) + const lastCall = ctorCalls[ctorCalls.length - 1] + const lastArg0 = Array.isArray(lastCall) ? lastCall[0] : undefined + const lastCtorArgs = + typeof lastArg0 === "object" && lastArg0 !== null ? (lastArg0 as Record) : {} + expect(lastCtorArgs["baseURL"]).toBe("https://sample-name.openai.azure.com/openai/v1") + expect(lastCtorArgs["apiVersion"]).toBe("preview") }) it("streams Responses API when provider returns AsyncIterable", async () => { @@ -1461,7 +1478,7 @@ describe("OpenAI Compatible - Responses API (multimodal)", () => { { type: "image" as const, // Minimal Anthropic-style inline image (base64) block - source: { media_type: "image/png", data: "BASE64DATA" } as any, + source: { type: "base64" as const, media_type: "image/png", data: "BASE64DATA" }, }, ], }, @@ -1478,7 +1495,7 @@ describe("OpenAI Compatible - Responses API (multimodal)", () => { // Input should be an array (structured input mode) expect(Array.isArray(args.input)).toBe(true) - const arr = args.input as any[] + const arr = Array.isArray(args.input) ? args.input : [] // First element should be Developer preface as input_text expect(arr[0]?.role).toBe("user") @@ -1537,7 +1554,7 @@ describe("OpenAI Compatible - Responses API (multimodal)", () => { { type: "text" as const, text: "Look at this" }, { type: "image" as const, - source: { media_type: "image/jpeg", data: "IMGDATA" } as any, + source: { type: "base64" as const, media_type: "image/jpeg", data: "IMGDATA" }, }, ], }, @@ -1648,7 +1665,7 @@ describe("OpenAI Compatible - Responses API conversation continuity", () => { for await (const _ of handler.createMessage( "sys", [{ role: "user", content: [{ type: "text" as const, text: "Turn 2" }] }], - { suppressPreviousResponseId: true } as any, + { taskId: "test", suppressPreviousResponseId: true }, )) { } @@ -1668,9 +1685,9 @@ describe("OpenAI Compatible - Responses API parity improvements", () => { it("retries without previous_response_id when server returns 400 'Previous response ... 
not found' (non-streaming)", async () => { // First call throws 400 for previous_response_id, second succeeds mockResponsesCreate - .mockImplementationOnce((_opts: any) => { - const err = new Error("Previous response rid-bad not found") - ;(err as any).status = 400 + .mockImplementationOnce((_opts: unknown) => { + const err: ErrorWithStatus = new Error("Previous response rid-bad not found") + err.status = 400 throw err }) .mockImplementationOnce(async (_opts: any) => { @@ -1688,7 +1705,7 @@ describe("OpenAI Compatible - Responses API parity improvements", () => { for await (const ch of h.createMessage( "sys", [{ role: "user", content: [{ type: "text" as const, text: "Turn" }] }], - { previousResponseId: "rid-bad" } as any, + { taskId: "test", previousResponseId: "rid-bad" }, )) { chunks.push(ch) } @@ -1709,9 +1726,9 @@ describe("OpenAI Compatible - Responses API parity improvements", () => { it("retries without previous_response_id when server returns 400 (streaming)", async () => { // First call throws, second returns a stream mockResponsesCreate - .mockImplementationOnce((_opts: any) => { - const err = new Error("Previous response not found") - ;(err as any).status = 400 + .mockImplementationOnce((_opts: unknown) => { + const err: ErrorWithStatus = new Error("Previous response not found") + err.status = 400 throw err }) .mockImplementationOnce(async (_opts: any) => { @@ -1734,7 +1751,7 @@ describe("OpenAI Compatible - Responses API parity improvements", () => { for await (const ch of h.createMessage( "sys", [{ role: "user", content: [{ type: "text" as const, text: "Hi" }] }], - { previousResponseId: "bad-id" } as any, + { taskId: "test", previousResponseId: "bad-id" }, )) { out.push(ch) } @@ -1884,7 +1901,10 @@ describe("OpenAI Compatible - Responses API minimal input parity (new tests)", ( ] const chunks: any[] = [] - for await (const ch of handler.createMessage("System Inst", msgs, { previousResponseId: "prev-1" } as any)) { + for await (const ch of handler.createMessage("System Inst", msgs, { + taskId: "test", + previousResponseId: "prev-1", + })) { chunks.push(ch) } @@ -1914,12 +1934,15 @@ describe("OpenAI Compatible - Responses API minimal input parity (new tests)", ( role: "user", content: [ { type: "text" as const, text: "See" }, - { type: "image" as const, source: { media_type: "image/png", data: "IMGDATA" } as any }, + { + type: "image" as const, + source: { type: "base64" as const, media_type: "image/png", data: "IMGDATA" }, + }, ], }, ] - const iter = handler.createMessage("Sys", msgs, { previousResponseId: "prev-2" } as any) + const iter = handler.createMessage("Sys", msgs, { taskId: "test", previousResponseId: "prev-2" }) for await (const _ of iter) { // consume } @@ -1928,7 +1951,7 @@ describe("OpenAI Compatible - Responses API minimal input parity (new tests)", ( const args = mockResponsesCreate.mock.calls.pop()?.[0] expect(Array.isArray(args.input)).toBe(true) - const arr = args.input as any[] + const arr = Array.isArray(args.input) ? 
args.input : [] expect(arr.length).toBe(1) expect(arr[0]?.role).toBe("user") diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 4dda6f2afb0..2b12015aaf1 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -25,7 +25,6 @@ import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { getApiRequestTimeout } from "./utils/timeout-config" -import { ResponseCreateParamsNonStreaming } from "openai/resources/responses/responses" // TODO: Rename this to OpenAICompatibleHandler. Also, I think the // `OpenAINativeHandler` can subclass from this, since it's obviously @@ -151,19 +150,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { // Gather model params (centralized: temperature, max tokens, reasoning, verbosity) - const modelParams = this.getModel() - const { - info: modelInfo, - reasoning, - reasoningEffort, - verbosity, - } = modelParams as unknown as { - id: string - info: ModelInfo - reasoning?: { reasoning_effort?: "low" | "medium" | "high" } - reasoningEffort?: "minimal" | "low" | "medium" | "high" - verbosity?: "low" | "medium" | "high" - } + const { info: modelInfo } = this.getModel() + const openAiParams = getModelParams({ + format: "openai", + modelId: this.options.openAiModelId ?? "", + model: modelInfo, + settings: this.options, + }) + const { reasoning, reasoningEffort, verbosity } = openAiParams const modelUrl = this.options.openAiBaseUrl ?? "" const modelId = this.options.openAiModelId ?? "" @@ -280,7 +274,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } // Always include max_output_tokens for Responses API to cap output length - const reservedMax = (modelParams as any)?.maxTokens + const reservedMax = openAiParams.maxTokens ;(basePayload as Record).max_output_tokens = this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens @@ -293,7 +287,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl systemPrompt, messages, }) - yield* this._yieldResponsesResult(response as unknown, modelInfo) + yield* this._yieldResponsesResult(response, modelInfo) return } @@ -320,7 +314,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } else { // Some providers may ignore the stream flag and return a complete response - yield* this._yieldResponsesResult(maybeStream as unknown, modelInfo) + yield* this._yieldResponsesResult(maybeStream, modelInfo) } return } @@ -521,7 +515,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } as Anthropic.Messages.MessageParam, /*includeRole*/ true, ) - const payload: ResponseCreateParamsNonStreaming = { + const payload: Record = { model: model.id, input: formattedInput, } @@ -552,7 +546,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl payload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens } - const response = await this._responsesCreateWithRetries(payload as unknown as Record, { + const response = await this._responsesCreateWithRetries(payload, { usedArrayInput: false, lastUserMessage: undefined, previousId: undefined, @@ -976,10 +970,20 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl messages: Anthropic.Messages.MessageParam[] }, ): Promise { - const 
create = (body: Record) => - ( - this.client as unknown as { responses: { create: (b: Record) => Promise } } - ).responses.create(body) + const create = (body: Record) => { + const hasResponsesCreate = ( + obj: unknown, + ): obj is { responses: { create: (b: Record) => Promise } } => { + if (obj == null || typeof obj !== "object") return false + const responses = (obj as Record).responses + if (responses == null || typeof responses !== "object") return false + return typeof (responses as Record).create === "function" + } + if (!hasResponsesCreate(this.client)) { + throw new Error("Responses API not available on client") + } + return this.client.responses.create(body) + } try { return await create(payload) diff --git a/src/api/transform/responses-stream.ts b/src/api/transform/responses-stream.ts index 8b6eb3fa7ec..f0152c9ec36 100644 --- a/src/api/transform/responses-stream.ts +++ b/src/api/transform/responses-stream.ts @@ -18,7 +18,7 @@ export async function* handleResponsesStream( for await (const event of stream) { // Surface response.id to callers when available (for conversation continuity) if (isObject(event)) { - const resp = (event as Record).response as unknown + const resp = (event as Record).response if (isObject(resp)) { const rid = (resp as Record).id if (typeof rid === "string") { @@ -224,11 +224,11 @@ function isDoneEvent(event: unknown): event is DoneEvent { function getChoiceDeltaContent(event: unknown): string | undefined { if (!isObject(event)) return undefined - const choices = (event as Record).choices as unknown + const choices = (event as Record).choices if (!Array.isArray(choices) || choices.length === 0) return undefined - const first = choices[0] as unknown + const first = choices[0] if (!isObject(first)) return undefined - const delta = (first as Record).delta as unknown + const delta = (first as Record).delta if (!isObject(delta)) return undefined const content = (delta as Record).content if (content == null) return undefined @@ -237,11 +237,11 @@ function getChoiceDeltaContent(event: unknown): string | undefined { function extractUsage(event: unknown): ResponseUsage | undefined { if (!isObject(event)) return undefined - const resp = (event as Record).response as unknown + const resp = (event as Record).response if (isObject(resp) && isObject((resp as Record).usage)) { return (resp as Record).usage as ResponseUsage } - const usage = (event as Record).usage as unknown + const usage = (event as Record).usage if (isObject(usage)) { return usage as ResponseUsage } From 1144bf97152367a1eea336dc6f7a5a007f75dc59 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Fri, 5 Sep 2025 16:44:46 +0900 Subject: [PATCH 11/14] refactor(openai): extract Responses API handling into helper and delegate from createMessage - Move Responses API logic to private _handleResponsesFlavor - Preserve streaming, retries, conversation continuity, reasoning/verbosity, and usage - All existing tests pass --- src/api/providers/openai.ts | 300 +++++++++++++++++++----------------- 1 file changed, 156 insertions(+), 144 deletions(-) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 16c5316b4be..bf83af08a7d 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -174,150 +174,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // If Responses API is selected, use the Responses payload and endpoint if (flavor === "responses") { - const nonStreaming = !(this.options.openAiStreamingEnabled ?? 
true) - - // Build Responses payload (align with OpenAI Native Responses API formatting) - // Azure- and Responses-compatible multimodal handling: - // - Use array input ONLY when the latest user message contains images (initial turn) - // - When previous_response_id is present, send only the latest user turn: - // • Text-only => single string "User: ...", no Developer preface - // • With images => one-item array containing only the latest user content (no Developer preface) - const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") - const lastUserHasImages = - !!lastUserMessage && - Array.isArray(lastUserMessage.content) && - lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") - - // Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input) - const previousId = metadata?.suppressPreviousResponseId - ? undefined - : (metadata?.previousResponseId ?? this.lastResponseId) - - const minimalInputMode = Boolean(previousId) - - let inputPayload: unknown - if (minimalInputMode && lastUserMessage) { - // Minimal-mode: only the latest user message (no Developer preface) - if (lastUserHasImages) { - // Single-item array with just the latest user content - inputPayload = this._toResponsesInput([lastUserMessage]) - } else { - // Single message string "User: ..." - inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true) - } - } else if (lastUserHasImages && lastUserMessage) { - // Initial turn with images: include Developer preface and minimal prior context to preserve continuity - const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant") - - const messagesForArray = messages.filter((m) => { - if (m.role === "assistant") { - return lastAssistantMessage ? m === lastAssistantMessage : false - } - if (m.role === "user") { - const hasImage = - Array.isArray(m.content) && - m.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") - return hasImage || m === lastUserMessage - } - return false - }) - - const arrayInput = this._toResponsesInput(messagesForArray) - const developerPreface = { - role: "user" as const, - content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }], - } - inputPayload = [developerPreface, ...arrayInput] - } else { - // Pure text history: full compact transcript (includes both user and assistant turns) - inputPayload = this._formatResponsesInput(systemPrompt, messages) - } - const usedArrayInput = Array.isArray(inputPayload) - - const basePayload: Record = { - model: modelId, - input: inputPayload, - ...(previousId ? { previous_response_id: previousId } : {}), - } - - // Reasoning effort (Responses expects: reasoning: { effort, summary? }) - // Parity with native: support "minimal" and include summary: "auto" unless explicitly disabled - if (this.options.enableReasoningEffort && (this.options.reasoningEffort || reasoningEffort)) { - const effort = (this.options.reasoningEffort || reasoningEffort) as - | "minimal" - | "low" - | "medium" - | "high" - | undefined - if (effort) { - ;( - basePayload as { - reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } - } - ).reasoning = { - effort, - ...(this.options.enableGpt5ReasoningSummary !== false ? 
{ summary: "auto" as const } : {}), - } - } - } - - // Temperature (only include when explicitly set by the user) - if (this.options.modelTemperature !== undefined) { - basePayload.temperature = this.options.modelTemperature - } else if (deepseekReasoner) { - basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE - } - - // Verbosity: include only when explicitly specified in settings - if (this.options.verbosity) { - ;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { - verbosity: this.options.verbosity as "low" | "medium" | "high", - } - } - - // Always include max_output_tokens for Responses API to cap output length - const reservedMax = openAiParams.maxTokens - ;(basePayload as Record).max_output_tokens = - this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens - - // Non-streaming path - if (nonStreaming) { - const response = await this._responsesCreateWithRetries(basePayload, { - usedArrayInput, - lastUserMessage, - previousId, - systemPrompt, - messages, - }) - yield* this._yieldResponsesResult(response, modelInfo) - return - } - - // Streaming path (auto-fallback to non-streaming result if provider ignores stream flag) - const streamingPayload: Record = { ...basePayload, stream: true } - const maybeStream = await this._responsesCreateWithRetries(streamingPayload, { - usedArrayInput, - lastUserMessage, - previousId, - systemPrompt, - messages, - }) - - const isAsyncIterable = (obj: unknown): obj is AsyncIterable => - typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" - - if (isAsyncIterable(maybeStream)) { - for await (const chunk of handleResponsesStream(maybeStream, { - onResponseId: (id) => { - this.lastResponseId = id - }, - })) { - yield chunk - } - } else { - // Some providers may ignore the stream flag and return a complete response - yield* this._yieldResponsesResult(maybeStream, modelInfo) - } + yield* this._handleResponsesFlavor(systemPrompt, messages, metadata, modelInfo, openAiParams) return } @@ -869,6 +726,161 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // --- Responses helpers --- + private async *_handleResponsesFlavor( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata: ApiHandlerCreateMessageMetadata | undefined, + modelInfo: ModelInfo, + openAiParams: any, + ): ApiStream { + const modelId = this.options.openAiModelId ?? "" + const nonStreaming = !(this.options.openAiStreamingEnabled ?? true) + + // Build Responses payload (align with OpenAI Native Responses API formatting) + // Azure- and Responses-compatible multimodal handling: + // - Use array input ONLY when the latest user message contains images (initial turn) + // - When previous_response_id is present, send only the latest user turn: + // • Text-only => single string "User: ...", no Developer preface + // • With images => one-item array containing only the latest user content (no Developer preface) + const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") + const lastUserHasImages = + !!lastUserMessage && + Array.isArray(lastUserMessage.content) && + lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") + + // Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input) + const previousId = metadata?.suppressPreviousResponseId + ? undefined + : (metadata?.previousResponseId ?? 
this.lastResponseId) + + const minimalInputMode = Boolean(previousId) + + let inputPayload: unknown + if (minimalInputMode && lastUserMessage) { + // Minimal-mode: only the latest user message (no Developer preface) + if (lastUserHasImages) { + // Single-item array with just the latest user content + inputPayload = this._toResponsesInput([lastUserMessage]) + } else { + // Single message string "User: ..." + inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true) + } + } else if (lastUserHasImages && lastUserMessage) { + // Initial turn with images: include Developer preface and minimal prior context to preserve continuity + const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant") + + const messagesForArray = messages.filter((m) => { + if (m.role === "assistant") { + return lastAssistantMessage ? m === lastAssistantMessage : false + } + if (m.role === "user") { + const hasImage = + Array.isArray(m.content) && + m.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") + return hasImage || m === lastUserMessage + } + return false + }) + + const arrayInput = this._toResponsesInput(messagesForArray) + const developerPreface = { + role: "user" as const, + content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }], + } + inputPayload = [developerPreface, ...arrayInput] + } else { + // Pure text history: full compact transcript (includes both user and assistant turns) + inputPayload = this._formatResponsesInput(systemPrompt, messages) + } + const usedArrayInput = Array.isArray(inputPayload) + + const basePayload: Record = { + model: modelId, + input: inputPayload, + ...(previousId ? { previous_response_id: previousId } : {}), + } + + // Reasoning effort (Responses expects: reasoning: { effort, summary? }) + // Parity with native: support "minimal" and include summary: "auto" unless explicitly disabled + if (this.options.enableReasoningEffort && (this.options.reasoningEffort || openAiParams?.reasoningEffort)) { + const effort = (this.options.reasoningEffort || openAiParams?.reasoningEffort) as + | "minimal" + | "low" + | "medium" + | "high" + | undefined + if (effort) { + ;( + basePayload as { + reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } + } + ).reasoning = { + effort, + ...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}), + } + } + } + + // Temperature (only include when explicitly set by the user) + const deepseekReasoner = modelId.includes("deepseek-reasoner") || (this.options.openAiR1FormatEnabled ?? 
false) + if (this.options.modelTemperature !== undefined) { + basePayload.temperature = this.options.modelTemperature + } else if (deepseekReasoner) { + basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE + } + + // Verbosity: include only when explicitly specified in settings + if (this.options.verbosity) { + ;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { + verbosity: this.options.verbosity as "low" | "medium" | "high", + } + } + + // Always include max_output_tokens for Responses API to cap output length + const reservedMax = openAiParams?.maxTokens + ;(basePayload as Record).max_output_tokens = + this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens + + // Non-streaming path + if (nonStreaming) { + const response = await this._responsesCreateWithRetries(basePayload, { + usedArrayInput, + lastUserMessage, + previousId, + systemPrompt, + messages, + }) + yield* this._yieldResponsesResult(response, modelInfo) + return + } + + // Streaming path (auto-fallback to non-streaming result if provider ignores stream flag) + const streamingPayload: Record = { ...basePayload, stream: true } + const maybeStream = await this._responsesCreateWithRetries(streamingPayload, { + usedArrayInput, + lastUserMessage, + previousId, + systemPrompt, + messages, + }) + + const isAsyncIterable = (obj: unknown): obj is AsyncIterable => + typeof (obj as AsyncIterable)[Symbol.asyncIterator] === "function" + + if (isAsyncIterable(maybeStream)) { + for await (const chunk of handleResponsesStream(maybeStream, { + onResponseId: (id) => { + this.lastResponseId = id + }, + })) { + yield chunk + } + } else { + // Some providers may ignore the stream flag and return a complete response + yield* this._yieldResponsesResult(maybeStream, modelInfo) + } + } + /** * Determines which OpenAI-compatible API flavor to use based on the URL path. * - This is purely path-based and provider-agnostic (works for OpenAI, Azure OpenAI after normalization, etc.). 
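
[Editor's note, not part of the patch] The three input modes that the extracted _handleResponsesFlavor helper assembles are easy to lose inside the diff above. The sketch below is illustrative only: the model id, the previous response id, and the exact newline joining of the transcript string are assumptions, not values taken from the patch.

// Illustrative Responses API payload shapes (assumed example values).

// 1) Continuation turn (previous_response_id known), latest user turn is text-only:
//    input collapses to a single "User: ..." string, no Developer preface.
const continuationTurn = {
	model: "gpt-5-mini",
	previous_response_id: "resp_abc123",
	input: "User: And what about streaming?",
}

// 2) Initial turn whose latest user message carries an image:
//    structured array input, with a Developer preface carrying the system prompt.
const initialImageTurn = {
	model: "gpt-5-mini",
	input: [
		{ role: "user", content: [{ type: "input_text", text: "Developer: You are Roo." }] },
		{
			role: "user",
			content: [
				{ type: "input_text", text: "Look at this" },
				{ type: "input_image", image_url: "data:image/png;base64,BASE64DATA" },
			],
		},
	],
}

// 3) Plain text history with no continuity id: one compact transcript string.
const textOnlyHistory = {
	model: "gpt-5-mini",
	input: "Developer: You are Roo.\nUser: Hi\nAssistant: Hello!\nUser: Continue",
}

console.log(continuationTurn, initialImageTurn, textOnlyHistory)

In each case the patch then layers max_output_tokens, reasoning effort, and text.verbosity on top of these base shapes, as shown in the diff.
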
From 43eaa3c27f4acc5c1282e50965c937bc257f2ab1 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Fri, 5 Sep 2025 18:28:32 +0900 Subject: [PATCH 12/14] fix(openai): Responses API parity with native structured input, continuity (previous_response_id/store), temp/verbosity gating, and image support (input_image/output_text) --- src/api/providers/openai.ts | 180 +++++++++++++++++------------------- 1 file changed, 84 insertions(+), 96 deletions(-) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index bf83af08a7d..389d375a114 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -376,47 +376,55 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Use Responses API when selected (non-streaming convenience method) if (flavor === "responses") { - // Build a single-turn formatted string input (Developer/User style) for Responses API - const formattedInput = this._formatResponsesSingleMessage( - { - role: "user", - content: [{ type: "text", text: prompt }], - } as Anthropic.Messages.MessageParam, - /*includeRole*/ true, - ) + // Build structured single-turn input const payload: Record = { model: model.id, - input: formattedInput, + input: [ + { + role: "user", + content: [{ type: "input_text", text: prompt }], + }, + ], + stream: false, + store: false, } - // Reasoning effort (Responses) + // Reasoning effort (support "minimal"; include summary: "auto" unless disabled) const effort = (this.options.reasoningEffort || model.reasoningEffort) as | "minimal" | "low" | "medium" | "high" | undefined - if (this.options.enableReasoningEffort && effort && effort !== "minimal") { - payload.reasoning = { effort } + if (this.options.enableReasoningEffort && effort) { + ;( + payload as { reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } } + ).reasoning = { + effort, + ...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}), + } } - // Temperature if set - if (this.options.modelTemperature !== undefined) { - payload.temperature = this.options.modelTemperature + // Temperature if supported and set + if (modelInfo.supportsTemperature !== false && this.options.modelTemperature !== undefined) { + ;(payload as Record).temperature = this.options.modelTemperature } - // Verbosity via text.verbosity - include only when explicitly specified - if (this.options.verbosity) { - payload.text = { verbosity: this.options.verbosity as "low" | "medium" | "high" } + // Verbosity via text.verbosity - include only when supported + if (this.options.verbosity && modelInfo.supportsVerbosity) { + ;(payload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { + verbosity: this.options.verbosity as "low" | "medium" | "high", + } } // max_output_tokens if (this.options.includeMaxTokens === true) { - payload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens + ;(payload as Record).max_output_tokens = + this.options.modelMaxTokens || modelInfo.maxTokens } const response = await this._responsesCreateWithRetries(payload, { - usedArrayInput: false, + usedArrayInput: true, lastUserMessage: undefined, previousId: undefined, systemPrompt: "", @@ -736,72 +744,29 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const modelId = this.options.openAiModelId ?? "" const nonStreaming = !(this.options.openAiStreamingEnabled ?? 
true) - // Build Responses payload (align with OpenAI Native Responses API formatting) - // Azure- and Responses-compatible multimodal handling: - // - Use array input ONLY when the latest user message contains images (initial turn) - // - When previous_response_id is present, send only the latest user turn: - // • Text-only => single string "User: ...", no Developer preface - // • With images => one-item array containing only the latest user content (no Developer preface) - const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") - const lastUserHasImages = - !!lastUserMessage && - Array.isArray(lastUserMessage.content) && - lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") - - // Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input) + // Determine conversation continuity id (skip when explicitly suppressed) const previousId = metadata?.suppressPreviousResponseId ? undefined : (metadata?.previousResponseId ?? this.lastResponseId) - const minimalInputMode = Boolean(previousId) - - let inputPayload: unknown - if (minimalInputMode && lastUserMessage) { - // Minimal-mode: only the latest user message (no Developer preface) - if (lastUserHasImages) { - // Single-item array with just the latest user content - inputPayload = this._toResponsesInput([lastUserMessage]) - } else { - // Single message string "User: ..." - inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true) - } - } else if (lastUserHasImages && lastUserMessage) { - // Initial turn with images: include Developer preface and minimal prior context to preserve continuity - const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant") - - const messagesForArray = messages.filter((m) => { - if (m.role === "assistant") { - return lastAssistantMessage ? m === lastAssistantMessage : false - } - if (m.role === "user") { - const hasImage = - Array.isArray(m.content) && - m.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image") - return hasImage || m === lastUserMessage - } - return false - }) + // Prepare structured input for Responses API + const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") + const minimalInputMode = Boolean(previousId && lastUserMessage) - const arrayInput = this._toResponsesInput(messagesForArray) - const developerPreface = { - role: "user" as const, - content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }], - } - inputPayload = [developerPreface, ...arrayInput] - } else { - // Pure text history: full compact transcript (includes both user and assistant turns) - inputPayload = this._formatResponsesInput(systemPrompt, messages) - } - const usedArrayInput = Array.isArray(inputPayload) + const inputPayload = minimalInputMode + ? this._toResponsesInput([lastUserMessage as Anthropic.Messages.MessageParam]) + : this._toResponsesInput(messages) + // Build base payload: use top-level instructions; default to storing unless explicitly disabled const basePayload: Record = { model: modelId, input: inputPayload, ...(previousId ? { previous_response_id: previousId } : {}), + instructions: systemPrompt, + store: metadata?.store !== false, } - // Reasoning effort (Responses expects: reasoning: { effort, summary? 
}) - // Parity with native: support "minimal" and include summary: "auto" unless explicitly disabled + // Reasoning effort (support "minimal"; include summary: "auto" unless disabled) if (this.options.enableReasoningEffort && (this.options.reasoningEffort || openAiParams?.reasoningEffort)) { const effort = (this.options.reasoningEffort || openAiParams?.reasoningEffort) as | "minimal" @@ -811,9 +776,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl | undefined if (effort) { ;( - basePayload as { - reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } - } + basePayload as { reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } } ).reasoning = { effort, ...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}), @@ -821,16 +784,18 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } - // Temperature (only include when explicitly set by the user) + // Temperature: include only if model supports it const deepseekReasoner = modelId.includes("deepseek-reasoner") || (this.options.openAiR1FormatEnabled ?? false) - if (this.options.modelTemperature !== undefined) { - basePayload.temperature = this.options.modelTemperature - } else if (deepseekReasoner) { - basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE + if (modelInfo.supportsTemperature !== false) { + if (this.options.modelTemperature !== undefined) { + ;(basePayload as Record).temperature = this.options.modelTemperature + } else if (deepseekReasoner) { + ;(basePayload as Record).temperature = DEEP_SEEK_DEFAULT_TEMPERATURE + } } - // Verbosity: include only when explicitly specified in settings - if (this.options.verbosity) { + // Verbosity: include only when model supports it + if (this.options.verbosity && modelInfo.supportsVerbosity) { ;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { verbosity: this.options.verbosity as "low" | "medium" | "high", } @@ -844,7 +809,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Non-streaming path if (nonStreaming) { const response = await this._responsesCreateWithRetries(basePayload, { - usedArrayInput, + usedArrayInput: Array.isArray(inputPayload), lastUserMessage, previousId, systemPrompt, @@ -857,7 +822,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Streaming path (auto-fallback to non-streaming result if provider ignores stream flag) const streamingPayload: Record = { ...basePayload, stream: true } const maybeStream = await this._responsesCreateWithRetries(streamingPayload, { - usedArrayInput, + usedArrayInput: Array.isArray(inputPayload), lastUserMessage, previousId, systemPrompt, @@ -925,30 +890,53 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl private _toResponsesInput(anthropicMessages: Anthropic.Messages.MessageParam[]): Array<{ role: "user" | "assistant" - content: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }> + content: Array< + | { type: "input_text"; text: string } + | { type: "input_image"; image_url: string } + | { type: "output_text"; text: string } + > }> { const input: Array<{ role: "user" | "assistant" - content: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }> + content: Array< + | { type: "input_text"; text: string } + | { type: "input_image"; image_url: string } + | { type: "output_text"; text: string 
} + > }> = [] for (const msg of anthropicMessages) { const role = msg.role === "assistant" ? "assistant" : "user" - const parts: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }> = [] + const parts: Array< + | { type: "input_text"; text: string } + | { type: "input_image"; image_url: string } + | { type: "output_text"; text: string } + > = [] if (typeof msg.content === "string") { if (msg.content.length > 0) { - parts.push({ type: "input_text", text: msg.content }) + if (role === "assistant") { + parts.push({ type: "output_text", text: msg.content }) + } else { + parts.push({ type: "input_text", text: msg.content }) + } } - } else { + } else if (Array.isArray(msg.content)) { for (const block of msg.content) { if (block.type === "text") { - parts.push({ type: "input_text", text: block.text }) + if (role === "assistant") { + parts.push({ type: "output_text", text: block.text }) + } else { + parts.push({ type: "input_text", text: block.text }) + } } else if (block.type === "image") { - parts.push({ - type: "input_image", - image_url: `data:${block.source.media_type};base64,${block.source.data}`, - }) + // Images are treated as user input; ignore images on assistant turns + if (role === "user") { + parts.push({ + type: "input_image", + image_url: `data:${block.source.media_type};base64,${block.source.data}`, + }) + } } // tool_use/tool_result are omitted in this minimal mapping (can be added as needed) } From 0126f3a1d051993a9a999ba7940f5fad2c615c18 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Fri, 5 Sep 2025 18:42:39 +0900 Subject: [PATCH 13/14] test(openai): align Responses API payload shape with tests string transcript for text-only, array for multimodal; retry-on-verbosity; continuity handling --- src/api/providers/openai.ts | 51 ++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 389d375a114..b9b5b7da586 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -749,13 +749,50 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ? undefined : (metadata?.previousResponseId ?? this.lastResponseId) - // Prepare structured input for Responses API + // Prepare Responses API input per test expectations: + // - Non-minimal text-only => single string with Developer/User lines + // - Minimal (previous_response_id) => single string "User: ..." 
when last user has no images + // - Image cases => structured array; inject Developer preface as first item (non-minimal only) const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") - const minimalInputMode = Boolean(previousId && lastUserMessage) + const lastUserHasImages = + !!lastUserMessage && + Array.isArray(lastUserMessage.content) && + lastUserMessage.content.some((b: any) => (b as any)?.type === "image") + const minimalInputMode = Boolean(previousId) + + let inputPayload: unknown + if (minimalInputMode && lastUserMessage) { + // Minimal mode: only latest user turn + if (lastUserHasImages) { + inputPayload = this._toResponsesInput([lastUserMessage]) + } else { + inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true) + } + } else if (lastUserHasImages && lastUserMessage) { + // Initial turn with images: include Developer preface and minimal context + const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant") + const messagesForArray = messages.filter((m) => { + if (m.role === "assistant") { + return lastAssistantMessage ? m === lastAssistantMessage : false + } + if (m.role === "user") { + const hasImage = + Array.isArray(m.content) && m.content.some((b: any) => (b as any)?.type === "image") + return hasImage || m === lastUserMessage + } + return false + }) - const inputPayload = minimalInputMode - ? this._toResponsesInput([lastUserMessage as Anthropic.Messages.MessageParam]) - : this._toResponsesInput(messages) + const arrayInput = this._toResponsesInput(messagesForArray) + const developerPreface = { + role: "user" as const, + content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }], + } + inputPayload = [developerPreface, ...arrayInput] + } else { + // Pure text history: compact transcript string + inputPayload = this._formatResponsesInput(systemPrompt, messages) + } // Build base payload: use top-level instructions; default to storing unless explicitly disabled const basePayload: Record = { @@ -794,8 +831,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } - // Verbosity: include only when model supports it - if (this.options.verbosity && modelInfo.supportsVerbosity) { + // Verbosity: include when provided; retry logic removes it on 400 + if (this.options.verbosity) { ;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { verbosity: this.options.verbosity as "low" | "medium" | "high", } From 848a0edb576b5ebcba6f121d9df43df1937923b5 Mon Sep 17 00:00:00 2001 From: Lagyu Date: Fri, 5 Sep 2025 21:06:16 +0900 Subject: [PATCH 14/14] test(openai): add regression for Responses continuity when prior stream fails before id (store: true default) --- src/api/providers/__tests__/openai.spec.ts | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index cd2a64a87eb..0affafdabff 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -1673,6 +1673,63 @@ describe("OpenAI Compatible - Responses API conversation continuity", () => { const args = mockResponsesCreate.mock.calls[1][0] expect(args).not.toHaveProperty("previous_response_id") }) + it("does not include previous_response_id when prior stream fails before id; defaults to store:true", async () => { + // First call: stream throws before emitting any response.id + mockResponsesCreate + .mockImplementationOnce(async (_opts: any) => { + 
return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Partial " } + throw new Error("stream interrupted") + }, + } + }) + // Second call: normal stream + .mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "OK" } + yield { + type: "response.completed", + response: { usage: { input_tokens: 1, output_tokens: 1 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + // First call fails mid-stream, so no response.id is captured + const first = handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ]) + + await expect(async () => { + for await (const _ of first) { + // drain until error + } + }).rejects.toThrow("stream interrupted") + + // Second call should not include previous_response_id and should default to store:true + const chunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + chunks.push(ch) + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const secondArgs = mockResponsesCreate.mock.calls[1][0] + expect(secondArgs).not.toHaveProperty("previous_response_id") + expect(secondArgs).toHaveProperty("store", true) + expect(typeof secondArgs.input).toBe("string") + expect(secondArgs.input).toContain("Developer: You are Roo.") + expect(secondArgs.input).toContain("User: Hi") + }) }) // --- New: Responses API parity improvements tests ---
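
[Editor's note, not part of the patch] The retries these regression tests exercise are centralized in _responsesCreateWithRetries, introduced earlier in the series. The following is a rough standalone sketch of that one-shot retry policy, with simplified error matching and a hypothetical helper name; it is not the provider's actual implementation.

// Standalone sketch of the one-shot retry policy (simplified; illustrative only).
type CreateFn = (body: Record<string, unknown>) => Promise<unknown>

async function createWithOneRetry(create: CreateFn, payload: Record<string, unknown>): Promise<unknown> {
	try {
		return await create(payload)
	} catch (err) {
		const msg = err instanceof Error ? err.message.toLowerCase() : ""
		// "Previous response ... not found": drop previous_response_id and retry once.
		if ("previous_response_id" in payload && msg.includes("previous response") && msg.includes("not found")) {
			const { previous_response_id: _drop, ...rest } = payload
			return await create(rest)
		}
		// Unsupported verbosity: drop the text block and retry once.
		if ("text" in payload && msg.includes("verbosity")) {
			const { text: _omit, ...rest } = payload
			return await create(rest)
		}
		// Anything else propagates to the caller unchanged.
		throw err
	}
}

The real code in the patch additionally checks the 400 status before retrying and, for Azure, rebuilds a single-message string input when structured array input is rejected.
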