From 84f58cd518854f83e71406ebab6f7e8471c615b4 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Fri, 10 Oct 2025 13:27:17 -0600 Subject: [PATCH 01/17] feat(models): add per-model timeout disable to avoid global override for long-running models (e.g., gpt-5-pro) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Introduce ModelInfo.disableTimeout to opt out of request timeouts on a per-model basis - Apply in OpenAI-compatible, Ollama, and LM Studio providers (timeout=0 when flag is true) - Preserve global “API Request Timeout” behavior (0 still disables globally); per-model flag takes precedence for that model - Motivation: gpt-5-pro often requires longer runtimes; per-model override avoids forcing a global setting that impacts all models - Add/extend unit tests to validate provider behavior --- packages/types/src/model.ts | 2 ++ .../__tests__/lm-studio-timeout.spec.ts | 30 +++++++++++++++++++ .../__tests__/openai-timeout.spec.ts | 24 +++++++++++++++ src/api/providers/lm-studio.ts | 2 +- src/api/providers/openai.ts | 8 ++++- 5 files changed, 64 insertions(+), 2 deletions(-) diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index 6c7d0a4b4b6..cacb53d2b28 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -86,6 +86,8 @@ export const modelInfoSchema = z.object({ // Capability flag to indicate whether the model supports temperature parameter supportsTemperature: z.boolean().optional(), defaultTemperature: z.number().optional(), + // When true, force-disable request timeouts for this model (providers will set timeout=0) + disableTimeout: z.boolean().optional(), requiredReasoningBudget: z.boolean().optional(), supportsReasoningEffort: z .union([z.boolean(), z.array(z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh"]))]) diff --git a/src/api/providers/__tests__/lm-studio-timeout.spec.ts b/src/api/providers/__tests__/lm-studio-timeout.spec.ts index 659fcaaf670..a7abdb46798 100644 --- a/src/api/providers/__tests__/lm-studio-timeout.spec.ts +++ b/src/api/providers/__tests__/lm-studio-timeout.spec.ts @@ -88,4 +88,34 @@ describe("LmStudioHandler timeout configuration", () => { }), ) }) + + it("should force zero timeout when model info disables timeout", () => { + ;(getApiRequestTimeout as any).mockReturnValue(600000) + + const spy = vitest.spyOn(LmStudioHandler.prototype as any, "getModel").mockReturnValue({ + id: "llama2", + info: { + maxTokens: -1, + contextWindow: 128000, + supportsPromptCache: false, + supportsImages: true, + disableTimeout: true, + }, + }) + + const options: ApiHandlerOptions = { + apiModelId: "llama2", + lmStudioModelId: "llama2", + } + + new LmStudioHandler(options) + + expect(mockOpenAIConstructor).toHaveBeenCalledWith( + expect.objectContaining({ + timeout: 0, + }), + ) + + spy.mockRestore() + }) }) diff --git a/src/api/providers/__tests__/openai-timeout.spec.ts b/src/api/providers/__tests__/openai-timeout.spec.ts index 2a09fd94ffa..2ef7df2adaf 100644 --- a/src/api/providers/__tests__/openai-timeout.spec.ts +++ b/src/api/providers/__tests__/openai-timeout.spec.ts @@ -141,4 +141,28 @@ describe("OpenAiHandler timeout configuration", () => { }), ) }) + + it("should force zero timeout when model info disables timeout", () => { + ;(getApiRequestTimeout as any).mockReturnValue(600000) + + const options: ApiHandlerOptions = { + apiModelId: "gpt-4", + openAiModelId: "gpt-4", + openAiCustomModelInfo: { + maxTokens: -1, + contextWindow: 128000, + supportsPromptCache: false, + 
supportsImages: true, + disableTimeout: true, + } as any, + } + + new OpenAiHandler(options) + + expect(mockOpenAIConstructor).toHaveBeenCalledWith( + expect.objectContaining({ + timeout: 0, // Forced no timeout via model info + }), + ) + }) }) diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts index 6c58a96ae1f..1d048bf6791 100644 --- a/src/api/providers/lm-studio.ts +++ b/src/api/providers/lm-studio.ts @@ -32,7 +32,7 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan this.client = new OpenAI({ baseURL: (this.options.lmStudioBaseUrl || "http://localhost:1234") + "/v1", apiKey: apiKey, - timeout: getApiRequestTimeout(), + timeout: this.getModel().info?.disableTimeout === true ? 0 : getApiRequestTimeout(), }) } diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 2a2065edd6e..586a86458c0 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -49,7 +49,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ...(this.options.openAiHeaders || {}), } - const timeout = getApiRequestTimeout() + let timeout = getApiRequestTimeout() + try { + const modelInfo = this.getModel().info + if (modelInfo?.disableTimeout === true) { + timeout = 0 + } + } catch {} if (isAzureAiInference) { // Azure AI Inference Service (e.g., for DeepSeek) uses a different path structure From 9ecd7bb1ec2eddc8db9a9745b041b61c45a73c8f Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Fri, 10 Oct 2025 14:00:22 -0600 Subject: [PATCH 02/17] =?UTF-8?q?feat(openai-models):=20add=20gpt-5-pro-20?= =?UTF-8?q?25-10-06=20with=20timeout=20disabled=20and=20non=E2=80=91stream?= =?UTF-8?q?ing=20notice?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add GPT‑5 Pro to model registry with: - contextWindow: 400k, maxTokens: 272k - supportsImages: true, supportsPromptCache: true, supportsVerbosity: true, supportsTemperature: false - reasoningEffort: high (Responses API only) - pricing: $15/1M input tokens, $120/1M output tokens - Set disableTimeout: true to avoid requiring a global timeout override - Description clarifies: this is a slow, reasoning‑focused model designed for tough problems; requests may take several minutes; it does not stream (UI may appear idle until completion) --- packages/types/src/providers/openai.ts | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index 722b57677cc..6b68e9bfe92 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -47,6 +47,21 @@ export const openAiNativeModels = { ], description: "GPT-5.1: The best model for coding and agentic tasks across domains", }, + "gpt-5-pro-2025-10-06": { + maxTokens: 272000, + contextWindow: 400000, + supportsImages: true, + supportsPromptCache: true, + supportsReasoningEffort: false, + reasoningEffort: "high", + inputPrice: 15.0, + outputPrice: 120.0, + description: + "GPT-5 Pro: a slow, reasoning-focused model built to tackle tough problems. Requests can take several minutes to finish. 
Responses API only; no streaming, so it may appear stuck until the reply is ready.", + supportsVerbosity: true, + supportsTemperature: false, + disableTimeout: true, + }, "gpt-5.1-codex": { maxTokens: 128000, contextWindow: 400000, From d73bdf36d71bbd116802f235f6d0177f4641fbbb Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Fri, 10 Oct 2025 15:28:08 -0600 Subject: [PATCH 03/17] revert: per-model disableTimeout implementation; remove flag from gpt-5-pro model entry (server-side timeouts). Prep for background mode approach. --- packages/types/src/providers/openai.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index 6b68e9bfe92..2f2d4aa76d7 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -60,7 +60,6 @@ export const openAiNativeModels = { "GPT-5 Pro: a slow, reasoning-focused model built to tackle tough problems. Requests can take several minutes to finish. Responses API only; no streaming, so it may appear stuck until the reply is ready.", supportsVerbosity: true, supportsTemperature: false, - disableTimeout: true, }, "gpt-5.1-codex": { maxTokens: 128000, From 3949621cf18f0058814bb65d29c20b5d61fe678d Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Sun, 12 Oct 2025 15:02:57 -0600 Subject: [PATCH 04/17] feat(openai-native): background mode + auto-resume and poll fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable OpenAI Responses background mode with resilient streaming for GPT‑5 Pro and any model flagged via metadata. Key changes: - Background mode enablement • Auto-enable for models with info.backgroundMode === true (e.g., gpt-5-pro-2025-10-06) defined in [packages/types/src/providers/openai.ts](packages/types/src/providers/openai.ts). • Also respects manual override (openAiNativeBackgroundMode) from ProviderSettings/ApiHandlerOptions. - Request shape (Responses API) • background:true, stream:true, store:true set in [OpenAiNativeHandler.buildRequestBody()](src/api/providers/openai-native.ts:224). - Streaming UX and status events • New ApiStreamStatusChunk in [src/api/transform/stream.ts](src/api/transform/stream.ts) with statuses: queued, in_progress, completed, failed, canceled, reconnecting, polling. • Provider emits status chunks in SDK + SSE paths via [OpenAiNativeHandler.processEvent()](src/api/providers/openai-native.ts:1100) and [OpenAiNativeHandler.handleStreamResponse()](src/api/providers/openai-native.ts:651). • UI spinner shows background lifecycle labels in [webview-ui/src/components/chat/ChatRow.tsx](webview-ui/src/components/chat/ChatRow.tsx) using [webview-ui/src/utils/backgroundStatus.ts](webview-ui/src/utils/backgroundStatus.ts). - Resilience: auto-resume + poll fallback • On stream drop for background tasks, attempt SSE resume using response.id and last sequence_number with exponential backoff in [OpenAiNativeHandler.attemptResumeOrPoll()](src/api/providers/openai-native.ts:1215). • If resume fails, poll GET /v1/responses/{id} every 2s until terminal and synthesize final output/usage. • Deduplicate resumed events via resumeCutoffSequence in [handleStreamResponse()](src/api/providers/openai-native.ts:737). 
- Settings (no new UI switch) • Added optional provider settings and ApiHandlerOptions: autoResume, resumeMaxRetries, resumeBaseDelayMs, pollIntervalMs, pollMaxMinutes in [packages/types/src/provider-settings.ts](packages/types/src/provider-settings.ts) and [src/shared/api.ts](src/shared/api.ts). - Cleanup • Removed VS Code contributes toggle for background mode; behavior now model-driven + programmatic override. - Tests • Provider: coverage for background status emission, auto-resume success, resume→poll fallback, non-background negative in [src/api/providers/__tests__/openai-native.spec.ts](src/api/providers/__tests__/openai-native.spec.ts). • Usage parity unchanged validated in [src/api/providers/__tests__/openai-native-usage.spec.ts](src/api/providers/__tests__/openai-native-usage.spec.ts). • UI: label mapping tests for background statuses in [webview-ui/src/utils/__tests__/backgroundStatus.spec.ts](webview-ui/src/utils/__tests__/backgroundStatus.spec.ts). Notes: - Aligns with TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM: background requires store=true; supports streaming resume via response.id + sequence_number. - Default behavior unchanged for non-background models; no breaking changes. --- TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM | 212 +++++++ packages/types/src/model.ts | 3 + packages/types/src/provider-settings.ts | 9 + packages/types/src/providers/openai.ts | 1 + .../__tests__/openai-native-usage.spec.ts | 32 ++ .../providers/__tests__/openai-native.spec.ts | 535 ++++++++++++++++++ src/api/providers/openai-native.ts | 351 +++++++++++- src/api/transform/stream.ts | 8 + src/core/task/Task.ts | 18 + src/shared/api.ts | 14 + webview-ui/src/components/chat/ChatRow.tsx | 38 +- .../utils/__tests__/backgroundStatus.spec.ts | 35 ++ .../utils/__tests__/backgroundStatus.test.ts | 35 ++ webview-ui/src/utils/backgroundStatus.ts | 29 + 14 files changed, 1309 insertions(+), 11 deletions(-) create mode 100644 TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM create mode 100644 webview-ui/src/utils/__tests__/backgroundStatus.spec.ts create mode 100644 webview-ui/src/utils/__tests__/backgroundStatus.test.ts create mode 100644 webview-ui/src/utils/backgroundStatus.ts diff --git a/TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM b/TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM new file mode 100644 index 00000000000..483b6a37245 --- /dev/null +++ b/TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM @@ -0,0 +1,212 @@ +Background mode +=============== + +Run long running tasks asynchronously in the background. + +Agents like [Codex](https://openai.com/index/introducing-codex/) and [Deep Research](https://openai.com/index/introducing-deep-research/) show that reasoning models can take several minutes to solve complex problems. Background mode enables you to execute long-running tasks on models like o3 and o1-pro reliably, without having to worry about timeouts or other connectivity issues. + +Background mode kicks off these tasks asynchronously, and developers can poll response objects to check status over time. 
To start response generation in the background, make an API request with `background` set to `true`: + +Generate a response in the background + +```bash +curl https://api.openai.com/v1/responses \ +-H "Content-Type: application/json" \ +-H "Authorization: Bearer $OPENAI_API_KEY" \ +-d '{ + "model": "o3", + "input": "Write a very long novel about otters in space.", + "background": true +}' +``` + +```javascript +import OpenAI from "openai"; +const client = new OpenAI(); + +const resp = await client.responses.create({ + model: "o3", + input: "Write a very long novel about otters in space.", + background: true, +}); + +console.log(resp.status); +``` + +```python +from openai import OpenAI + +client = OpenAI() + +resp = client.responses.create( + model="o3", + input="Write a very long novel about otters in space.", + background=True, +) + +print(resp.status) +``` + +Polling background responses +---------------------------- + +To check the status of background requests, use the GET endpoint for Responses. Keep polling while the request is in the queued or in\_progress state. When it leaves these states, it has reached a final (terminal) state. + +Retrieve a response executing in the background + +```bash +curl https://api.openai.com/v1/responses/resp_123 \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" +``` + +```javascript +import OpenAI from "openai"; +const client = new OpenAI(); + +let resp = await client.responses.create({ +model: "o3", +input: "Write a very long novel about otters in space.", +background: true, +}); + +while (resp.status === "queued" || resp.status === "in_progress") { +console.log("Current status: " + resp.status); +await new Promise(resolve => setTimeout(resolve, 2000)); // wait 2 seconds +resp = await client.responses.retrieve(resp.id); +} + +console.log("Final status: " + resp.status + "\nOutput:\n" + resp.output_text); +``` + +```python +from openai import OpenAI +from time import sleep + +client = OpenAI() + +resp = client.responses.create( + model="o3", + input="Write a very long novel about otters in space.", + background=True, +) + +while resp.status in {"queued", "in_progress"}: + print(f"Current status: {resp.status}") + sleep(2) + resp = client.responses.retrieve(resp.id) + +print(f"Final status: {resp.status}\nOutput:\n{resp.output_text}") +``` + +Cancelling a background response +-------------------------------- + +You can also cancel an in-flight response like this: + +Cancel an ongoing response + +```bash +curl -X POST https://api.openai.com/v1/responses/resp_123/cancel \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" +``` + +```javascript +import OpenAI from "openai"; +const client = new OpenAI(); + +const resp = await client.responses.cancel("resp_123"); + +console.log(resp.status); +``` + +```python +from openai import OpenAI +client = OpenAI() + +resp = client.responses.cancel("resp_123") + +print(resp.status) +``` + +Cancelling twice is idempotent - subsequent calls simply return the final `Response` object. + +Streaming a background response +------------------------------- + +You can create a background Response and start streaming events from it right away. This may be helpful if you expect the client to drop the stream and want the option of picking it back up later. To do this, create a Response with both `background` and `stream` set to `true`. You will want to keep track of a "cursor" corresponding to the `sequence_number` you receive in each streaming event. 
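If your SDK version does not yet support resuming, you can reconnect with a plain HTTP request to the same endpoint shown in the curl example below. A minimal TypeScript sketch, assuming a dropped stream, a previously saved `responseId` and `cursor`, and a global `fetch` (the helper name `resumeFrom` and the logging are illustrative only, not part of the official SDK):

```typescript
// Sketch: resume a background response stream from a saved sequence_number cursor.
// Assumes the response was created with background: true and stream: true.
async function resumeFrom(responseId: string, cursor: number, apiKey: string): Promise<void> {
    const url = `https://api.openai.com/v1/responses/${responseId}?stream=true&starting_after=${cursor}`;
    const res = await fetch(url, {
        headers: { Authorization: `Bearer ${apiKey}`, Accept: "text/event-stream" },
    });
    if (!res.ok || !res.body) throw new Error(`Resume failed (${res.status})`);

    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = "";
    for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        const events = buffer.split("\n\n");
        buffer = events.pop() ?? ""; // keep any partial event for the next read
        for (const raw of events) {
            if (!raw.startsWith("data: ")) continue;
            const data = raw.slice("data: ".length);
            if (data === "[DONE]") return;
            const event = JSON.parse(data);
            // Keep the cursor current so another drop can resume from the right place.
            if (typeof event.sequence_number === "number") cursor = event.sequence_number;
            console.log(event.type, "seq", cursor);
        }
    }
}
```
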
+ +Currently, the time to first token you receive from a background response is higher than what you receive from a synchronous one. We are working to reduce this latency gap in the coming weeks. + +Generate and stream a background response + +```bash +curl https://api.openai.com/v1/responses \ +-H "Content-Type: application/json" \ +-H "Authorization: Bearer $OPENAI_API_KEY" \ +-d '{ + "model": "o3", + "input": "Write a very long novel about otters in space.", + "background": true, + "stream": true +}' + +// To resume: +curl "https://api.openai.com/v1/responses/resp_123?stream=true&starting_after=42" \ +-H "Content-Type: application/json" \ +-H "Authorization: Bearer $OPENAI_API_KEY" +``` + +```javascript +import OpenAI from "openai"; +const client = new OpenAI(); + +const stream = await client.responses.create({ + model: "o3", + input: "Write a very long novel about otters in space.", + background: true, + stream: true, +}); + +let cursor = null; +for await (const event of stream) { + console.log(event); + cursor = event.sequence_number; +} + +// If the connection drops, you can resume streaming from the last cursor (SDK support coming soon): +// const resumedStream = await client.responses.stream(resp.id, { starting_after: cursor }); +// for await (const event of resumedStream) { ... } +``` + +```python +from openai import OpenAI + +client = OpenAI() + +# Fire off an async response but also start streaming immediately +stream = client.responses.create( + model="o3", + input="Write a very long novel about otters in space.", + background=True, + stream=True, +) + +cursor = None +for event in stream: + print(event) + cursor = event.sequence_number + +# If your connection drops, the response continues running and you can reconnect: +# SDK support for resuming the stream is coming soon. +# for event in client.responses.stream(resp.id, starting_after=cursor): +# print(event) +``` + +Limits +------ + +1. Background sampling requires `store=true`; stateless requests are rejected. +2. To cancel a synchronous response, terminate the connection +3. You can only start a new stream from a background response if you created it with `stream=true`. \ No newline at end of file diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index cacb53d2b28..7bb689d7884 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -88,6 +88,9 @@ export const modelInfoSchema = z.object({ defaultTemperature: z.number().optional(), // When true, force-disable request timeouts for this model (providers will set timeout=0) disableTimeout: z.boolean().optional(), + // When true, this model must be invoked using Responses background mode. + // Providers should auto-enable background:true, stream:true, and store:true. + backgroundMode: z.boolean().optional(), requiredReasoningBudget: z.boolean().optional(), supportsReasoningEffort: z .union([z.boolean(), z.array(z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh"]))]) diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index d713a47d6b4..a577ad4d1ba 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -304,6 +304,15 @@ const openAiNativeSchema = apiModelIdProviderModelSchema.extend({ // OpenAI Responses API service tier for openai-native provider only. // UI should only expose this when the selected model supports flex/priority. 
openAiNativeServiceTier: serviceTierSchema.optional(), + // Enable OpenAI Responses background mode when using Responses API. + // Opt-in; defaults to false when omitted. + openAiNativeBackgroundMode: z.boolean().optional(), + // Background auto-resume/poll settings (no UI; plumbed via options) + openAiNativeBackgroundAutoResume: z.boolean().optional(), + openAiNativeBackgroundResumeMaxRetries: z.number().int().min(0).optional(), + openAiNativeBackgroundResumeBaseDelayMs: z.number().int().min(0).optional(), + openAiNativeBackgroundPollIntervalMs: z.number().int().min(0).optional(), + openAiNativeBackgroundPollMaxMinutes: z.number().int().min(1).optional(), }) const mistralSchema = apiModelIdProviderModelSchema.extend({ diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index 2f2d4aa76d7..d05c6eedf34 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -60,6 +60,7 @@ export const openAiNativeModels = { "GPT-5 Pro: a slow, reasoning-focused model built to tackle tough problems. Requests can take several minutes to finish. Responses API only; no streaming, so it may appear stuck until the reply is ready.", supportsVerbosity: true, supportsTemperature: false, + backgroundMode: true, }, "gpt-5.1-codex": { maxTokens: 128000, diff --git a/src/api/providers/__tests__/openai-native-usage.spec.ts b/src/api/providers/__tests__/openai-native-usage.spec.ts index 48e1c26877b..4068a91bcd6 100644 --- a/src/api/providers/__tests__/openai-native-usage.spec.ts +++ b/src/api/providers/__tests__/openai-native-usage.spec.ts @@ -389,6 +389,38 @@ describe("OpenAiNativeHandler - normalizeUsage", () => { }) }) + it("should produce identical usage chunk when background mode is enabled", () => { + const usage = { + input_tokens: 120, + output_tokens: 60, + cache_creation_input_tokens: 10, + cache_read_input_tokens: 30, + } + + const baselineHandler = new OpenAiNativeHandler({ + openAiNativeApiKey: "test-key", + apiModelId: "gpt-5-pro-2025-10-06", + }) + const backgroundHandler = new OpenAiNativeHandler({ + openAiNativeApiKey: "test-key", + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeBackgroundMode: true, + }) + + const baselineUsage = (baselineHandler as any).normalizeUsage(usage, baselineHandler.getModel()) + const backgroundUsage = (backgroundHandler as any).normalizeUsage(usage, backgroundHandler.getModel()) + + expect(baselineUsage).toMatchObject({ + type: "usage", + inputTokens: 120, + outputTokens: 60, + cacheWriteTokens: 10, + cacheReadTokens: 30, + totalCost: expect.any(Number), + }) + expect(backgroundUsage).toEqual(baselineUsage) + }) + describe("cost calculation", () => { it("should pass total input tokens to calculateApiCostOpenAI", () => { const usage = { diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts index 0482b8893b8..fa872db4257 100644 --- a/src/api/providers/__tests__/openai-native.spec.ts +++ b/src/api/providers/__tests__/openai-native.spec.ts @@ -3,6 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk" import { OpenAiNativeHandler } from "../openai-native" +import type { ApiHandlerCreateMessageMetadata } from "../../index" import { ApiHandlerOptions } from "../../../shared/api" // Mock OpenAI client - now everything uses Responses API @@ -1402,3 +1403,537 @@ describe("GPT-5 streaming event coverage (additional)", () => { }) }) }) + +describe("OpenAI Native background mode behavior", () => { + const systemPrompt = "System prompt" + const 
baseMessages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "hi" }] + const createMinimalIterable = () => ({ + async *[Symbol.asyncIterator]() { + yield { + type: "response.done", + response: { id: "resp_minimal", usage: { input_tokens: 1, output_tokens: 1 } }, + } + }, + }) + const createUsageIterable = () => ({ + async *[Symbol.asyncIterator]() { + yield { type: "response.text.delta", delta: "Hello" } + yield { + type: "response.done", + response: { + id: "resp_usage", + usage: { input_tokens: 120, output_tokens: 60 }, + }, + } + }, + }) + + beforeEach(() => { + mockResponsesCreate.mockClear() + }) + + afterEach(() => { + if ((global as any).fetch) { + delete (global as any).fetch + } + }) + + const metadataStoreFalse: ApiHandlerCreateMessageMetadata = { taskId: "background-test", store: false } + + it("auto-enables background mode for gpt-5-pro when no override is specified", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + // openAiNativeBackgroundMode is undefined + }) + + mockResponsesCreate.mockResolvedValueOnce(createMinimalIterable()) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages, metadataStoreFalse)) { + chunks.push(chunk) + } + + expect(chunks).not.toHaveLength(0) + const requestBody = mockResponsesCreate.mock.calls[0][0] + expect(requestBody.background).toBe(true) + expect(requestBody.stream).toBe(true) + expect(requestBody.store).toBe(true) + }) + it("sends background:true, stream:true, and forces store:true for gpt-5-pro when background mode is enabled", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + mockResponsesCreate.mockResolvedValueOnce(createMinimalIterable()) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages, metadataStoreFalse)) { + chunks.push(chunk) + } + + expect(chunks).not.toHaveLength(0) + + const requestBody = mockResponsesCreate.mock.calls[0][0] + expect(requestBody.background).toBe(true) + expect(requestBody.stream).toBe(true) + expect(requestBody.store).toBe(true) + expect(requestBody.instructions).toBe(systemPrompt) + expect(requestBody.model).toBe("gpt-5-pro-2025-10-06") + expect(Array.isArray(requestBody.input)).toBe(true) + expect(requestBody.input.length).toBeGreaterThan(0) + + mockResponsesCreate.mockClear() + + const handlerWithOptionFalse = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: false, // metadata still enforces background mode + }) + + mockResponsesCreate.mockResolvedValueOnce(createMinimalIterable()) + + for await (const chunk of handlerWithOptionFalse.createMessage( + systemPrompt, + baseMessages, + metadataStoreFalse, + )) { + chunks.push(chunk) + } + + const requestBodyWithOptionFalse = mockResponsesCreate.mock.calls[0][0] + // Still enabled due to model.info.backgroundMode + expect(requestBodyWithOptionFalse.background).toBe(true) + expect(requestBodyWithOptionFalse.store).toBe(true) + expect(requestBodyWithOptionFalse.stream).toBe(true) + }) + + it("auto-enables background mode for gpt-5-pro when no override is specified", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + // no openAiNativeBackgroundMode provided + }) + + 
mockResponsesCreate.mockResolvedValueOnce(createMinimalIterable()) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages, metadataStoreFalse)) { + chunks.push(chunk) + } + + expect(chunks).not.toHaveLength(0) + const requestBody = mockResponsesCreate.mock.calls[0][0] + expect(requestBody.background).toBe(true) + expect(requestBody.stream).toBe(true) + expect(requestBody.store).toBe(true) + }) + it("forces store:true and includes background:true when falling back to SSE", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + mockResponsesCreate.mockResolvedValueOnce({}) + + const encoder = new TextEncoder() + const sseStream = new ReadableStream({ + start(controller) { + controller.enqueue( + encoder.encode( + 'data: {"type":"response.done","response":{"id":"resp_1","usage":{"input_tokens":1,"output_tokens":1}}}\n\n', + ), + ) + controller.enqueue(encoder.encode("data: [DONE]\n\n")) + controller.close() + }, + }) + + const mockFetch = vitest.fn().mockResolvedValue( + new Response(sseStream, { + status: 200, + headers: { "Content-Type": "text/event-stream" }, + }), + ) + global.fetch = mockFetch as any + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages, metadataStoreFalse)) { + chunks.push(chunk) + } + + expect(mockFetch).toHaveBeenCalledTimes(1) + const requestInit = mockFetch.mock.calls[0][1] as RequestInit + expect(requestInit?.body).toBeDefined() + + const parsedBody = JSON.parse(requestInit?.body as string) + expect(parsedBody.background).toBe(true) + expect(parsedBody.store).toBe(true) + expect(parsedBody.stream).toBe(true) + expect(parsedBody.model).toBe("gpt-5-pro-2025-10-06") + }) + + it("emits identical usage chunk when background mode is enabled", async () => { + const collectUsageChunk = async (options: ApiHandlerOptions) => { + mockResponsesCreate.mockResolvedValueOnce(createUsageIterable()) + const handler = new OpenAiNativeHandler(options) + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages)) { + chunks.push(chunk) + } + const usageChunk = chunks.find((chunk) => chunk.type === "usage") + mockResponsesCreate.mockClear() + return usageChunk + } + + const baselineUsage = await collectUsageChunk({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + }) + + expect(baselineUsage).toBeDefined() + + const backgroundUsage = await collectUsageChunk({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + expect(backgroundUsage).toBeDefined() + expect(backgroundUsage).toEqual(baselineUsage) + }) + + it("emits background status chunks for Responses events (SDK path)", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + const createStatusIterable = () => ({ + async *[Symbol.asyncIterator]() { + yield { type: "response.queued", response: { id: "resp_bg" } } + yield { type: "response.in_progress" } + yield { type: "response.text.delta", delta: "Hello" } + yield { + type: "response.done", + response: { id: "resp_bg", usage: { input_tokens: 1, output_tokens: 1 } }, + } + }, + }) + mockResponsesCreate.mockResolvedValueOnce(createStatusIterable()) + + const chunks: any[] = [] + for await (const chunk of 
handler.createMessage(systemPrompt, baseMessages)) { + chunks.push(chunk) + } + + const statusChunks = chunks.filter((c) => c.type === "status") + expect(statusChunks).toEqual([ + { type: "status", mode: "background", status: "queued", responseId: "resp_bg" }, + { type: "status", mode: "background", status: "in_progress" }, + { type: "status", mode: "background", status: "completed", responseId: "resp_bg" }, + ]) + }) + + it("emits background status chunks for Responses events (SSE fallback)", async () => { + // Force fallback by making SDK return non-iterable + mockResponsesCreate.mockResolvedValueOnce({}) + + const encoder = new TextEncoder() + const sseStream = new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode('data: {"type":"response.queued","response":{"id":"resp_bg2"}}\n\n')) + controller.enqueue(encoder.encode('data: {"type":"response.in_progress"}\n\n')) + controller.enqueue(encoder.encode('data: {"type":"response.text.delta","delta":"Hi"}\n\n')) + controller.enqueue( + encoder.encode( + 'data: {"type":"response.done","response":{"id":"resp_bg2","usage":{"input_tokens":1,"output_tokens":1}}}\n\n', + ), + ) + controller.enqueue(encoder.encode("data: [DONE]\n\n")) + controller.close() + }, + }) + + const mockFetch = vitest.fn().mockResolvedValue( + new Response(sseStream, { + status: 200, + headers: { "Content-Type": "text/event-stream" }, + }), + ) + global.fetch = mockFetch as any + + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages)) { + chunks.push(chunk) + } + + const statusChunks = chunks.filter((c) => c.type === "status") + expect(statusChunks).toEqual([ + { type: "status", mode: "background", status: "queued", responseId: "resp_bg2" }, + { type: "status", mode: "background", status: "in_progress" }, + { type: "status", mode: "background", status: "completed", responseId: "resp_bg2" }, + ]) + + // Clean up fetch + delete (global as any).fetch + }) +}) + +describe("OpenAI Native streaming metadata tracking", () => { + beforeEach(() => { + mockResponsesCreate.mockClear() + }) + + it("tracks sequence_number from streaming events and exposes via getLastSequenceNumber", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + }) + + const createSequenceIterable = () => ({ + async *[Symbol.asyncIterator]() { + yield { type: "response.text.delta", delta: "A", sequence_number: 1 } + yield { type: "response.reasoning.delta", delta: "B", sequence_number: 2 } + yield { + type: "response.done", + sequence_number: 3, + response: { id: "resp_123", usage: { input_tokens: 1, output_tokens: 2 } }, + } + }, + }) + + mockResponsesCreate.mockResolvedValueOnce(createSequenceIterable()) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage("System", [{ role: "user", content: "hi" }])) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "text", text: "A" }) + expect(chunks).toContainEqual({ type: "reasoning", text: "B" }) + expect(handler.getLastSequenceNumber()).toBe(3) + expect(handler.getLastResponseId()).toBe("resp_123") + }) +}) + +// Added plumbing test for openAiNativeBackgroundMode +describe("OpenAI Native background mode setting (plumbing)", () => { + it("should surface openAiNativeBackgroundMode in handler options when provided", () => { + const 
handler = new OpenAiNativeHandler({ + apiModelId: "gpt-4.1", + openAiNativeApiKey: "test-api-key", + openAiNativeBackgroundMode: true, + } as ApiHandlerOptions) + + // Access protected options via runtime cast to verify pass-through + expect((handler as any).options.openAiNativeBackgroundMode).toBe(true) + }) +}) + +describe("OpenAI Native background auto-resume and polling", () => { + const systemPrompt = "System prompt" + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "hello" }] + + beforeEach(() => { + mockResponsesCreate.mockClear() + if ((global as any).fetch) { + delete (global as any).fetch + } + }) + + it("resumes background stream on drop and emits no duplicate deltas", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + const dropIterable = { + async *[Symbol.asyncIterator]() { + yield { type: "response.queued", response: { id: "resp_resume" }, sequence_number: 0 } + yield { type: "response.in_progress", sequence_number: 1 } + yield { type: "response.text.delta", delta: "Hello", sequence_number: 2 } + throw new Error("network drop") + }, + } + mockResponsesCreate.mockResolvedValueOnce(dropIterable as any) + + const encoder = new TextEncoder() + const sseStream = new ReadableStream({ + start(controller) { + controller.enqueue( + encoder.encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"SHOULD_SKIP"},"sequence_number":2}\n\n', + ), + ) + controller.enqueue( + encoder.encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":" world"},"sequence_number":3}\n\n', + ), + ) + controller.enqueue( + encoder.encode( + 'data: {"type":"response.done","response":{"id":"resp_resume","usage":{"input_tokens":10,"output_tokens":5}},"sequence_number":4}\n\n', + ), + ) + controller.enqueue(encoder.encode("data: [DONE]\n\n")) + controller.close() + }, + }) + ;(global as any).fetch = vitest + .fn() + .mockResolvedValue( + new Response(sseStream, { status: 200, headers: { "Content-Type": "text/event-stream" } }), + ) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const c of stream) { + chunks.push(c) + } + + const statusChunks = chunks.filter((c) => c.type === "status") + const statusNames = statusChunks.map((s: any) => s.status) + const reconnectIdx = statusNames.indexOf("reconnecting") + const inProgIdx = statusNames.findIndex((s, i) => s === "in_progress" && i > reconnectIdx) + expect(reconnectIdx).toBeGreaterThanOrEqual(0) + expect(inProgIdx).toBeGreaterThan(reconnectIdx) + + const fullText = chunks + .filter((c) => c.type === "text") + .map((c: any) => c.text) + .join("") + expect(fullText).toBe("Hello world") + expect(fullText).not.toContain("SHOULD_SKIP") + + const usageChunks = chunks.filter((c) => c.type === "usage") + expect(usageChunks).toHaveLength(1) + }) + + it("falls back to polling after failed resume and yields final output/usage", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + openAiNativeBackgroundResumeMaxRetries: 1, + openAiNativeBackgroundResumeBaseDelayMs: 0, + openAiNativeBackgroundPollIntervalMs: 1, + openAiNativeBackgroundPollMaxMinutes: 1, + } as ApiHandlerOptions) + + const dropIterable = { + async *[Symbol.asyncIterator]() { + yield { type: "response.queued", response: { id: "resp_poll" }, 
sequence_number: 0 } + yield { type: "response.in_progress", sequence_number: 1 } + throw new Error("network drop") + }, + } + mockResponsesCreate.mockResolvedValueOnce(dropIterable as any) + + let pollStep = 0 + ;(global as any).fetch = vitest.fn().mockImplementation((url: string) => { + if (url.includes("?stream=true")) { + return Promise.resolve({ + ok: false, + status: 500, + text: async () => "resume failed", + } as any) + } + // polling path + const payloads = [ + { response: { id: "resp_poll", status: "queued" } }, + { response: { id: "resp_poll", status: "in_progress" } }, + { + response: { + id: "resp_poll", + status: "completed", + output: [{ type: "message", content: [{ type: "output_text", text: "Polled result" }] }], + usage: { input_tokens: 7, output_tokens: 3 }, + }, + }, + ] + const payload = payloads[Math.min(pollStep++, payloads.length - 1)] + return Promise.resolve( + new Response(JSON.stringify(payload), { status: 200, headers: { "Content-Type": "application/json" } }), + ) + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const c of stream) { + chunks.push(c) + } + + const statusNames = chunks.filter((c) => c.type === "status").map((s: any) => s.status) + const idxReconnect = statusNames.indexOf("reconnecting") + const idxPolling = statusNames.indexOf("polling") + const idxQueued = statusNames.indexOf("queued") + const idxInProgress = statusNames.indexOf("in_progress") + const idxCompleted = statusNames.indexOf("completed") + expect(idxReconnect).toBeGreaterThanOrEqual(0) + expect(idxPolling).toBeGreaterThan(idxReconnect) + + const idxQueuedAfterPolling = statusNames.findIndex((s, i) => s === "queued" && i > idxPolling) + const idxInProgressAfterQueued = statusNames.findIndex( + (s, i) => s === "in_progress" && i > idxQueuedAfterPolling, + ) + const idxCompletedAfterInProgress = statusNames.findIndex( + (s, i) => s === "completed" && i > idxInProgressAfterQueued, + ) + + expect(idxQueuedAfterPolling).toBeGreaterThan(idxPolling) + expect(idxInProgressAfterQueued).toBeGreaterThan(idxQueuedAfterPolling) + expect(idxCompletedAfterInProgress).toBeGreaterThan(idxInProgressAfterQueued) + + const finalText = chunks + .filter((c) => c.type === "text") + .map((c: any) => c.text) + .join("") + expect(finalText).toBe("Polled result") + + const usageChunks = chunks.filter((c) => c.type === "usage") + expect(usageChunks).toHaveLength(1) + expect(usageChunks[0]).toMatchObject({ type: "usage", inputTokens: 7, outputTokens: 3 }) + }) + + it("does not attempt resume when not in background mode", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-4.1", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: false, + }) + + const dropIterable = { + async *[Symbol.asyncIterator]() { + yield { type: "response.text.delta", delta: "Hi", sequence_number: 1 } + throw new Error("drop") + }, + } + mockResponsesCreate.mockResolvedValueOnce(dropIterable as any) + ;(global as any).fetch = vitest.fn().mockRejectedValue(new Error("SSE fallback failed")) + + const stream = handler.createMessage(systemPrompt, messages) + + const chunks: any[] = [] + await expect(async () => { + for await (const c of stream) { + chunks.push(c) + } + }).rejects.toThrow() + + const statuses = chunks.filter((c) => c.type === "status").map((s: any) => s.status) + expect(statuses).not.toContain("reconnecting") + expect(statuses).not.toContain("polling") + }) +}) diff --git a/src/api/providers/openai-native.ts 
b/src/api/providers/openai-native.ts index b5fb417ee3a..9f3983d98a8 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -36,6 +36,12 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio private lastResponseId: string | undefined // Abort controller for cancelling ongoing requests private abortController?: AbortController + // Sequence number for background mode stream resumption + private lastSequenceNumber: number | undefined + // Track whether current request is in background mode for status chunk annotation + private currentRequestIsBackground?: boolean + // Cutoff sequence for filtering stale events during resume + private resumeCutoffSequence?: number // Event types handled by the shared event processor to avoid duplication private readonly coreHandledEventTypes = new Set([ @@ -241,6 +247,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio }> tool_choice?: any parallel_tool_calls?: boolean + background?: boolean } // Validate requested tier against model support; if not supported, omit. @@ -312,6 +319,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio body.text = { verbosity: (verbosity || "medium") as VerbosityLevel } } + // Enable background mode when either explicitly opted in or required by model metadata + if (this.options.openAiNativeBackgroundMode === true || model.info.backgroundMode === true) { + body.background = true + body.stream = true + body.store = true + } + return body } @@ -325,6 +339,15 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Create AbortController for cancellation this.abortController = new AbortController() + // Annotate if this request uses background mode (used for status chunks) + this.currentRequestIsBackground = !!requestBody?.background + + const canAttemptResume = () => + this.currentRequestIsBackground && + (this.options.openAiNativeBackgroundAutoResume ?? 
true) && + !!this.lastResponseId && + typeof this.lastSequenceNumber === "number" + try { // Use the official SDK const stream = (await (this.client as any).responses.create(requestBody, { @@ -337,21 +360,53 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio ) } - for await (const event of stream) { - // Check if request was aborted - if (this.abortController.signal.aborted) { - break - } + try { + for await (const event of stream) { + // Check if request was aborted + if (this.abortController?.signal.aborted) { + break + } - for await (const outChunk of this.processEvent(event, model)) { - yield outChunk + for await (const outChunk of this.processEvent(event, model)) { + yield outChunk + } } + } catch (iterErr) { + // Stream dropped mid-flight; attempt resume for background requests + if (canAttemptResume()) { + for await (const chunk of this.attemptResumeOrPoll( + this.lastResponseId!, + this.lastSequenceNumber!, + model, + )) { + yield chunk + } + return + } + throw iterErr } } catch (sdkErr: any) { // For errors, fallback to manual SSE via fetch - yield* this.makeResponsesApiRequest(requestBody, model, metadata, systemPrompt, messages) + try { + yield* this.makeResponsesApiRequest(requestBody, model, metadata, systemPrompt, messages) + } catch (fallbackErr) { + // If SSE fallback fails mid-stream and we can resume, try that + if (canAttemptResume()) { + for await (const chunk of this.attemptResumeOrPoll( + this.lastResponseId!, + this.lastSequenceNumber!, + model, + )) { + yield chunk + } + return + } + throw fallbackErr + } } finally { this.abortController = undefined + // Always clear background flag at end of request lifecycle + this.currentRequestIsBackground = undefined } } @@ -590,6 +645,20 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio try { const parsed = JSON.parse(data) + // Skip stale events when resuming a dropped background stream + if ( + typeof parsed?.sequence_number === "number" && + this.resumeCutoffSequence !== undefined && + parsed.sequence_number <= this.resumeCutoffSequence + ) { + continue + } + + // Record sequence number for cursor tracking + if (typeof parsed?.sequence_number === "number") { + this.lastSequenceNumber = parsed.sequence_number + } + // Capture resolved service tier if present if (parsed.response?.service_tier) { this.lastServiceTier = parsed.response.service_tier as ServiceTier @@ -879,14 +948,31 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } // Handle queued event else if (parsed.type === "response.queued") { - // Response is queued + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: "queued", + ...(parsed.response?.id ? { responseId: parsed.response.id } : {}), + } } // Handle in_progress event else if (parsed.type === "response.in_progress") { - // Response is being processed + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: "in_progress", + ...(parsed.response?.id ? { responseId: parsed.response.id } : {}), + } } // Handle failed event else if (parsed.type === "response.failed") { + // Emit failed status for UI lifecycle + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: "failed", + ...(parsed.response?.id ? 
{ responseId: parsed.response.id } : {}), + } // Response failed if (parsed.error || parsed.message) { throw new Error( @@ -907,6 +993,16 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio this.lastResponseOutput = parsed.response.output } + // Emit completed status for UI lifecycle + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: "completed", + ...(parsed.response?.id ? { responseId: parsed.response.id } : {}), + } + // Clear background marker on completion + this.currentRequestIsBackground = undefined + // Check if the done event contains the complete output (as a fallback) if ( !hasContent && @@ -1022,6 +1118,196 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } } + /** + * Attempt to resume a dropped background stream; if resume fails, fall back to polling. + */ + private async *attemptResumeOrPoll(responseId: string, lastSeq: number, model: OpenAiNativeModel): ApiStream { + // Emit reconnecting status + yield { + type: "status", + mode: "background", + status: "reconnecting", + responseId, + } + + const apiKey = this.options.openAiNativeApiKey ?? "not-provided" + const baseUrl = this.options.openAiNativeBaseUrl || "https://api.openai.com" + const resumeMaxRetries = this.options.openAiNativeBackgroundResumeMaxRetries ?? 3 + const resumeBaseDelayMs = this.options.openAiNativeBackgroundResumeBaseDelayMs ?? 1000 + + // Try streaming resume with exponential backoff + for (let attempt = 0; attempt < resumeMaxRetries; attempt++) { + try { + const resumeUrl = `${baseUrl}/v1/responses/${responseId}?stream=true&starting_after=${lastSeq}` + const res = await fetch(resumeUrl, { + method: "GET", + headers: { + Authorization: `Bearer ${apiKey}`, + Accept: "text/event-stream", + }, + }) + + if (!res.ok || !res.body) { + throw new Error(`Resume request failed (${res.status})`) + } + + this.resumeCutoffSequence = lastSeq + + let emittedInProgress = false + try { + for await (const chunk of this.handleStreamResponse(res.body, model)) { + // After the handshake and first accepted chunk, emit in_progress once + if (!emittedInProgress) { + emittedInProgress = true + yield { + type: "status", + mode: "background", + status: "in_progress", + responseId, + } + } + // Avoid double-emitting in_progress if the inner handler surfaces it + if (chunk.type === "status" && (chunk as any).status === "in_progress") { + continue + } + yield chunk + } + // Successful resume + this.resumeCutoffSequence = undefined + return + } catch (e) { + // Resume stream failed mid-flight; reset and throw to retry + this.resumeCutoffSequence = undefined + throw e + } + } catch { + // Wait with backoff before next attempt + const delay = resumeBaseDelayMs * Math.pow(2, attempt) + if (delay > 0) { + await new Promise((r) => setTimeout(r, delay)) + } + } + } + + // Resume failed - begin polling fallback + yield { + type: "status", + mode: "background", + status: "polling", + responseId, + } + + const pollIntervalMs = this.options.openAiNativeBackgroundPollIntervalMs ?? 2000 + const pollMaxMinutes = this.options.openAiNativeBackgroundPollMaxMinutes ?? 
20 + const deadline = Date.now() + pollMaxMinutes * 60_000 + + let lastEmittedStatus: "queued" | "in_progress" | "completed" | "failed" | "canceled" | undefined = undefined + + while (Date.now() <= deadline) { + try { + const pollRes = await fetch(`${baseUrl}/v1/responses/${responseId}`, { + method: "GET", + headers: { + Authorization: `Bearer ${apiKey}`, + }, + }) + + if (!pollRes.ok) { + // transient; wait and retry + await new Promise((r) => setTimeout(r, pollIntervalMs)) + continue + } + + let raw: any + try { + raw = await pollRes.json() + } catch { + await new Promise((r) => setTimeout(r, pollIntervalMs)) + continue + } + + const resp = raw?.response ?? raw + const status: string | undefined = resp?.status + const respId: string | undefined = resp?.id ?? responseId + + // Capture resolved service tier if present + if (resp?.service_tier) { + this.lastServiceTier = resp.service_tier as ServiceTier + } + + // Emit status transitions + if ( + status && + (status === "queued" || + status === "in_progress" || + status === "completed" || + status === "failed" || + status === "canceled") + ) { + if (status !== lastEmittedStatus) { + yield { + type: "status", + mode: "background", + status: status as any, + ...(respId ? { responseId: respId } : {}), + } + lastEmittedStatus = status as any + } + } + + if (status === "completed") { + // Synthesize final output + const output = resp?.output ?? raw?.output + if (Array.isArray(output)) { + for (const outputItem of output) { + if (outputItem.type === "text" && Array.isArray(outputItem.content)) { + for (const content of outputItem.content) { + if (content?.type === "text" && typeof content.text === "string") { + yield { type: "text", text: content.text } + } + } + } else if (outputItem.type === "message" && Array.isArray(outputItem.content)) { + for (const content of outputItem.content) { + if ( + (content?.type === "output_text" || content?.type === "text") && + typeof content.text === "string" + ) { + yield { type: "text", text: content.text } + } + } + } else if (outputItem.type === "reasoning" && Array.isArray(outputItem.summary)) { + for (const summary of outputItem.summary) { + if (summary?.type === "summary_text" && typeof summary.text === "string") { + yield { type: "reasoning", text: summary.text } + } + } + } + } + } + + // Synthesize usage + const usage = resp?.usage ?? raw?.usage + const usageData = this.normalizeUsage(usage, model) + if (usageData) { + yield usageData + } + + return + } + + if (status === "failed" || status === "canceled") { + throw new Error(`Response ${status}: ${respId || responseId}`) + } + } catch { + // ignore transient poll errors + } + + await new Promise((r) => setTimeout(r, pollIntervalMs)) + } + + throw new Error(`Background response polling timed out for ${responseId}`) + } + /** * Shared processor for Responses API events. 
*/ @@ -1038,6 +1324,34 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (event?.response?.id) { this.lastResponseId = event.response.id as string } + // Record sequence number for cursor tracking + if (typeof event?.sequence_number === "number") { + this.lastSequenceNumber = event.sequence_number + } + + // Map lifecycle events to status chunks + const statusMap: Record = { + "response.queued": "queued", + "response.in_progress": "in_progress", + "response.completed": "completed", + "response.done": "completed", + "response.failed": "failed", + "response.canceled": "canceled", + } + const mappedStatus = statusMap[event?.type as string] + if (mappedStatus) { + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: mappedStatus, + ...(event?.response?.id ? { responseId: event.response.id } : {}), + } + // Clear background flag for terminal statuses + if (mappedStatus === "completed" || mappedStatus === "failed" || mappedStatus === "canceled") { + this.currentRequestIsBackground = undefined + } + // Do not return; allow further handling (e.g., usage on done/completed) + } // Handle known streaming text deltas if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") { @@ -1252,6 +1566,23 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio return this.lastResponseId } + /** + * Gets the last sequence number observed from streaming events. + * @returns The sequence number, or undefined if not available yet + */ + getLastSequenceNumber(): number | undefined { + return this.lastSequenceNumber + } + + /** + * Sets the last response ID for conversation continuity. + * Typically only used in tests or special flows. 
+ * @param responseId The response ID to store + */ + setResponseId(responseId: string): void { + this.lastResponseId = responseId + } + async completePrompt(prompt: string): Promise { // Create AbortController for cancellation this.abortController = new AbortController() diff --git a/src/api/transform/stream.ts b/src/api/transform/stream.ts index a4a0fe4a9a7..a3011617996 100644 --- a/src/api/transform/stream.ts +++ b/src/api/transform/stream.ts @@ -10,6 +10,7 @@ export type ApiStreamChunk = | ApiStreamToolCallDeltaChunk | ApiStreamToolCallEndChunk | ApiStreamToolCallPartialChunk + | ApiStreamStatusChunk | ApiStreamError export interface ApiStreamError { @@ -85,3 +86,10 @@ export interface GroundingSource { url: string snippet?: string } + +export interface ApiStreamStatusChunk { + type: "status" + mode?: "background" + status: "queued" | "in_progress" | "completed" | "failed" | "canceled" | "reconnecting" | "polling" + responseId?: string +} diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 8ed9ab56405..6b8fbeaedbb 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -2612,6 +2612,24 @@ export class Task extends EventEmitter implements TaskLike { presentAssistantMessage(this) break } + + case "status": { + try { + const apiReqMsg = this.clineMessages[lastApiReqIndex] + if (apiReqMsg && apiReqMsg.type === "say" && apiReqMsg.say === "api_req_started") { + ;(apiReqMsg as any).metadata = (apiReqMsg as any).metadata || {} + if (chunk.mode === "background") { + ;(apiReqMsg as any).metadata.background = true + } + ;(apiReqMsg as any).metadata.backgroundStatus = chunk.status + if (chunk.responseId) { + ;(apiReqMsg as any).metadata.responseId = chunk.responseId + } + await this.updateClineMessage(apiReqMsg) + } + } catch {} + break + } case "text": { assistantMessage += chunk.text diff --git a/src/shared/api.ts b/src/shared/api.ts index 4f4c8a4ae9a..b014d0dae9f 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -24,6 +24,20 @@ export type ApiHandlerOptions = Omit & { * When undefined, Ollama will use the model's default num_ctx from the Modelfile. */ ollamaNumCtx?: number + /** + * Opt-in for OpenAI Responses background mode when using apiProvider=openai-native. + * Defaults to false when omitted. + */ + openAiNativeBackgroundMode?: boolean + /** + * Auto-resume/poll configuration for OpenAI Responses background mode. + * These are plumbed-only (no UI). Defaults are resolved in the handler. 
+ */ + openAiNativeBackgroundAutoResume?: boolean + openAiNativeBackgroundResumeMaxRetries?: number + openAiNativeBackgroundResumeBaseDelayMs?: number + openAiNativeBackgroundPollIntervalMs?: number + openAiNativeBackgroundPollMaxMinutes?: number } // RouterName diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index 48cd46350d5..2d93005d68b 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -15,6 +15,9 @@ import { useExtensionState } from "@src/context/ExtensionStateContext" import { findMatchingResourceOrTemplate } from "@src/utils/mcp" import { vscode } from "@src/utils/vscode" import { formatPathTooltip } from "@src/utils/formatPathTooltip" +import { removeLeadingNonAlphanumeric } from "@src/utils/removeLeadingNonAlphanumeric" +import { getLanguageFromPath } from "@src/utils/getLanguageFromPath" +import { labelForBackgroundStatus } from "@src/utils/backgroundStatus" import { ToolUseBlock, ToolUseBlockHeader } from "../common/ToolUseBlock" import UpdateTodoListToolBlock from "./UpdateTodoListToolBlock" @@ -313,6 +316,21 @@ export const ChatRowContent = ({ /> ) + // Background mode UI label/icon handling + const meta: any = message.metadata + const isBackground = meta?.background === true + const bgStatus = meta?.backgroundStatus as + | "queued" + | "in_progress" + | "reconnecting" + | "polling" + | "completed" + | "failed" + | "canceled" + | undefined + const bgDone = + isBackground && (bgStatus === "completed" || bgStatus === "failed" || bgStatus === "canceled") + const label = isBackground ? labelForBackgroundStatus(bgStatus) : undefined return [ apiReqCancelReason !== null && apiReqCancelReason !== undefined ? ( apiReqCancelReason === "user_cancelled" ? ( @@ -320,6 +338,16 @@ export const ChatRowContent = ({ ) : ( getIconSpan("error", errorColor) ) + ) : bgDone ? ( + bgStatus === "completed" ? ( + isExpanded ? ( + + ) : ( + + ) + ) : ( + getIconSpan("error", bgStatus === "canceled" ? cancelledColor : errorColor) + ) ) : cost !== null && cost !== undefined ? ( getIconSpan("arrow-swap", normalColor) ) : apiRequestFailedMessage ? ( @@ -337,6 +365,8 @@ export const ChatRowContent = ({ {t("chat:apiRequest.streamingFailed")} ) + ) : label ? ( + {label} ) : cost !== null && cost !== undefined ? ( {t("chat:apiRequest.title")} ) : apiRequestFailedMessage ? ( @@ -1066,8 +1096,14 @@ export const ChatRowContent = ({ ) case "api_req_started": // Determine if the API request is in progress + const bgMeta: any = message.metadata + const bgStatus = bgMeta?.background === true ? 
bgMeta?.backgroundStatus : undefined + const bgDone = bgStatus === "completed" || bgStatus === "failed" || bgStatus === "canceled" const isApiRequestInProgress = - apiReqCancelReason === undefined && apiRequestFailedMessage === undefined && cost === undefined + apiReqCancelReason === undefined && + apiRequestFailedMessage === undefined && + cost === undefined && + !bgDone return ( <> diff --git a/webview-ui/src/utils/__tests__/backgroundStatus.spec.ts b/webview-ui/src/utils/__tests__/backgroundStatus.spec.ts new file mode 100644 index 00000000000..aac4c73b3e9 --- /dev/null +++ b/webview-ui/src/utils/__tests__/backgroundStatus.spec.ts @@ -0,0 +1,35 @@ +import { labelForBackgroundStatus } from "@src/utils/backgroundStatus" + +describe("labelForBackgroundStatus()", () => { + it("maps queued", () => { + expect(labelForBackgroundStatus("queued")).toBe("API Request: background mode (queued)…") + }) + + it("maps in_progress", () => { + expect(labelForBackgroundStatus("in_progress")).toBe("API Request: background mode (in progress)…") + }) + + it("maps reconnecting", () => { + expect(labelForBackgroundStatus("reconnecting")).toBe("API Request: background mode (reconnecting…)") + }) + + it("maps polling", () => { + expect(labelForBackgroundStatus("polling")).toBe("API Request: background mode (polling…)") + }) + + it("maps completed", () => { + expect(labelForBackgroundStatus("completed")).toBe("API Request: background mode (completed)") + }) + + it("maps failed", () => { + expect(labelForBackgroundStatus("failed")).toBe("API Request: background mode (failed)") + }) + + it("maps canceled", () => { + expect(labelForBackgroundStatus("canceled")).toBe("API Request: background mode (canceled)") + }) + + it("maps undefined to generic label", () => { + expect(labelForBackgroundStatus(undefined)).toBe("API Request: background mode") + }) +}) diff --git a/webview-ui/src/utils/__tests__/backgroundStatus.test.ts b/webview-ui/src/utils/__tests__/backgroundStatus.test.ts new file mode 100644 index 00000000000..aac4c73b3e9 --- /dev/null +++ b/webview-ui/src/utils/__tests__/backgroundStatus.test.ts @@ -0,0 +1,35 @@ +import { labelForBackgroundStatus } from "@src/utils/backgroundStatus" + +describe("labelForBackgroundStatus()", () => { + it("maps queued", () => { + expect(labelForBackgroundStatus("queued")).toBe("API Request: background mode (queued)…") + }) + + it("maps in_progress", () => { + expect(labelForBackgroundStatus("in_progress")).toBe("API Request: background mode (in progress)…") + }) + + it("maps reconnecting", () => { + expect(labelForBackgroundStatus("reconnecting")).toBe("API Request: background mode (reconnecting…)") + }) + + it("maps polling", () => { + expect(labelForBackgroundStatus("polling")).toBe("API Request: background mode (polling…)") + }) + + it("maps completed", () => { + expect(labelForBackgroundStatus("completed")).toBe("API Request: background mode (completed)") + }) + + it("maps failed", () => { + expect(labelForBackgroundStatus("failed")).toBe("API Request: background mode (failed)") + }) + + it("maps canceled", () => { + expect(labelForBackgroundStatus("canceled")).toBe("API Request: background mode (canceled)") + }) + + it("maps undefined to generic label", () => { + expect(labelForBackgroundStatus(undefined)).toBe("API Request: background mode") + }) +}) diff --git a/webview-ui/src/utils/backgroundStatus.ts b/webview-ui/src/utils/backgroundStatus.ts new file mode 100644 index 00000000000..ad56c2d6e2a --- /dev/null +++ b/webview-ui/src/utils/backgroundStatus.ts @@ -0,0 +1,29 
@@ +export type BackgroundStatus = + | "queued" + | "in_progress" + | "completed" + | "failed" + | "canceled" + | "reconnecting" + | "polling" + +export function labelForBackgroundStatus(s?: BackgroundStatus): string { + switch (s) { + case "queued": + return "API Request: background mode (queued)…" + case "in_progress": + return "API Request: background mode (in progress)…" + case "reconnecting": + return "API Request: background mode (reconnecting…)" + case "polling": + return "API Request: background mode (polling…)" + case "completed": + return "API Request: background mode (completed)" + case "failed": + return "API Request: background mode (failed)" + case "canceled": + return "API Request: background mode (canceled)" + default: + return "API Request: background mode" + } +} From f3b654d320dc267da2e985606c036ce486027d4d Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Sun, 12 Oct 2025 15:05:51 -0600 Subject: [PATCH 05/17] chore: remove TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM and ignore temp docs --- .gitignore | 6 +- TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM | 212 ---------------------------- 2 files changed, 5 insertions(+), 213 deletions(-) delete mode 100644 TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM diff --git a/.gitignore b/.gitignore index 54cf66cee7a..b7bfacd9201 100644 --- a/.gitignore +++ b/.gitignore @@ -51,4 +51,8 @@ logs qdrant_storage/ # Architect plans -plans/ \ No newline at end of file +plans/ + +# ignore temp background docs +TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM +TEMP_DOCS/ diff --git a/TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM b/TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM deleted file mode 100644 index 483b6a37245..00000000000 --- a/TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM +++ /dev/null @@ -1,212 +0,0 @@ -Background mode -=============== - -Run long running tasks asynchronously in the background. - -Agents like [Codex](https://openai.com/index/introducing-codex/) and [Deep Research](https://openai.com/index/introducing-deep-research/) show that reasoning models can take several minutes to solve complex problems. Background mode enables you to execute long-running tasks on models like o3 and o1-pro reliably, without having to worry about timeouts or other connectivity issues. - -Background mode kicks off these tasks asynchronously, and developers can poll response objects to check status over time. To start response generation in the background, make an API request with `background` set to `true`: - -Generate a response in the background - -```bash -curl https://api.openai.com/v1/responses \ --H "Content-Type: application/json" \ --H "Authorization: Bearer $OPENAI_API_KEY" \ --d '{ - "model": "o3", - "input": "Write a very long novel about otters in space.", - "background": true -}' -``` - -```javascript -import OpenAI from "openai"; -const client = new OpenAI(); - -const resp = await client.responses.create({ - model: "o3", - input: "Write a very long novel about otters in space.", - background: true, -}); - -console.log(resp.status); -``` - -```python -from openai import OpenAI - -client = OpenAI() - -resp = client.responses.create( - model="o3", - input="Write a very long novel about otters in space.", - background=True, -) - -print(resp.status) -``` - -Polling background responses ----------------------------- - -To check the status of background requests, use the GET endpoint for Responses. Keep polling while the request is in the queued or in\_progress state. When it leaves these states, it has reached a final (terminal) state. 
- -Retrieve a response executing in the background - -```bash -curl https://api.openai.com/v1/responses/resp_123 \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer $OPENAI_API_KEY" -``` - -```javascript -import OpenAI from "openai"; -const client = new OpenAI(); - -let resp = await client.responses.create({ -model: "o3", -input: "Write a very long novel about otters in space.", -background: true, -}); - -while (resp.status === "queued" || resp.status === "in_progress") { -console.log("Current status: " + resp.status); -await new Promise(resolve => setTimeout(resolve, 2000)); // wait 2 seconds -resp = await client.responses.retrieve(resp.id); -} - -console.log("Final status: " + resp.status + "\nOutput:\n" + resp.output_text); -``` - -```python -from openai import OpenAI -from time import sleep - -client = OpenAI() - -resp = client.responses.create( - model="o3", - input="Write a very long novel about otters in space.", - background=True, -) - -while resp.status in {"queued", "in_progress"}: - print(f"Current status: {resp.status}") - sleep(2) - resp = client.responses.retrieve(resp.id) - -print(f"Final status: {resp.status}\nOutput:\n{resp.output_text}") -``` - -Cancelling a background response --------------------------------- - -You can also cancel an in-flight response like this: - -Cancel an ongoing response - -```bash -curl -X POST https://api.openai.com/v1/responses/resp_123/cancel \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer $OPENAI_API_KEY" -``` - -```javascript -import OpenAI from "openai"; -const client = new OpenAI(); - -const resp = await client.responses.cancel("resp_123"); - -console.log(resp.status); -``` - -```python -from openai import OpenAI -client = OpenAI() - -resp = client.responses.cancel("resp_123") - -print(resp.status) -``` - -Cancelling twice is idempotent - subsequent calls simply return the final `Response` object. - -Streaming a background response -------------------------------- - -You can create a background Response and start streaming events from it right away. This may be helpful if you expect the client to drop the stream and want the option of picking it back up later. To do this, create a Response with both `background` and `stream` set to `true`. You will want to keep track of a "cursor" corresponding to the `sequence_number` you receive in each streaming event. - -Currently, the time to first token you receive from a background response is higher than what you receive from a synchronous one. We are working to reduce this latency gap in the coming weeks. 
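A minimal TypeScript sketch of the resume flow described above: it reuses the `GET /v1/responses/{id}?stream=true&starting_after={cursor}` endpoint from the curl example below and skips any events whose `sequence_number` is at or below the saved cursor. The helper name and the simplified SSE parsing are illustrative, not the extension's actual `attemptResumeOrPoll` implementation.

```typescript
// Sketch only: resume a background response stream from the last seen cursor.
async function resumeBackgroundStream(responseId: string, cursor: number, apiKey: string): Promise<number> {
	const res = await fetch(
		`https://api.openai.com/v1/responses/${responseId}?stream=true&starting_after=${cursor}`,
		{ headers: { Authorization: `Bearer ${apiKey}`, Accept: "text/event-stream" } },
	)
	if (!res.ok || !res.body) throw new Error(`Resume request failed (${res.status})`)

	const reader = res.body.getReader()
	const decoder = new TextDecoder()
	let buffer = ""
	let lastSeen = cursor

	for (;;) {
		const { value, done } = await reader.read()
		if (done) break
		buffer += decoder.decode(value, { stream: true })

		const lines = buffer.split("\n")
		buffer = lines.pop() ?? "" // keep the partial trailing line for the next read
		for (const line of lines) {
			if (!line.startsWith("data: ")) continue
			let event: any
			try {
				event = JSON.parse(line.slice("data: ".length))
			} catch {
				continue // ignore non-JSON keep-alive lines
			}
			// Skip events replayed from before the connection dropped.
			if (typeof event?.sequence_number === "number") {
				if (event.sequence_number <= lastSeen) continue
				lastSeen = event.sequence_number
			}
			console.log(event?.type, event?.sequence_number)
		}
	}
	return lastSeen // new cursor, in case this stream drops too
}
```

If the resumed stream drops again, the same call can simply be retried with the returned cursor, which is the backoff loop the handler's resume logic builds on.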
- -Generate and stream a background response - -```bash -curl https://api.openai.com/v1/responses \ --H "Content-Type: application/json" \ --H "Authorization: Bearer $OPENAI_API_KEY" \ --d '{ - "model": "o3", - "input": "Write a very long novel about otters in space.", - "background": true, - "stream": true -}' - -// To resume: -curl "https://api.openai.com/v1/responses/resp_123?stream=true&starting_after=42" \ --H "Content-Type: application/json" \ --H "Authorization: Bearer $OPENAI_API_KEY" -``` - -```javascript -import OpenAI from "openai"; -const client = new OpenAI(); - -const stream = await client.responses.create({ - model: "o3", - input: "Write a very long novel about otters in space.", - background: true, - stream: true, -}); - -let cursor = null; -for await (const event of stream) { - console.log(event); - cursor = event.sequence_number; -} - -// If the connection drops, you can resume streaming from the last cursor (SDK support coming soon): -// const resumedStream = await client.responses.stream(resp.id, { starting_after: cursor }); -// for await (const event of resumedStream) { ... } -``` - -```python -from openai import OpenAI - -client = OpenAI() - -# Fire off an async response but also start streaming immediately -stream = client.responses.create( - model="o3", - input="Write a very long novel about otters in space.", - background=True, - stream=True, -) - -cursor = None -for event in stream: - print(event) - cursor = event.sequence_number - -# If your connection drops, the response continues running and you can reconnect: -# SDK support for resuming the stream is coming soon. -# for event in client.responses.stream(resp.id, starting_after=cursor): -# print(event) -``` - -Limits ------- - -1. Background sampling requires `store=true`; stateless requests are rejected. -2. To cancel a synchronous response, terminate the connection -3. You can only start a new stream from a background response if you created it with `stream=true`. 
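Limits 1 and 3 above both constrain how the request must be created in the first place. A hedged sketch of the request-body shape they imply, with field names taken from the Responses API examples in this document:

```typescript
// Sketch of a resumable background request body (field names as used above).
const backgroundRequestBody = {
	model: "o3",
	input: "Write a very long novel about otters in space.",
	background: true, // run asynchronously; poll or stream for progress
	stream: true, // must be set at creation time to allow streaming or resuming later
	store: true, // background sampling is rejected for stateless (store=false) requests
}
```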
\ No newline at end of file From cb975a2e7b73386eb705d9f66bd7fff74053a3f2 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Thu, 16 Oct 2025 12:22:50 -0600 Subject: [PATCH 06/17] feat(openai-models): update maxTokens for gpt-5-pro-2025-10-06 from 272000 to 128000 --- packages/types/src/providers/openai.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index d05c6eedf34..4bfefa5eb12 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -48,7 +48,7 @@ export const openAiNativeModels = { description: "GPT-5.1: The best model for coding and agentic tasks across domains", }, "gpt-5-pro-2025-10-06": { - maxTokens: 272000, + maxTokens: 128000, contextWindow: 400000, supportsImages: true, supportsPromptCache: true, From c6b768162260df12c8c724db19fcd4fc0f4fe172 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Thu, 16 Oct 2025 13:46:46 -0600 Subject: [PATCH 07/17] feat(chat): enhance background status handling and UI updates for terminal states --- src/api/providers/openai-native.ts | 104 ++++++++++++++++---- src/core/task/Task.ts | 39 +++++++- webview-ui/src/components/chat/ChatView.tsx | 56 +++++++---- 3 files changed, 159 insertions(+), 40 deletions(-) diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 9f3983d98a8..bd0be481c59 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -25,6 +25,22 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". export type OpenAiNativeModel = ReturnType +// GPT-5 specific types + +// Constants for model identification +const GPT5_MODEL_PREFIX = "gpt-5" + +// Marker for terminal background-mode failures so we don't attempt resume/poll fallbacks +function createTerminalBackgroundError(message: string): Error { + const err = new Error(message) + ;(err as any).isTerminalBackgroundError = true + err.name = "TerminalBackgroundError" + return err +} +function isTerminalBackgroundError(err: any): boolean { + return !!(err && (err as any).isTerminalBackgroundError) +} + export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI @@ -372,6 +388,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } } } catch (iterErr) { + // If terminal failure, propagate and do not attempt resume/poll + if (isTerminalBackgroundError(iterErr)) { + throw iterErr + } // Stream dropped mid-flight; attempt resume for background requests if (canAttemptResume()) { for await (const chunk of this.attemptResumeOrPoll( @@ -386,11 +406,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio throw iterErr } } catch (sdkErr: any) { + // Propagate terminal background failures without fallback + if (isTerminalBackgroundError(sdkErr)) { + throw sdkErr + } // For errors, fallback to manual SSE via fetch try { yield* this.makeResponsesApiRequest(requestBody, model, metadata, systemPrompt, messages) } catch (fallbackErr) { // If SSE fallback fails mid-stream and we can resume, try that + if (isTerminalBackgroundError(fallbackErr)) { + throw fallbackErr + } if (canAttemptResume()) { for await (const chunk of this.attemptResumeOrPoll( this.lastResponseId!, @@ -937,9 +964,20 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio else if (parsed.type === "response.error" || 
parsed.type === "error") { // Error event from the API if (parsed.error || parsed.message) { - throw new Error( - `Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}`, - ) + const errMsg = `Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}` + // For background mode, treat as terminal to avoid futile resume attempts + if (this.currentRequestIsBackground) { + // Surface a failed status for UI lifecycle before terminating + yield { + type: "status", + mode: "background", + status: "failed", + ...(parsed.response?.id ? { responseId: parsed.response.id } : {}), + } + throw createTerminalBackgroundError(errMsg) + } + // Non-background: propagate as a standard error + throw new Error(errMsg) } } // Handle incomplete event @@ -975,7 +1013,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } // Response failed if (parsed.error || parsed.message) { - throw new Error( + throw createTerminalBackgroundError( `Response failed: ${parsed.error?.message || parsed.message || "Unknown failure"}`, ) } @@ -1110,6 +1148,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // This can happen in certain edge cases and shouldn't break the flow } catch (error) { if (error instanceof Error) { + // Preserve terminal background errors so callers can avoid resume attempts + if ((error as any).isTerminalBackgroundError) { + throw error + } throw new Error(`Error processing response stream: ${error.message}`) } throw new Error("Unexpected error processing response stream") @@ -1147,25 +1189,25 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio }, }) - if (!res.ok || !res.body) { + if (!res.ok) { throw new Error(`Resume request failed (${res.status})`) } + if (!res.body) { + throw new Error("Resume request failed (no body)") + } this.resumeCutoffSequence = lastSeq - let emittedInProgress = false + // Handshake accepted: immediately switch UI from reconnecting -> in_progress + yield { + type: "status", + mode: "background", + status: "in_progress", + responseId, + } + try { for await (const chunk of this.handleStreamResponse(res.body, model)) { - // After the handshake and first accepted chunk, emit in_progress once - if (!emittedInProgress) { - emittedInProgress = true - yield { - type: "status", - mode: "background", - status: "in_progress", - responseId, - } - } // Avoid double-emitting in_progress if the inner handler surfaces it if (chunk.type === "status" && (chunk as any).status === "in_progress") { continue @@ -1180,9 +1222,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio this.resumeCutoffSequence = undefined throw e } - } catch { - // Wait with backoff before next attempt + } catch (err) { + // If terminal error, don't keep retrying resume; fall back to polling immediately const delay = resumeBaseDelayMs * Math.pow(2, attempt) + if (isTerminalBackgroundError(err)) { + break + } + // Otherwise retry with backoff if (delay > 0) { await new Promise((r) => setTimeout(r, delay)) } @@ -1296,10 +1342,21 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } if (status === "failed" || status === "canceled") { - throw new Error(`Response ${status}: ${respId || responseId}`) + const detail: string | undefined = resp?.error?.message ?? raw?.error?.message + const msg = detail ? 
`Response ${status}: ${detail}` : `Response ${status}: ${respId || responseId}` + throw createTerminalBackgroundError(msg) + } + } catch (err) { + // If we've already emitted a terminal status, propagate to consumer to stop polling. + if (lastEmittedStatus === "failed" || lastEmittedStatus === "canceled") { + throw err } - } catch { - // ignore transient poll errors + // Otherwise ignore transient poll errors + } + + // Stop polling immediately on terminal background statuses + if (lastEmittedStatus === "failed" || lastEmittedStatus === "canceled") { + throw new Error(`Background polling terminated with status=${lastEmittedStatus} for ${responseId}`) } await new Promise((r) => setTimeout(r, pollIntervalMs)) @@ -1350,6 +1407,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (mappedStatus === "completed" || mappedStatus === "failed" || mappedStatus === "canceled") { this.currentRequestIsBackground = undefined } + // Throw terminal error to integrate with standard failure path (surfaced in UI) + if (mappedStatus === "failed" || mappedStatus === "canceled") { + const msg = (event as any)?.error?.message || (event as any)?.message || `Response ${mappedStatus}` + throw createTerminalBackgroundError(msg) + } // Do not return; allow further handling (e.g., usage on done/completed) } diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 6b8fbeaedbb..b2a0da548ef 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -2437,6 +2437,7 @@ export class Task extends EventEmitter implements TaskLike { if (!chunk) { // Sometimes chunk is undefined, no idea that can cause // it, but this workaround seems to fix it. + item = await iterator.next() continue } @@ -2625,7 +2626,14 @@ export class Task extends EventEmitter implements TaskLike { if (chunk.responseId) { ;(apiReqMsg as any).metadata.responseId = chunk.responseId } + // Temporary debug to confirm UI metadata updates + console.log( + `[BackgroundMode] status update -> ${chunk.status} (resp=${chunk.responseId ?? "n/a"})`, + ) await this.updateClineMessage(apiReqMsg) + // Force state refresh to ensure UI recomputes derived labels/memos + const provider = this.providerRef.deref() + await provider?.postStateToWebview() } } catch {} break @@ -2704,6 +2712,10 @@ export class Task extends EventEmitter implements TaskLike { "\n\n[Response interrupted by a tool use result. Only one tool may be used at a time and should be placed at the end of the message.]" break } + // Prefetch the next item after processing the current chunk. + // This ensures terminal status chunks (e.g., failed/canceled/completed) + // are not skipped when the provider throws on the following next(). + item = await iterator.next() } // Finalize any remaining streaming tool calls that weren't explicitly ended @@ -3186,8 +3198,31 @@ export class Task extends EventEmitter implements TaskLike { continue } else { // If there's no assistant_responses, that means we got no text - // or tool_use content blocks from API which we should assume is - // an error. + // or tool_use content blocks from API which we should assume is an error. + // Prefer any streaming failure details captured on the last api_req_started message. + let errorText = + "Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output." 
+ try { + const lastApiReqStartedIdx = findLastIndex( + this.clineMessages, + (m) => m.type === "say" && m.say === "api_req_started", + ) + if (lastApiReqStartedIdx !== -1) { + const info = JSON.parse( + this.clineMessages[lastApiReqStartedIdx].text || "{}", + ) as ClineApiReqInfo + if ( + typeof info?.streamingFailedMessage === "string" && + info.streamingFailedMessage.trim().length > 0 + ) { + errorText = info.streamingFailedMessage + } + } + } catch { + // ignore parse issues and keep default message + } + + await this.say("error", errorText) // IMPORTANT: For native tool protocol, we already added the user message to // apiConversationHistory at line 1876. Since the assistant failed to respond, diff --git a/webview-ui/src/components/chat/ChatView.tsx b/webview-ui/src/components/chat/ChatView.tsx index 6ee163fe41b..fd61af6078c 100644 --- a/webview-ui/src/components/chat/ChatView.tsx +++ b/webview-ui/src/components/chat/ChatView.tsx @@ -505,27 +505,49 @@ const ChatViewComponent: React.ForwardRefRenderFunction message.say === "api_req_started", + ) + + // Extract background terminal state and cancel reason/cost if present + let bgDone = false + let cancelReason: string | null | undefined = undefined + let cost: any = undefined + + if (lastApiReqStarted && lastApiReqStarted.say === "api_req_started") { + const meta: any = (lastApiReqStarted as any).metadata + const bgStatus = meta?.background === true ? meta?.backgroundStatus : undefined + bgDone = bgStatus === "completed" || bgStatus === "failed" || bgStatus === "canceled" + + try { + if (lastApiReqStarted.text !== null && lastApiReqStarted.text !== undefined) { + const info = JSON.parse(lastApiReqStarted.text) + cost = info?.cost + cancelReason = info?.cancelReason + } + } catch { + // ignore malformed json + } + } + + // If background reached a terminal state or the provider recorded a cancel reason, + // treat UI as not streaming regardless of partial flags or missing cost. + if (bgDone || cancelReason != null) { + return false + } + // Partial assistant content means streaming unless overridden by the terminal checks above. + const isLastMessagePartial = modifiedMessages.at(-1)?.partial === true if (isLastMessagePartial) { return true - } else { - const lastApiReqStarted = findLast( - modifiedMessages, - (message: ClineMessage) => message.say === "api_req_started", - ) - - if ( - lastApiReqStarted && - lastApiReqStarted.text !== null && - lastApiReqStarted.text !== undefined && - lastApiReqStarted.say === "api_req_started" - ) { - const cost = JSON.parse(lastApiReqStarted.text).cost + } - if (cost === undefined) { - return true // API request has not finished yet. - } + // Otherwise, if the API request hasn't finished (no cost yet), consider it streaming. 
+ if (lastApiReqStarted && lastApiReqStarted.say === "api_req_started") { + if (cost === undefined) { + return true } } From e118846aa5bd25fc57e11f2460935033e987a895 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Thu, 16 Oct 2025 14:24:56 -0600 Subject: [PATCH 08/17] fix: Address PR review feedback - fix stale resume IDs, update model description, remove duplicate test, revert gitignore --- .gitignore | 1 - packages/types/src/providers/openai.ts | 2 +- src/api/providers/openai-native.ts | 22 ++++++++---- .../utils/__tests__/backgroundStatus.test.ts | 35 ------------------- 4 files changed, 17 insertions(+), 43 deletions(-) delete mode 100644 webview-ui/src/utils/__tests__/backgroundStatus.test.ts diff --git a/.gitignore b/.gitignore index b7bfacd9201..ed8e3978995 100644 --- a/.gitignore +++ b/.gitignore @@ -49,7 +49,6 @@ logs # Qdrant qdrant_storage/ - # Architect plans plans/ diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index 4bfefa5eb12..b05185402d7 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -57,7 +57,7 @@ export const openAiNativeModels = { inputPrice: 15.0, outputPrice: 120.0, description: - "GPT-5 Pro: a slow, reasoning-focused model built to tackle tough problems. Requests can take several minutes to finish. Responses API only; no streaming, so it may appear stuck until the reply is ready.", + "GPT-5 Pro: A slow, reasoning-focused model for complex problems. Uses background mode with resilient streaming - requests may take several minutes with automatic recovery if connection drops.", supportsVerbosity: true, supportsTemperature: false, backgroundMode: true, diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index bd0be481c59..15e6255c180 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -58,6 +58,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio private currentRequestIsBackground?: boolean // Cutoff sequence for filtering stale events during resume private resumeCutoffSequence?: number + // Per-request tracking to prevent stale resume attempts + private currentRequestResponseId?: string + private currentRequestSequenceNumber?: number // Event types handled by the shared event processor to avoid duplication private readonly coreHandledEventTypes = new Set([ @@ -357,12 +360,15 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Annotate if this request uses background mode (used for status chunks) this.currentRequestIsBackground = !!requestBody?.background + // Reset per-request tracking to prevent stale values from previous requests + this.currentRequestResponseId = undefined + this.currentRequestSequenceNumber = undefined const canAttemptResume = () => this.currentRequestIsBackground && (this.options.openAiNativeBackgroundAutoResume ?? 
true) && - !!this.lastResponseId && - typeof this.lastSequenceNumber === "number" + !!this.currentRequestResponseId && + typeof this.currentRequestSequenceNumber === "number" try { // Use the official SDK @@ -395,8 +401,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Stream dropped mid-flight; attempt resume for background requests if (canAttemptResume()) { for await (const chunk of this.attemptResumeOrPoll( - this.lastResponseId!, - this.lastSequenceNumber!, + this.currentRequestResponseId!, + this.currentRequestSequenceNumber!, model, )) { yield chunk @@ -420,8 +426,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } if (canAttemptResume()) { for await (const chunk of this.attemptResumeOrPoll( - this.lastResponseId!, - this.lastSequenceNumber!, + this.currentRequestResponseId!, + this.currentRequestSequenceNumber!, model, )) { yield chunk @@ -684,6 +690,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Record sequence number for cursor tracking if (typeof parsed?.sequence_number === "number") { this.lastSequenceNumber = parsed.sequence_number + // Also track for per-request resume capability + this.currentRequestSequenceNumber = parsed.sequence_number } // Capture resolved service tier if present @@ -1384,6 +1392,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Record sequence number for cursor tracking if (typeof event?.sequence_number === "number") { this.lastSequenceNumber = event.sequence_number + // Also track for per-request resume capability + this.currentRequestSequenceNumber = event.sequence_number } // Map lifecycle events to status chunks diff --git a/webview-ui/src/utils/__tests__/backgroundStatus.test.ts b/webview-ui/src/utils/__tests__/backgroundStatus.test.ts deleted file mode 100644 index aac4c73b3e9..00000000000 --- a/webview-ui/src/utils/__tests__/backgroundStatus.test.ts +++ /dev/null @@ -1,35 +0,0 @@ -import { labelForBackgroundStatus } from "@src/utils/backgroundStatus" - -describe("labelForBackgroundStatus()", () => { - it("maps queued", () => { - expect(labelForBackgroundStatus("queued")).toBe("API Request: background mode (queued)…") - }) - - it("maps in_progress", () => { - expect(labelForBackgroundStatus("in_progress")).toBe("API Request: background mode (in progress)…") - }) - - it("maps reconnecting", () => { - expect(labelForBackgroundStatus("reconnecting")).toBe("API Request: background mode (reconnecting…)") - }) - - it("maps polling", () => { - expect(labelForBackgroundStatus("polling")).toBe("API Request: background mode (polling…)") - }) - - it("maps completed", () => { - expect(labelForBackgroundStatus("completed")).toBe("API Request: background mode (completed)") - }) - - it("maps failed", () => { - expect(labelForBackgroundStatus("failed")).toBe("API Request: background mode (failed)") - }) - - it("maps canceled", () => { - expect(labelForBackgroundStatus("canceled")).toBe("API Request: background mode (canceled)") - }) - - it("maps undefined to generic label", () => { - expect(labelForBackgroundStatus(undefined)).toBe("API Request: background mode") - }) -}) From 7cff4db96c7c26c689b97f3a76ad9da6c1d9c666 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Fri, 24 Oct 2025 16:50:14 -0600 Subject: [PATCH 09/17] fix(webview): define chevron icon via codicon and add missing isExpanded dep to useMemo; test: remove duplicate GPT-5 Pro background-mode test; chore(core): remove temp debug log --- 
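Context for the `isExpanded` dependency fix: any value read inside `useMemo` has to appear in its dependency array, otherwise the memoized JSX keeps showing the stale chevron after the row is toggled. A minimal sketch of the pattern; the component and class names are illustrative, not the actual ChatRow code.

```tsx
import { useMemo } from "react"

// Illustrative only: the chevron direction depends on `isExpanded`, so it must
// be listed as a dependency for the memo to re-compute when the row toggles.
function ExpandChevron({ isExpanded }: { isExpanded: boolean }) {
	const icon = useMemo(
		() => <span className={`codicon codicon-chevron-${isExpanded ? "up" : "down"}`} />,
		[isExpanded],
	)
	return icon
}

export default ExpandChevron
```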
packages/types/src/providers/openai.ts | 3 +-- .../providers/__tests__/openai-native.spec.ts | 20 ------------------- src/core/task/Task.ts | 3 --- webview-ui/src/components/chat/ChatRow.tsx | 18 +++++++++++------ 4 files changed, 13 insertions(+), 31 deletions(-) diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index b05185402d7..8e38472ea17 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -51,9 +51,8 @@ export const openAiNativeModels = { maxTokens: 128000, contextWindow: 400000, supportsImages: true, - supportsPromptCache: true, + supportsPromptCache: false, supportsReasoningEffort: false, - reasoningEffort: "high", inputPrice: 15.0, outputPrice: 120.0, description: diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts index fa872db4257..b3500763c9a 100644 --- a/src/api/providers/__tests__/openai-native.spec.ts +++ b/src/api/providers/__tests__/openai-native.spec.ts @@ -1510,26 +1510,6 @@ describe("OpenAI Native background mode behavior", () => { expect(requestBodyWithOptionFalse.stream).toBe(true) }) - it("auto-enables background mode for gpt-5-pro when no override is specified", async () => { - const handler = new OpenAiNativeHandler({ - apiModelId: "gpt-5-pro-2025-10-06", - openAiNativeApiKey: "test", - // no openAiNativeBackgroundMode provided - }) - - mockResponsesCreate.mockResolvedValueOnce(createMinimalIterable()) - - const chunks: any[] = [] - for await (const chunk of handler.createMessage(systemPrompt, baseMessages, metadataStoreFalse)) { - chunks.push(chunk) - } - - expect(chunks).not.toHaveLength(0) - const requestBody = mockResponsesCreate.mock.calls[0][0] - expect(requestBody.background).toBe(true) - expect(requestBody.stream).toBe(true) - expect(requestBody.store).toBe(true) - }) it("forces store:true and includes background:true when falling back to SSE", async () => { const handler = new OpenAiNativeHandler({ apiModelId: "gpt-5-pro-2025-10-06", diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index b2a0da548ef..66ebffe6993 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -2627,9 +2627,6 @@ export class Task extends EventEmitter implements TaskLike { ;(apiReqMsg as any).metadata.responseId = chunk.responseId } // Temporary debug to confirm UI metadata updates - console.log( - `[BackgroundMode] status update -> ${chunk.status} (resp=${chunk.responseId ?? "n/a"})`, - ) await this.updateClineMessage(apiReqMsg) // Force state refresh to ensure UI recomputes derived labels/memos const provider = this.providerRef.deref() diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index 2d93005d68b..c7743985a24 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -340,11 +340,7 @@ export const ChatRowContent = ({ ) ) : bgDone ? ( bgStatus === "completed" ? ( - isExpanded ? ( - - ) : ( - - ) + ) : ( getIconSpan("error", bgStatus === "canceled" ? 
cancelledColor : errorColor) ) @@ -383,7 +379,17 @@ export const ChatRowContent = ({ default: return [null, null] } - }, [type, isCommandExecuting, message, isMcpServerResponding, apiReqCancelReason, cost, apiRequestFailedMessage, t]) + }, [ + type, + isCommandExecuting, + message, + isExpanded, + isMcpServerResponding, + apiReqCancelReason, + cost, + apiRequestFailedMessage, + t, + ]) const headerStyle: React.CSSProperties = { display: "flex", From d552cce01f579a6a87cef1c6b31190a632f728a3 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Fri, 24 Oct 2025 17:14:04 -0600 Subject: [PATCH 10/17] fix(openai): update reasoning effort default to high and improve model description for clarity --- packages/types/src/providers/openai.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index 8e38472ea17..988591c37a3 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -52,11 +52,12 @@ export const openAiNativeModels = { contextWindow: 400000, supportsImages: true, supportsPromptCache: false, - supportsReasoningEffort: false, + supportsReasoningEffort: false, // This is set to false to prevent the ui from displaying the reasoning effort selector + reasoningEffort: "high", // Pro model uses high reasoning effort by default and must be specified inputPrice: 15.0, outputPrice: 120.0, description: - "GPT-5 Pro: A slow, reasoning-focused model for complex problems. Uses background mode with resilient streaming - requests may take several minutes with automatic recovery if connection drops.", + "GPT-5 Pro: A slow, reasoning-focused model for complex problems. Uses background mode with resilient streaming - requests may somte time and will automatically reconnect if they timeout.", supportsVerbosity: true, supportsTemperature: false, backgroundMode: true, From 49f90b98f56c8551410922e946d4c6b384266d28 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Fri, 24 Oct 2025 17:30:28 -0600 Subject: [PATCH 11/17] webview-ui: use standard API Request icons for background mode; keep background labels; fix deps warning in ChatRow useMemo --- webview-ui/src/components/chat/ChatRow.tsx | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index c7743985a24..fa17bf15415 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -340,7 +340,7 @@ export const ChatRowContent = ({ ) ) : bgDone ? ( bgStatus === "completed" ? ( - + getIconSpan("arrow-swap", normalColor) ) : ( getIconSpan("error", bgStatus === "canceled" ? cancelledColor : errorColor) ) @@ -352,7 +352,9 @@ export const ChatRowContent = ({ ), apiReqCancelReason !== null && apiReqCancelReason !== undefined ? ( - apiReqCancelReason === "user_cancelled" ? ( + isBackground && label ? ( + {label} + ) : apiReqCancelReason === "user_cancelled" ? 
( {t("chat:apiRequest.cancelled")} @@ -379,17 +381,7 @@ export const ChatRowContent = ({ default: return [null, null] } - }, [ - type, - isCommandExecuting, - message, - isExpanded, - isMcpServerResponding, - apiReqCancelReason, - cost, - apiRequestFailedMessage, - t, - ]) + }, [type, isCommandExecuting, message, isMcpServerResponding, apiReqCancelReason, cost, apiRequestFailedMessage, t]) const headerStyle: React.CSSProperties = { display: "flex", From 50242b4ee95001745c0495579d507f6852f38e0a Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Fri, 24 Oct 2025 17:52:57 -0600 Subject: [PATCH 12/17] fix(openai-native): add logging for background resume and polling; classify permanent vs transient errors; chore(task): remove temporary debug log --- src/api/providers/openai-native.ts | 32 ++++++++++++++++++++++++++---- src/core/task/Task.ts | 1 - 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 15e6255c180..4eb449c63de 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -1230,13 +1230,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio this.resumeCutoffSequence = undefined throw e } - } catch (err) { + } catch (err: any) { // If terminal error, don't keep retrying resume; fall back to polling immediately const delay = resumeBaseDelayMs * Math.pow(2, attempt) + const msg = err instanceof Error ? err.message : String(err) + if (isTerminalBackgroundError(err)) { + console.error(`[OpenAiNative][resume] terminal background error on attempt ${attempt + 1}: ${msg}`) break } - // Otherwise retry with backoff + + // Otherwise retry with backoff (transient failure) + console.warn(`[OpenAiNative][resume] attempt ${attempt + 1} failed; retrying in ${delay}ms: ${msg}`) if (delay > 0) { await new Promise((r) => setTimeout(r, delay)) } @@ -1354,12 +1359,31 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio const msg = detail ? `Response ${status}: ${detail}` : `Response ${status}: ${respId || responseId}` throw createTerminalBackgroundError(msg) } - } catch (err) { + } catch (err: any) { // If we've already emitted a terminal status, propagate to consumer to stop polling. if (lastEmittedStatus === "failed" || lastEmittedStatus === "canceled") { throw err } - // Otherwise ignore transient poll errors + + // Classify polling errors and log appropriately + const statusCode = err?.status ?? err?.response?.status + const msg = err instanceof Error ? err.message : String(err) + + // Permanent errors: stop polling + if (statusCode === 401 || statusCode === 403 || statusCode === 404) { + console.error(`[OpenAiNative][poll] permanent error (status ${statusCode}); stopping: ${msg}`) + throw createTerminalBackgroundError(`Polling failed with status ${statusCode}: ${msg}`) + } + + // Rate limit: transient, will retry + if (statusCode === 429) { + console.warn(`[OpenAiNative][poll] rate limited; will retry: ${msg}`) + } else { + // Other transient/network errors + console.warn( + `[OpenAiNative][poll] transient error; will retry${statusCode ? 
` (status ${statusCode})` : ""}: ${msg}`, + ) + } } // Stop polling immediately on terminal background statuses diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 66ebffe6993..a88fc32ec8a 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -2351,7 +2351,6 @@ export class Task extends EventEmitter implements TaskLike { // lastMessage.ts = Date.now() DO NOT update ts since it is used as a key for virtuoso list lastMessage.partial = false // instead of streaming partialMessage events, we do a save and post like normal to persist to disk - console.log("updating partial message", lastMessage) } // Update `api_req_started` to have cancelled and cost, so that From 9803d434a6f7d012e76d9db981931667c503cfa6 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Fri, 24 Oct 2025 20:46:15 -0600 Subject: [PATCH 13/17] fix(types/openai): correct GPT-5 Pro description typos/grammar; perf(core/task): avoid full-state refresh on each background status chunk to reduce re-renders --- packages/types/src/providers/openai.ts | 2 +- src/core/task/Task.ts | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index 988591c37a3..4f89984394f 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -57,7 +57,7 @@ export const openAiNativeModels = { inputPrice: 15.0, outputPrice: 120.0, description: - "GPT-5 Pro: A slow, reasoning-focused model for complex problems. Uses background mode with resilient streaming - requests may somte time and will automatically reconnect if they timeout.", + "GPT-5 Pro: A slow, reasoning-focused model for complex problems. Uses background mode with resilient streaming — requests may take some time and will automatically reconnect if they time out.", supportsVerbosity: true, supportsTemperature: false, backgroundMode: true, diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index a88fc32ec8a..311884fe56e 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -2625,11 +2625,8 @@ export class Task extends EventEmitter implements TaskLike { if (chunk.responseId) { ;(apiReqMsg as any).metadata.responseId = chunk.responseId } - // Temporary debug to confirm UI metadata updates + // Update the specific message; avoid full-state refresh on every status chunk to reduce re-renders await this.updateClineMessage(apiReqMsg) - // Force state refresh to ensure UI recomputes derived labels/memos - const provider = this.providerRef.deref() - await provider?.postStateToWebview() } } catch {} break From f988dee5ca6a1f93ba23edb234d4e293173daa38 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Tue, 9 Dec 2025 18:46:35 -0700 Subject: [PATCH 14/17] fix: use hyphen instead of em dash in GPT-5 Pro description, remove unused imports --- packages/types/src/providers/openai.ts | 2 +- webview-ui/src/components/chat/ChatRow.tsx | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index 4f89984394f..4fc245907ac 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -57,7 +57,7 @@ export const openAiNativeModels = { inputPrice: 15.0, outputPrice: 120.0, description: - "GPT-5 Pro: A slow, reasoning-focused model for complex problems. 
Uses background mode with resilient streaming — requests may take some time and will automatically reconnect if they time out.", + "GPT-5 Pro: A slow, reasoning-focused model for complex problems. Uses background mode with resilient streaming - requests may take some time and will automatically reconnect if they time out.", supportsVerbosity: true, supportsTemperature: false, backgroundMode: true, diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index fa17bf15415..1171c2ad310 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -15,8 +15,6 @@ import { useExtensionState } from "@src/context/ExtensionStateContext" import { findMatchingResourceOrTemplate } from "@src/utils/mcp" import { vscode } from "@src/utils/vscode" import { formatPathTooltip } from "@src/utils/formatPathTooltip" -import { removeLeadingNonAlphanumeric } from "@src/utils/removeLeadingNonAlphanumeric" -import { getLanguageFromPath } from "@src/utils/getLanguageFromPath" import { labelForBackgroundStatus } from "@src/utils/backgroundStatus" import { ToolUseBlock, ToolUseBlockHeader } from "../common/ToolUseBlock" From f138361e870b2743f84b5e6805ba0d8c3d01dc45 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Tue, 9 Dec 2025 18:49:05 -0700 Subject: [PATCH 15/17] fix: fix type errors - add metadata to ClineMessage, fix getResponseId method name --- packages/types/src/message.ts | 12 ++++++++++++ src/api/providers/__tests__/openai-native.spec.ts | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/types/src/message.ts b/packages/types/src/message.ts index 82f58f29f28..39b7ed25e3b 100644 --- a/packages/types/src/message.ts +++ b/packages/types/src/message.ts @@ -279,6 +279,18 @@ export const clineMessageSchema = z.object({ isProtected: z.boolean().optional(), apiProtocol: z.union([z.literal("openai"), z.literal("anthropic")]).optional(), isAnswered: z.boolean().optional(), + /** + * Optional metadata for API request tracking. + * Used for background mode status display. 
+ */ + metadata: z + .object({ + background: z.boolean().optional(), + backgroundStatus: z + .enum(["queued", "in_progress", "reconnecting", "polling", "completed", "failed", "canceled"]) + .optional(), + }) + .optional(), }) export type ClineMessage = z.infer diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts index b3500763c9a..c0682d16449 100644 --- a/src/api/providers/__tests__/openai-native.spec.ts +++ b/src/api/providers/__tests__/openai-native.spec.ts @@ -1703,7 +1703,7 @@ describe("OpenAI Native streaming metadata tracking", () => { expect(chunks).toContainEqual({ type: "text", text: "A" }) expect(chunks).toContainEqual({ type: "reasoning", text: "B" }) expect(handler.getLastSequenceNumber()).toBe(3) - expect(handler.getLastResponseId()).toBe("resp_123") + expect(handler.getResponseId()).toBe("resp_123") }) }) From 8f5e2ed304c120538e69aaeef9af225447ac16b5 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Tue, 9 Dec 2025 19:00:06 -0700 Subject: [PATCH 16/17] fix: track currentRequestResponseId in SSE fallback path for background mode resume --- src/api/providers/openai-native.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 4eb449c63de..e35a6c61626 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -705,6 +705,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Capture top-level response id if (parsed.response?.id) { this.lastResponseId = parsed.response.id as string + // Also track for per-request resume capability + this.currentRequestResponseId = parsed.response.id as string } // Delegate standard event types to the shared processor to avoid duplication @@ -1412,6 +1414,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Capture top-level response id if (event?.response?.id) { this.lastResponseId = event.response.id as string + // Also track for per-request resume capability + this.currentRequestResponseId = event.response.id as string } // Record sequence number for cursor tracking if (typeof event?.sequence_number === "number") { From 706138e7fe17fd77b7b07f3eb7a2b89e9a877826 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 10 Dec 2025 00:50:28 -0700 Subject: [PATCH 17/17] Handle terminal failures in background resume/poll --- src/api/providers/openai-native.ts | 31 +++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index e35a6c61626..e6bcc5d021a 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -1197,10 +1197,25 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio Authorization: `Bearer ${apiKey}`, Accept: "text/event-stream", }, + signal: this.abortController?.signal, }) if (!res.ok) { - throw new Error(`Resume request failed (${res.status})`) + const status = res.status + if (status === 401 || status === 403 || status === 404) { + yield { + type: "status", + mode: "background", + status: "failed", + responseId, + } + + const terminalErr = createTerminalBackgroundError(`Resume request failed (${status})`) + ;(terminalErr as any).status = status + throw terminalErr + } + + throw new Error(`Resume request failed (${status})`) } if (!res.body) { throw new Error("Resume request failed (no body)") @@ -1271,9 +1286,23 @@ export 
class OpenAiNativeHandler extends BaseProvider implements SingleCompletio headers: { Authorization: `Bearer ${apiKey}`, }, + signal: this.abortController?.signal, }) if (!pollRes.ok) { + const status = pollRes.status + if (status === 401 || status === 403 || status === 404) { + yield { + type: "status", + mode: "background", + status: "failed", + responseId, + } + const terminalErr = createTerminalBackgroundError(`Polling failed with status ${status}`) + ;(terminalErr as any).status = status + throw terminalErr + } + // transient; wait and retry await new Promise((r) => setTimeout(r, pollIntervalMs)) continue