diff --git a/packages/types/src/message.ts b/packages/types/src/message.ts
index 7197ab29a12..21baf3f2033 100644
--- a/packages/types/src/message.ts
+++ b/packages/types/src/message.ts
@@ -176,17 +176,6 @@ export const clineMessageSchema = z.object({
 	contextCondense: contextCondenseSchema.optional(),
 	isProtected: z.boolean().optional(),
 	apiProtocol: z.union([z.literal("openai"), z.literal("anthropic")]).optional(),
-	metadata: z
-		.object({
-			gpt5: z
-				.object({
-					previous_response_id: z.string().optional(),
-					instructions: z.string().optional(),
-					reasoning_summary: z.string().optional(),
-				})
-				.optional(),
-		})
-		.optional(),
 })
 
 export type ClineMessage = z.infer<typeof clineMessageSchema>
diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts
index 90b61ad879e..a09790578b5 100644
--- a/packages/types/src/model.ts
+++ b/packages/types/src/model.ts
@@ -44,8 +44,6 @@ export const modelInfoSchema = z.object({
 	supportsImages: z.boolean().optional(),
 	supportsComputerUse: z.boolean().optional(),
 	supportsPromptCache: z.boolean(),
-	// Capability flag to indicate whether the model supports an output verbosity parameter
-	supportsVerbosity: z.boolean().optional(),
 	supportsReasoningBudget: z.boolean().optional(),
 	requiredReasoningBudget: z.boolean().optional(),
 	supportsReasoningEffort: z.boolean().optional(),
diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts
index aebfd4dbe57..f0c90101fcb 100644
--- a/packages/types/src/provider-settings.ts
+++ b/packages/types/src/provider-settings.ts
@@ -3,11 +3,6 @@ import { z } from "zod"
 import { reasoningEffortsSchema, verbosityLevelsSchema, modelInfoSchema } from "./model.js"
 import { codebaseIndexProviderSchema } from "./codebase-index.js"
 
-// Extended schema that includes "minimal" for GPT-5 models
-export const extendedReasoningEffortsSchema = z.union([reasoningEffortsSchema, z.literal("minimal")])
-
-export type ReasoningEffortWithMinimal = z.infer<typeof extendedReasoningEffortsSchema>
-
 /**
  * ProviderName
  */
@@ -81,7 +76,7 @@ const baseProviderSettingsSchema = z.object({
 
 	// Model reasoning.
 	enableReasoningEffort: z.boolean().optional(),
-	reasoningEffort: extendedReasoningEffortsSchema.optional(),
+	reasoningEffort: reasoningEffortsSchema.optional(),
 	modelMaxTokens: z.number().optional(),
 	modelMaxThinkingTokens: z.number().optional(),
 
diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts
index 78d3cb63344..22e48e2b07d 100644
--- a/packages/types/src/providers/openai.ts
+++ b/packages/types/src/providers/openai.ts
@@ -12,13 +12,10 @@ export const openAiNativeModels = {
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsReasoningEffort: true,
-		reasoningEffort: "medium",
 		inputPrice: 1.25,
 		outputPrice: 10.0,
 		cacheReadsPrice: 0.13,
 		description: "GPT-5: The best model for coding and agentic tasks across domains",
-		// supportsVerbosity is a new capability; ensure ModelInfo includes it
-		supportsVerbosity: true,
 	},
 	"gpt-5-mini-2025-08-07": {
 		maxTokens: 128000,
@@ -26,12 +23,10 @@
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsReasoningEffort: true,
-		reasoningEffort: "medium",
 		inputPrice: 0.25,
 		outputPrice: 2.0,
 		cacheReadsPrice: 0.03,
 		description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
-		supportsVerbosity: true,
 	},
 	"gpt-5-nano-2025-08-07": {
 		maxTokens: 128000,
@@ -39,12 +34,10 @@
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsReasoningEffort: true,
-		reasoningEffort: "medium",
 		inputPrice: 0.05,
 		outputPrice: 0.4,
 		cacheReadsPrice: 0.01,
 		description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
-		supportsVerbosity: true,
 	},
 	"gpt-4.1": {
 		maxTokens: 32_768,
@@ -247,6 +240,5 @@ export const openAiModelInfoSaneDefaults: ModelInfo = {
 export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"
 
 export const OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0
-export const GPT5_DEFAULT_TEMPERATURE = 1.0
 
 export const OPENAI_AZURE_AI_INFERENCE_PATH = "/models/chat/completions"
diff --git a/src/api/index.ts b/src/api/index.ts
index 5e705a80d24..57b06f7bbdb 100644
--- a/src/api/index.ts
+++ b/src/api/index.ts
@@ -44,13 +44,6 @@ export interface SingleCompletionHandler {
 export interface ApiHandlerCreateMessageMetadata {
 	mode?: string
 	taskId: string
-	previousResponseId?: string
-	/**
-	 * When true, the provider must NOT fall back to internal continuity state
-	 * (e.g., lastResponseId) if previousResponseId is absent.
-	 * Used to enforce "skip once" after a condense operation.
- */ - suppressPreviousResponseId?: boolean } export interface ApiHandler { diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts index 1d76d387a9f..fdd71ba3f6d 100644 --- a/src/api/providers/__tests__/openai-native.spec.ts +++ b/src/api/providers/__tests__/openai-native.spec.ts @@ -160,12 +160,8 @@ describe("OpenAiNativeHandler", () => { expect(results.length).toBe(1) expect(results[0].type).toBe("usage") // Use type assertion to avoid TypeScript errors - const usageResult = results[0] as any - expect(usageResult.inputTokens).toBe(0) - expect(usageResult.outputTokens).toBe(0) - // When no cache tokens are present, they should be undefined - expect(usageResult.cacheWriteTokens).toBeUndefined() - expect(usageResult.cacheReadTokens).toBeUndefined() + expect((results[0] as any).inputTokens).toBe(0) + expect((results[0] as any).outputTokens).toBe(0) // Verify developer role is used for system prompt with o1 model expect(mockCreate).toHaveBeenCalledWith({ @@ -290,111 +286,6 @@ describe("OpenAiNativeHandler", () => { expect((results[1] as any).outputTokens).toBe(5) expect((results[1] as any).totalCost).toBeCloseTo(0.00006, 6) }) - - it("should handle cache tokens in streaming response", async () => { - const mockStream = [ - { choices: [{ delta: { content: "Hello" } }], usage: null }, - { choices: [{ delta: { content: " cached" } }], usage: null }, - { - choices: [{ delta: { content: " response" } }], - usage: { - prompt_tokens: 100, - completion_tokens: 10, - prompt_tokens_details: { - cached_tokens: 80, - audio_tokens: 0, - }, - completion_tokens_details: { - reasoning_tokens: 0, - audio_tokens: 0, - accepted_prediction_tokens: 0, - rejected_prediction_tokens: 0, - }, - }, - }, - ] - - mockCreate.mockResolvedValueOnce( - (async function* () { - for (const chunk of mockStream) { - yield chunk - } - })(), - ) - - const generator = handler.createMessage(systemPrompt, messages) - const results = [] - for await (const result of generator) { - results.push(result) - } - - // Verify text responses - expect(results.length).toBe(4) - expect(results[0]).toMatchObject({ type: "text", text: "Hello" }) - expect(results[1]).toMatchObject({ type: "text", text: " cached" }) - expect(results[2]).toMatchObject({ type: "text", text: " response" }) - - // Check usage data includes cache tokens - expect(results[3].type).toBe("usage") - const usageChunk = results[3] as any - expect(usageChunk.inputTokens).toBe(100) // Total input tokens (includes cached) - expect(usageChunk.outputTokens).toBe(10) - expect(usageChunk.cacheReadTokens).toBe(80) // Cached tokens from prompt_tokens_details - expect(usageChunk.cacheWriteTokens).toBeUndefined() // No cache write tokens in standard response - - // Verify cost calculation takes cache into account - // GPT-4.1 pricing: input $2/1M, output $8/1M, cache read $0.5/1M - // OpenAI's prompt_tokens includes cached tokens, so we need to calculate: - // - Non-cached input tokens: 100 - 80 = 20 - // - Cost for non-cached input: (20 / 1_000_000) * 2.0 - // - Cost for cached input: (80 / 1_000_000) * 0.5 - // - Cost for output: (10 / 1_000_000) * 8.0 - const nonCachedInputTokens = 100 - 80 - const expectedNonCachedInputCost = (nonCachedInputTokens / 1_000_000) * 2.0 - const expectedCacheReadCost = (80 / 1_000_000) * 0.5 - const expectedOutputCost = (10 / 1_000_000) * 8.0 - const expectedTotalCost = expectedNonCachedInputCost + expectedCacheReadCost + expectedOutputCost - 
expect(usageChunk.totalCost).toBeCloseTo(expectedTotalCost, 10) - }) - - it("should handle cache write tokens if present", async () => { - const mockStream = [ - { choices: [{ delta: { content: "Test" } }], usage: null }, - { - choices: [{ delta: {} }], - usage: { - prompt_tokens: 150, - completion_tokens: 5, - prompt_tokens_details: { - cached_tokens: 50, - }, - cache_creation_input_tokens: 30, // Cache write tokens - }, - }, - ] - - mockCreate.mockResolvedValueOnce( - (async function* () { - for (const chunk of mockStream) { - yield chunk - } - })(), - ) - - const generator = handler.createMessage(systemPrompt, messages) - const results = [] - for await (const result of generator) { - results.push(result) - } - - // Check usage data includes both cache read and write tokens - const usageChunk = results.find((r) => r.type === "usage") as any - expect(usageChunk).toBeDefined() - expect(usageChunk.inputTokens).toBe(150) - expect(usageChunk.outputTokens).toBe(5) - expect(usageChunk.cacheReadTokens).toBe(50) - expect(usageChunk.cacheWriteTokens).toBe(30) - }) }) describe("completePrompt", () => { @@ -570,40 +461,7 @@ describe("OpenAiNativeHandler", () => { }) describe("GPT-5 models", () => { - it("should handle GPT-5 model with Responses API", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Simulate actual GPT-5 Responses API SSE stream format - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.created","response":{"id":"test","status":"in_progress"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Hello"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":" world"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"usage":{"prompt_tokens":10,"completion_tokens":2}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - + it("should handle GPT-5 model with developer role", async () => { handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -615,56 +473,20 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - // Verify Responses API is called with correct parameters - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", + // Verify developer role is used for GPT-5 with default parameters + expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ - method: "POST", - headers: expect.objectContaining({ - "Content-Type": "application/json", - Authorization: "Bearer test-api-key", - Accept: "text/event-stream", - }), - body: expect.any(String), + model: "gpt-5-2025-08-07", + messages: [{ role: "developer", content: expect.stringContaining(systemPrompt) }], + stream: true, + stream_options: { include_usage: true }, + reasoning_effort: "minimal", // Default for GPT-5 + verbosity: "medium", // Default verbosity }), ) - const body1 = (mockFetch.mock.calls[0][1] as any).body as string - expect(body1).toContain('"model":"gpt-5-2025-08-07"') - expect(body1).toContain('"input":"Developer: You are a helpful assistant.\\n\\nUser: Hello!"') - expect(body1).toContain('"effort":"medium"') - expect(body1).toContain('"summary":"auto"') - 
expect(body1).toContain('"verbosity":"medium"') - expect(body1).toContain('"temperature":1') - expect(body1).toContain('"max_output_tokens"') - - // Verify the streamed content - const textChunks = chunks.filter((c) => c.type === "text") - expect(textChunks).toHaveLength(2) - expect(textChunks[0].text).toBe("Hello") - expect(textChunks[1].text).toBe(" world") - - // Clean up - delete (global as any).fetch }) - it("should handle GPT-5-mini model with Responses API", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Response"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - + it("should handle GPT-5-mini model", async () => { handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-mini-2025-08-07", @@ -676,36 +498,19 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - // Verify correct model and default parameters - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", + expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ - body: expect.stringContaining('"model":"gpt-5-mini-2025-08-07"'), + model: "gpt-5-mini-2025-08-07", + messages: [{ role: "developer", content: expect.stringContaining(systemPrompt) }], + stream: true, + stream_options: { include_usage: true }, + reasoning_effort: "minimal", // Default for GPT-5 + verbosity: "medium", // Default verbosity }), ) - - // Clean up - delete (global as any).fetch }) - it("should handle GPT-5-nano model with Responses API", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Nano response"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - + it("should handle GPT-5-nano model", async () => { handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-nano-2025-08-07", @@ -717,36 +522,19 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - // Verify correct model - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", + expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ - body: expect.stringContaining('"model":"gpt-5-nano-2025-08-07"'), + model: "gpt-5-nano-2025-08-07", + messages: [{ role: "developer", content: expect.stringContaining(systemPrompt) }], + stream: true, + stream_options: { include_usage: true }, + reasoning_effort: "minimal", // Default for GPT-5 + verbosity: "medium", // Default verbosity }), ) - - // Clean up - delete (global as any).fetch }) it("should support verbosity control for GPT-5", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Low verbosity"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - 
global.fetch = mockFetch as any - handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -761,77 +549,18 @@ describe("OpenAiNativeHandler", () => { } // Verify that verbosity is passed in the request - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", + expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ - body: expect.stringContaining('"verbosity":"low"'), + model: "gpt-5-2025-08-07", + messages: expect.any(Array), + stream: true, + stream_options: { include_usage: true }, + verbosity: "low", }), ) - - // Clean up - delete (global as any).fetch }) it("should support minimal reasoning effort for GPT-5", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Minimal effort"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - reasoningEffort: "minimal" as any, // GPT-5 supports minimal - }) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // With minimal reasoning effort, the model should pass it through - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", - expect.objectContaining({ - body: expect.stringContaining('"effort":"minimal"'), - }), - ) - - // Clean up - delete (global as any).fetch - }) - - it("should support low reasoning effort for GPT-5", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Low effort response"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -844,48 +573,25 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - // Should use Responses API with low reasoning effort - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", + // With low reasoning effort, the model should pass it through + expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ - body: expect.any(String), + model: "gpt-5-2025-08-07", + messages: expect.any(Array), + stream: true, + stream_options: { include_usage: true }, + reasoning_effort: "low", + verbosity: "medium", // Default verbosity }), ) - const body2 = (mockFetch.mock.calls[0][1] as any).body as string - expect(body2).toContain('"model":"gpt-5-2025-08-07"') - expect(body2).toContain('"effort":"low"') - expect(body2).toContain('"summary":"auto"') - expect(body2).toContain('"verbosity":"medium"') - expect(body2).toContain('"temperature":1') - expect(body2).toContain('"max_output_tokens"') - - // Clean up - delete (global as any).fetch }) it("should support both verbosity and reasoning effort together for GPT-5", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: 
new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"High verbosity minimal effort"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", - verbosity: "high", - reasoningEffort: "minimal" as any, + verbosity: "high", // Set verbosity through options + reasoningEffort: "low", // Set reasoning effort }) const stream = handler.createMessage(systemPrompt, messages) @@ -894,863 +600,17 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - // Should use Responses API with both parameters - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", + // Verify both parameters are passed + expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ - body: expect.any(String), + model: "gpt-5-2025-08-07", + messages: expect.any(Array), + stream: true, + stream_options: { include_usage: true }, + reasoning_effort: "low", + verbosity: "high", }), ) - const body3 = (mockFetch.mock.calls[0][1] as any).body as string - expect(body3).toContain('"model":"gpt-5-2025-08-07"') - expect(body3).toContain('"effort":"minimal"') - expect(body3).toContain('"summary":"auto"') - expect(body3).toContain('"verbosity":"high"') - expect(body3).toContain('"temperature":1') - expect(body3).toContain('"max_output_tokens"') - - // Clean up - delete (global as any).fetch - }) - - it("should handle actual GPT-5 Responses API format", async () => { - // Mock fetch with actual response format from GPT-5 - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Test actual GPT-5 response format - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.created","response":{"id":"test","status":"in_progress"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.in_progress","response":{"status":"in_progress"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"First text"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":" Second text"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"reasoning","text":"Some reasoning"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"usage":{"prompt_tokens":100,"completion_tokens":20}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Should handle the actual format correctly - const textChunks = chunks.filter((c) => c.type === "text") - const reasoningChunks = chunks.filter((c) => c.type === "reasoning") - - expect(textChunks).toHaveLength(2) - expect(textChunks[0].text).toBe("First text") - expect(textChunks[1].text).toBe(" Second text") - - 
expect(reasoningChunks).toHaveLength(1) - expect(reasoningChunks[0].text).toBe("Some reasoning") - - // Should also have usage information with cost - const usageChunks = chunks.filter((c) => c.type === "usage") - expect(usageChunks).toHaveLength(1) - expect(usageChunks[0]).toMatchObject({ - type: "usage", - inputTokens: 100, - outputTokens: 20, - totalCost: expect.any(Number), - }) - - // Verify cost calculation (GPT-5 pricing: input $1.25/M, output $10/M) - const expectedInputCost = (100 / 1_000_000) * 1.25 - const expectedOutputCost = (20 / 1_000_000) * 10.0 - const expectedTotalCost = expectedInputCost + expectedOutputCost - expect(usageChunks[0].totalCost).toBeCloseTo(expectedTotalCost, 10) - - // Clean up - delete (global as any).fetch - }) - - it("should handle Responses API with no content gracefully", async () => { - // Mock fetch with empty response - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue(new TextEncoder().encode('data: {"someField":"value"}\n\n')) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - - // Should not throw, just warn - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Should have no content chunks when stream is empty - const contentChunks = chunks.filter((c) => c.type === "text" || c.type === "reasoning") - - expect(contentChunks).toHaveLength(0) - - // Clean up - delete (global as any).fetch - }) - - it("should support previous_response_id for conversation continuity", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Include response ID in the response - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.created","response":{"id":"resp_123","status":"in_progress"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Response with ID"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"id":"resp_123","usage":{"prompt_tokens":10,"completion_tokens":3}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // First request - should not have previous_response_id - const stream1 = handler.createMessage(systemPrompt, messages) - const chunks1: any[] = [] - for await (const chunk of stream1) { - chunks1.push(chunk) - } - - // Verify first request doesn't include previous_response_id - let firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(firstCallBody.previous_response_id).toBeUndefined() - - // Second request with metadata - should include previous_response_id - const stream2 = handler.createMessage(systemPrompt, messages, { - taskId: "test-task", - previousResponseId: "resp_456", - }) - const chunks2: any[] = [] - for await (const chunk of stream2) { - chunks2.push(chunk) - } - - // Verify second request includes the provided previous_response_id - let secondCallBody = 
JSON.parse(mockFetch.mock.calls[1][1].body) - expect(secondCallBody.previous_response_id).toBe("resp_456") - - // Clean up - delete (global as any).fetch - }) - - it("should handle unhandled stream events gracefully", async () => { - // Mock fetch for the fallback SSE path (which is what gets used when SDK fails) - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Hello"}}\n\n', - ), - ) - // This event is not handled, so it should be ignored - controller.enqueue( - new TextEncoder().encode('data: {"type":"response.audio.delta","delta":"..."}\n\n'), - ) - controller.enqueue(new TextEncoder().encode('data: {"type":"response.done","response":{}}\n\n')) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - // Also mock the SDK to throw an error so it falls back to fetch - const mockClient = { - responses: { - create: vitest.fn().mockRejectedValue(new Error("SDK not available")), - }, - } - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // Replace the client with our mock - ;(handler as any).client = mockClient - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - const errors: any[] = [] - - try { - for await (const chunk of stream) { - chunks.push(chunk) - } - } catch (error) { - errors.push(error) - } - - // Log for debugging - if (chunks.length === 0 && errors.length === 0) { - console.log("No chunks and no errors received") - } - if (errors.length > 0) { - console.log("Errors:", errors) - } - - expect(errors.length).toBe(0) - const textChunks = chunks.filter((c) => c.type === "text") - expect(textChunks.length).toBeGreaterThan(0) - expect(textChunks[0].text).toBe("Hello") - - delete (global as any).fetch - }) - - it("should use stored response ID when metadata doesn't provide one", async () => { - // Mock fetch for Responses API - const mockFetch = vitest - .fn() - .mockResolvedValueOnce({ - ok: true, - body: new ReadableStream({ - start(controller) { - // First response with ID - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"id":"resp_789","output":[{"type":"text","content":[{"type":"text","text":"First"}]}],"usage":{"prompt_tokens":10,"completion_tokens":1}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - .mockResolvedValueOnce({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Second response - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Second"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // First request - establishes response ID - const stream1 = handler.createMessage(systemPrompt, messages) - for await (const chunk of stream1) { - // consume stream - } - - // Second request without metadata - should use stored response ID - const stream2 = handler.createMessage(systemPrompt, messages, { taskId: "test-task" }) - for await (const chunk of stream2) { - // consume stream - } - - // Verify second 
request uses the stored response ID from first request - let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) - expect(secondCallBody.previous_response_id).toBe("resp_789") - - // Clean up - delete (global as any).fetch - }) - - it("should only send latest message when using previous_response_id", async () => { - // Mock fetch for Responses API - const mockFetch = vitest - .fn() - .mockResolvedValueOnce({ - ok: true, - body: new ReadableStream({ - start(controller) { - // First response with ID - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"id":"resp_001","output":[{"type":"text","content":[{"type":"text","text":"First"}]}],"usage":{"prompt_tokens":50,"completion_tokens":1}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - .mockResolvedValueOnce({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Second response - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Second"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"id":"resp_002","usage":{"prompt_tokens":10,"completion_tokens":1}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // First request with full conversation - const firstMessages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "Hello" }, - { role: "assistant", content: "Hi there!" }, - { role: "user", content: "How are you?" }, - ] - - const stream1 = handler.createMessage(systemPrompt, firstMessages) - for await (const chunk of stream1) { - // consume stream - } - - // Verify first request sends full conversation - let firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(firstCallBody.input).toContain("Hello") - expect(firstCallBody.input).toContain("Hi there!") - expect(firstCallBody.input).toContain("How are you?") - expect(firstCallBody.previous_response_id).toBeUndefined() - - // Second request with previous_response_id - should only send latest message - const secondMessages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "Hello" }, - { role: "assistant", content: "Hi there!" }, - { role: "user", content: "How are you?" }, - { role: "assistant", content: "I'm doing well!" }, - { role: "user", content: "What's the weather?" 
}, // Latest message - ] - - const stream2 = handler.createMessage(systemPrompt, secondMessages, { - taskId: "test-task", - previousResponseId: "resp_001", - }) - for await (const chunk of stream2) { - // consume stream - } - - // Verify second request only sends the latest user message - let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) - expect(secondCallBody.input).toBe("User: What's the weather?") - expect(secondCallBody.input).not.toContain("Hello") - expect(secondCallBody.input).not.toContain("Hi there!") - expect(secondCallBody.input).not.toContain("How are you?") - expect(secondCallBody.previous_response_id).toBe("resp_001") - - // Clean up - delete (global as any).fetch - }) - - it("should correctly prepare GPT-5 input with conversation continuity", () => { - const gpt5Handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // @ts-expect-error - private method - const { formattedInput, previousResponseId } = gpt5Handler.prepareGpt5Input(systemPrompt, messages, { - taskId: "task1", - previousResponseId: "resp_123", - }) - - expect(previousResponseId).toBe("resp_123") - expect(formattedInput).toBe("User: Hello!") - }) - - it("should provide helpful error messages for different error codes", async () => { - const testCases = [ - { status: 400, expectedMessage: "Invalid request to GPT-5 API" }, - { status: 401, expectedMessage: "Authentication failed" }, - { status: 403, expectedMessage: "Access denied" }, - { status: 404, expectedMessage: "GPT-5 API endpoint not found" }, - { status: 429, expectedMessage: "Rate limit exceeded" }, - { status: 500, expectedMessage: "OpenAI service error" }, - ] - - for (const { status, expectedMessage } of testCases) { - // Mock fetch with error response - const mockFetch = vitest.fn().mockResolvedValue({ - ok: false, - status, - statusText: "Error", - text: async () => JSON.stringify({ error: { message: "Test error" } }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - const stream = handler.createMessage(systemPrompt, messages) - - await expect(async () => { - for await (const chunk of stream) { - // Should throw before yielding anything - } - }).rejects.toThrow(expectedMessage) - } - - // Clean up - delete (global as any).fetch - }) - }) -}) - -// Added tests for GPT-5 streaming event coverage per PR_review_gpt5_final.md - -describe("GPT-5 streaming event coverage (additional)", () => { - it("should handle reasoning delta events for GPT-5", async () => { - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.reasoning.delta","delta":"Thinking about the problem..."}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode('data: {"type":"response.text.delta","delta":"The answer is..."}\n\n'), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - // @ts-ignore - global.fetch = mockFetch - - const handler = new OpenAiNativeHandler({ - apiModelId: "gpt-5-2025-08-07", - openAiNativeApiKey: "test-api-key", - }) - - const systemPrompt = "You are a helpful assistant." - const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello!" 
}] - const stream = handler.createMessage(systemPrompt, messages) - - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - const reasoningChunks = chunks.filter((c) => c.type === "reasoning") - const textChunks = chunks.filter((c) => c.type === "text") - - expect(reasoningChunks).toHaveLength(1) - expect(reasoningChunks[0].text).toBe("Thinking about the problem...") - expect(textChunks).toHaveLength(1) - expect(textChunks[0].text).toBe("The answer is...") - - // @ts-ignore - delete global.fetch - }) - - it("should handle refusal delta events for GPT-5 and prefix output", async () => { - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.refusal.delta","delta":"I cannot comply with this request."}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - // @ts-ignore - global.fetch = mockFetch - - const handler = new OpenAiNativeHandler({ - apiModelId: "gpt-5-2025-08-07", - openAiNativeApiKey: "test-api-key", - }) - - const systemPrompt = "You are a helpful assistant." - const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Do something disallowed" }] - const stream = handler.createMessage(systemPrompt, messages) - - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - const textChunks = chunks.filter((c) => c.type === "text") - expect(textChunks).toHaveLength(1) - expect(textChunks[0].text).toBe("[Refusal] I cannot comply with this request.") - - // @ts-ignore - delete global.fetch - }) - - it("should ignore malformed JSON lines in SSE stream", async () => { - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Before"}}\n\n', - ), - ) - // Malformed JSON line - controller.enqueue( - new TextEncoder().encode('data: {"type":"response.text.delta","delta":"Bad"\n\n'), - ) - // Valid line after malformed - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"After"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - // @ts-ignore - global.fetch = mockFetch - - const handler = new OpenAiNativeHandler({ - apiModelId: "gpt-5-2025-08-07", - openAiNativeApiKey: "test-api-key", - }) - - const systemPrompt = "You are a helpful assistant." - const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello!" 
}] - const stream = handler.createMessage(systemPrompt, messages) - - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // It should not throw and still capture the valid texts around the malformed line - const textChunks = chunks.filter((c) => c.type === "text") - expect(textChunks.map((c: any) => c.text)).toEqual(["Before", "After"]) - - // @ts-ignore - delete global.fetch - }) - - describe("Codex Mini Model", () => { - let handler: OpenAiNativeHandler - const mockOptions: ApiHandlerOptions = { - openAiNativeApiKey: "test-api-key", - apiModelId: "codex-mini-latest", - } - - it("should handle codex-mini-latest streaming response", async () => { - // Mock fetch for Codex Mini responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Codex Mini uses the same responses API format - controller.enqueue( - new TextEncoder().encode('data: {"type":"response.output_text.delta","delta":"Hello"}\n\n'), - ) - controller.enqueue( - new TextEncoder().encode('data: {"type":"response.output_text.delta","delta":" from"}\n\n'), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_text.delta","delta":" Codex"}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_text.delta","delta":" Mini!"}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"usage":{"prompt_tokens":50,"completion_tokens":10}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "codex-mini-latest", - }) - - const systemPrompt = "You are a helpful coding assistant." 
- const messages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "Write a hello world function" }, - ] - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Verify text chunks - const textChunks = chunks.filter((c) => c.type === "text") - expect(textChunks).toHaveLength(4) - expect(textChunks.map((c) => c.text).join("")).toBe("Hello from Codex Mini!") - - // Verify usage data from API - const usageChunks = chunks.filter((c) => c.type === "usage") - expect(usageChunks).toHaveLength(1) - expect(usageChunks[0]).toMatchObject({ - type: "usage", - inputTokens: 50, - outputTokens: 10, - totalCost: expect.any(Number), // Codex Mini has pricing: $1.5/M input, $6/M output - }) - - // Verify cost is calculated correctly based on API usage data - const expectedCost = (50 / 1_000_000) * 1.5 + (10 / 1_000_000) * 6 - expect(usageChunks[0].totalCost).toBeCloseTo(expectedCost, 10) - - // Verify the request was made with correct parameters - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", - expect.objectContaining({ - method: "POST", - headers: expect.objectContaining({ - "Content-Type": "application/json", - Authorization: "Bearer test-api-key", - Accept: "text/event-stream", - }), - body: expect.any(String), - }), - ) - - const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(requestBody).toMatchObject({ - model: "codex-mini-latest", - input: "Developer: You are a helpful coding assistant.\n\nUser: Write a hello world function", - stream: true, - }) - - // Clean up - delete (global as any).fetch - }) - - it("should handle codex-mini-latest non-streaming completion", async () => { - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "codex-mini-latest", - }) - - // Codex Mini now uses the same Responses API as GPT-5, which doesn't support non-streaming - await expect(handler.completePrompt("Write a hello world function in Python")).rejects.toThrow( - "completePrompt is not supported for codex-mini-latest. Use createMessage (Responses API) instead.", - ) - }) - - it("should handle codex-mini-latest API errors", async () => { - // Mock fetch with error response - const mockFetch = vitest.fn().mockResolvedValue({ - ok: false, - status: 429, - statusText: "Too Many Requests", - text: async () => "Rate limit exceeded", - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "codex-mini-latest", - }) - - const systemPrompt = "You are a helpful assistant." 
- const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }] - - const stream = handler.createMessage(systemPrompt, messages) - - // Should throw an error (using the same error format as GPT-5) - await expect(async () => { - for await (const chunk of stream) { - // consume stream - } - }).rejects.toThrow("Rate limit exceeded") - - // Clean up - delete (global as any).fetch - }) - - it("should handle codex-mini-latest with multiple user messages", async () => { - // Mock fetch for streaming response - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_text.delta","delta":"Combined response"}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode('data: {"type":"response.completed"}\n\n')) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "codex-mini-latest", - }) - - const systemPrompt = "You are a helpful assistant." - const messages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "First question" }, - { role: "assistant", content: "First answer" }, - { role: "user", content: "Second question" }, - ] - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Verify the request body includes full conversation like GPT-5 - const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(requestBody.input).toContain("Developer: You are a helpful assistant") - expect(requestBody.input).toContain("User: First question") - expect(requestBody.input).toContain("Assistant: First answer") - expect(requestBody.input).toContain("User: Second question") - - // Clean up - delete (global as any).fetch - }) - - it("should handle codex-mini-latest stream error events", async () => { - // Mock fetch with error event in stream - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_text.delta","delta":"Partial"}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.error","error":{"message":"Model overloaded"}}\n\n', - ), - ) - // The error handler will throw, but we still need to close the stream - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "codex-mini-latest", - }) - - const systemPrompt = "You are a helpful assistant." 
- const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }] - - const stream = handler.createMessage(systemPrompt, messages) - - // Should throw an error when encountering error event - await expect(async () => { - const chunks = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - }).rejects.toThrow("Responses API error: Model overloaded") - - // Clean up - delete (global as any).fetch }) }) }) diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 053af7f5e5f..5e498bee450 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -7,10 +7,8 @@ import { OpenAiNativeModelId, openAiNativeModels, OPENAI_NATIVE_DEFAULT_TEMPERATURE, - GPT5_DEFAULT_TEMPERATURE, type ReasoningEffort, type VerbosityLevel, - type ReasoningEffortWithMinimal, } from "@roo-code/types" import type { ApiHandlerOptions } from "../../shared/api" @@ -18,7 +16,7 @@ import type { ApiHandlerOptions } from "../../shared/api" import { calculateApiCostOpenAI } from "../../shared/cost" import { convertToOpenAiMessages } from "../transform/openai-format" -import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" +import { ApiStream } from "../transform/stream" import { getModelParams } from "../transform/model-params" import { BaseProvider } from "./base-provider" @@ -26,77 +24,43 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". export type OpenAiNativeModel = ReturnType -// GPT-5 specific types +// GPT-5 specific types for Responses API +type ReasoningEffortWithMinimal = ReasoningEffort | "minimal" + +interface GPT5ResponsesAPIParams { + model: string + input: string + reasoning?: { + effort: ReasoningEffortWithMinimal + } + text?: { + verbosity: VerbosityLevel + } +} + +interface GPT5ResponseChunk { + type: "text" | "reasoning" | "usage" + text?: string + reasoning?: string + usage?: { + input_tokens: number + output_tokens: number + reasoning_tokens?: number + total_tokens: number + } +} export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI - private lastResponseId: string | undefined - private responseIdPromise: Promise | undefined - private responseIdResolver: ((value: string | undefined) => void) | undefined - - // Event types handled by the shared GPT-5 event processor to avoid duplication - private readonly gpt5CoreHandledTypes = new Set([ - "response.text.delta", - "response.output_text.delta", - "response.reasoning.delta", - "response.reasoning_text.delta", - "response.reasoning_summary.delta", - "response.reasoning_summary_text.delta", - "response.refusal.delta", - "response.output_item.added", - "response.done", - "response.completed", - ]) constructor(options: ApiHandlerOptions) { super() this.options = options - // Default to including reasoning.summary: "auto" for GPT‑5 unless explicitly disabled - if (this.options.enableGpt5ReasoningSummary === undefined) { - this.options.enableGpt5ReasoningSummary = true - } const apiKey = this.options.openAiNativeApiKey ?? "not-provided" this.client = new OpenAI({ baseURL: this.options.openAiNativeBaseUrl, apiKey }) } - private normalizeGpt5Usage(usage: any, model: OpenAiNativeModel): ApiStreamUsageChunk | undefined { - if (!usage) return undefined - - const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0 - const totalOutputTokens = usage.output_tokens ?? usage.completion_tokens ?? 
0 - const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? 0 - const cacheReadTokens = usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? 0 - - const totalCost = calculateApiCostOpenAI( - model.info, - totalInputTokens, - totalOutputTokens, - cacheWriteTokens || 0, - cacheReadTokens || 0, - ) - - return { - type: "usage", - inputTokens: totalInputTokens, - outputTokens: totalOutputTokens, - cacheWriteTokens, - cacheReadTokens, - totalCost, - } - } - - private resolveResponseId(responseId: string | undefined): void { - if (responseId) { - this.lastResponseId = responseId - } - // Resolve the promise so the next request can use this ID - if (this.responseIdResolver) { - this.responseIdResolver(responseId) - this.responseIdResolver = undefined - } - } - override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], @@ -117,9 +81,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio yield* this.handleReasonerMessage(model, id, systemPrompt, messages) } else if (model.id.startsWith("o1")) { yield* this.handleO1FamilyMessage(model, systemPrompt, messages) - } else if (this.isResponsesApiModel(model.id)) { - // Both GPT-5 and Codex Mini use the v1/responses endpoint - yield* this.handleResponsesApiMessage(model, systemPrompt, messages, metadata) + } else if (this.isGpt5Model(model.id)) { + yield* this.handleGpt5Message(model, systemPrompt, messages) } else { yield* this.handleDefaultModelMessage(model, systemPrompt, messages) } @@ -194,8 +157,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio ...(reasoning && reasoning), } - // Add verbosity if supported - if (verbosity) { + // Add verbosity if supported (for future GPT-5 models) + if (verbosity && model.id.startsWith("gpt-5")) { params.verbosity = verbosity } @@ -213,935 +176,185 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio ) } - private async *handleResponsesApiMessage( + private async *handleGpt5Message( model: OpenAiNativeModel, systemPrompt: string, messages: Anthropic.Messages.MessageParam[], - metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - // Prefer the official SDK Responses API with streaming; fall back to fetch-based SSE if needed. 
- const { verbosity } = this.getModel() + // GPT-5 uses the Responses API, not Chat Completions + // We need to format the input as a single string combining system prompt and messages + const formattedInput = this.formatInputForResponsesAPI(systemPrompt, messages) - // Both GPT-5 and Codex Mini use the same v1/responses endpoint format - - // Resolve reasoning effort (supports "minimal" for GPT‑5) + // Get reasoning effort, supporting the new "minimal" option for GPT-5 const reasoningEffort = this.getGpt5ReasoningEffort(model) - // Wait for any pending response ID from a previous request to be available - // This handles the race condition with fast nano model responses - let effectivePreviousResponseId = metadata?.previousResponseId - - // Only allow fallback to pending/last response id when not explicitly suppressed - if (!metadata?.suppressPreviousResponseId) { - // If we have a pending response ID promise, wait for it to resolve - if (!effectivePreviousResponseId && this.responseIdPromise) { - try { - const resolvedId = await Promise.race([ - this.responseIdPromise, - // Timeout after 100ms to avoid blocking too long - new Promise((resolve) => setTimeout(() => resolve(undefined), 100)), - ]) - if (resolvedId) { - effectivePreviousResponseId = resolvedId - } - } catch { - // Non-fatal if promise fails - } - } - - // Fall back to the last known response ID if still not available - if (!effectivePreviousResponseId) { - effectivePreviousResponseId = this.lastResponseId - } - } - - // Format input and capture continuity id - const { formattedInput, previousResponseId } = this.prepareGpt5Input(systemPrompt, messages, metadata) - const requestPreviousResponseId = effectivePreviousResponseId ?? previousResponseId - - // Create a new promise for this request's response ID - this.responseIdPromise = new Promise((resolve) => { - this.responseIdResolver = resolve - }) - - // Build a request body (also used for fallback) - // Ensure we explicitly pass max_output_tokens for GPT‑5 based on Roo's reserved model response calculation - // so requests do not default to very large limits (e.g., 120k). - interface Gpt5RequestBody { - model: string - input: string - stream: boolean - reasoning?: { effort: ReasoningEffortWithMinimal; summary?: "auto" } - text?: { verbosity: VerbosityLevel } - temperature?: number - max_output_tokens?: number - previous_response_id?: string - } + // Get verbosity from model settings, default to "medium" if not specified + const verbosity = model.verbosity || "medium" - const requestBody: Gpt5RequestBody = { + // Prepare the request parameters for Responses API + const params: GPT5ResponsesAPIParams = { model: model.id, input: formattedInput, - stream: true, ...(reasoningEffort && { reasoning: { effort: reasoningEffort, - ...(this.options.enableGpt5ReasoningSummary ? { summary: "auto" as const } : {}), }, }), - text: { verbosity: (verbosity || "medium") as VerbosityLevel }, - temperature: this.options.modelTemperature ?? GPT5_DEFAULT_TEMPERATURE, - // Explicitly include the calculated max output tokens for GPT‑5. - // Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams). - ...(model.maxTokens ? 
{ max_output_tokens: model.maxTokens } : {}), - ...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }), + text: { + verbosity: verbosity, + }, } - try { - // Use the official SDK - const stream = (await (this.client as any).responses.create(requestBody)) as AsyncIterable - - if (typeof (stream as any)[Symbol.asyncIterator] !== "function") { - throw new Error( - "OpenAI SDK did not return an AsyncIterable for Responses API streaming. Falling back to SSE.", - ) - } - - for await (const event of stream) { - for await (const outChunk of this.processGpt5Event(event, model)) { - yield outChunk - } - } - } catch (sdkErr: any) { - // Check if this is a 400 error about previous_response_id not found - const errorMessage = sdkErr?.message || sdkErr?.error?.message || "" - const is400Error = sdkErr?.status === 400 || sdkErr?.response?.status === 400 - const isPreviousResponseError = - errorMessage.includes("Previous response") || errorMessage.includes("not found") - - if (is400Error && requestBody.previous_response_id && isPreviousResponseError) { - // Log the error and retry without the previous_response_id - console.warn( - `[GPT-5] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`, - ) + // Since the OpenAI SDK doesn't yet support the Responses API, + // we'll make a direct HTTP request + const response = await this.makeGpt5ResponsesAPIRequest(params, model) - // Remove the problematic previous_response_id and retry - const retryRequestBody = { ...requestBody } - delete retryRequestBody.previous_response_id - - // Clear the stored lastResponseId to prevent using it again - this.lastResponseId = undefined - - try { - // Retry with the SDK - const retryStream = (await (this.client as any).responses.create( - retryRequestBody, - )) as AsyncIterable - - if (typeof (retryStream as any)[Symbol.asyncIterator] !== "function") { - // If SDK fails, fall back to SSE - yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata) - return - } - - for await (const event of retryStream) { - for await (const outChunk of this.processGpt5Event(event, model)) { - yield outChunk - } - } - return - } catch (retryErr) { - // If retry also fails, fall back to SSE - yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata) - return - } - } - - // For other errors, fallback to manual SSE via fetch - yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata) - } + yield* this.handleGpt5StreamResponse(response, model) } private formatInputForResponsesAPI(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): string { - // Format the conversation for the Responses API input field - // Use Developer role format for GPT-5 (aligning with o1/o3 Developer role usage per GPT-5 Responses guidance) - // This ensures consistent instruction handling across reasoning models - let formattedInput = `Developer: ${systemPrompt}\n\n` + // Format the conversation for the Responses API's single input field + let formattedInput = `System: ${systemPrompt}\n\n` for (const message of messages) { const role = message.role === "user" ? 
"User" : "Assistant" - - // Handle text content - if (typeof message.content === "string") { - formattedInput += `${role}: ${message.content}\n\n` - } else if (Array.isArray(message.content)) { - // Handle content blocks - const textContent = message.content - .filter((block) => block.type === "text") - .map((block) => (block as any).text) - .join("\n") - if (textContent) { - formattedInput += `${role}: ${textContent}\n\n` - } - } + const content = + typeof message.content === "string" + ? message.content + : message.content.map((c) => (c.type === "text" ? c.text : "[image]")).join(" ") + formattedInput += `${role}: ${content}\n\n` } return formattedInput.trim() } - private formatSingleMessageForResponsesAPI(message: Anthropic.Messages.MessageParam): string { - // Format a single message for the Responses API when using previous_response_id - const role = message.role === "user" ? "User" : "Assistant" - - // Handle text content - if (typeof message.content === "string") { - return `${role}: ${message.content}` - } else if (Array.isArray(message.content)) { - // Handle content blocks - const textContent = message.content - .filter((block) => block.type === "text") - .map((block) => (block as any).text) - .join("\n") - if (textContent) { - return `${role}: ${textContent}` + private getGpt5ReasoningEffort(model: OpenAiNativeModel): ReasoningEffortWithMinimal | undefined { + const { reasoning } = model + + // Check if reasoning effort is configured + if (reasoning && "reasoning_effort" in reasoning) { + const effort = reasoning.reasoning_effort + // Support the new "minimal" effort level for GPT-5 + if (effort === "low" || effort === "medium" || effort === "high") { + return effort } } - return "" + // Default to "minimal" for GPT-5 models when not specified + // This provides fastest time-to-first-token as per documentation + return "minimal" } - private async *makeGpt5ResponsesAPIRequest( - requestBody: any, + private async makeGpt5ResponsesAPIRequest( + params: GPT5ResponsesAPIParams, model: OpenAiNativeModel, - metadata?: ApiHandlerCreateMessageMetadata, - ): ApiStream { - const apiKey = this.options.openAiNativeApiKey ?? 
"not-provided" - const baseUrl = this.options.openAiNativeBaseUrl || "https://api.openai.com" - const url = `${baseUrl}/v1/responses` - - try { - const response = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${apiKey}`, - Accept: "text/event-stream", - }, - body: JSON.stringify(requestBody), - }) - - if (!response.ok) { - const errorText = await response.text() - - let errorMessage = `GPT-5 API request failed (${response.status})` - let errorDetails = "" - - // Try to parse error as JSON for better error messages - try { - const errorJson = JSON.parse(errorText) - if (errorJson.error?.message) { - errorDetails = errorJson.error.message - } else if (errorJson.message) { - errorDetails = errorJson.message - } else { - errorDetails = errorText - } - } catch { - // If not JSON, use the raw text - errorDetails = errorText - } - - // Check if this is a 400 error about previous_response_id not found - const isPreviousResponseError = - errorDetails.includes("Previous response") || errorDetails.includes("not found") - - if (response.status === 400 && requestBody.previous_response_id && isPreviousResponseError) { - // Log the error and retry without the previous_response_id - console.warn( - `[GPT-5 SSE] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`, - ) - - // Remove the problematic previous_response_id and retry - const retryRequestBody = { ...requestBody } - delete retryRequestBody.previous_response_id - - // Clear the stored lastResponseId to prevent using it again - this.lastResponseId = undefined - // Resolve the promise once to unblock any waiting requests - this.resolveResponseId(undefined) - - // Retry the request without the previous_response_id - const retryResponse = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${apiKey}`, - Accept: "text/event-stream", - }, - body: JSON.stringify(retryRequestBody), - }) - - if (!retryResponse.ok) { - // If retry also fails, throw the original error - throw new Error(`GPT-5 API retry failed (${retryResponse.status})`) - } - - if (!retryResponse.body) { - throw new Error("GPT-5 Responses API error: No response body from retry request") - } - - // Handle the successful retry response - yield* this.handleGpt5StreamResponse(retryResponse.body, model) - return - } - - // Provide user-friendly error messages based on status code - switch (response.status) { - case 400: - errorMessage = "Invalid request to GPT-5 API. Please check your input parameters." - break - case 401: - errorMessage = "Authentication failed. Please check your OpenAI API key." - break - case 403: - errorMessage = "Access denied. Your API key may not have access to GPT-5 models." - break - case 404: - errorMessage = - "GPT-5 API endpoint not found. The model may not be available yet or requires a different configuration." - break - case 429: - errorMessage = "Rate limit exceeded. Please try again later." - break - case 500: - case 502: - case 503: - errorMessage = "OpenAI service error. Please try again later." 
- break - default: - errorMessage = `GPT-5 API error (${response.status})` - } - - // Append details if available - if (errorDetails) { - errorMessage += ` - ${errorDetails}` - } - - throw new Error(errorMessage) - } - - if (!response.body) { - throw new Error("GPT-5 Responses API error: No response body") - } - - // Handle streaming response - yield* this.handleGpt5StreamResponse(response.body, model) - } catch (error) { - if (error instanceof Error) { - // Re-throw with the original error message if it's already formatted - if (error.message.includes("GPT-5")) { - throw error - } - // Otherwise, wrap it with context - throw new Error(`Failed to connect to GPT-5 API: ${error.message}`) - } - // Handle non-Error objects - throw new Error(`Unexpected error connecting to GPT-5 API`) - } - } - - /** - * Prepares the input and conversation continuity parameters for a GPT-5 API call. - * - * - If a `previousResponseId` is available (either from metadata or the handler's state), - * it formats only the most recent user message for the input and returns the response ID - * to maintain conversation context. - * - Otherwise, it formats the entire conversation history (system prompt + messages) for the input. - * - * @returns An object containing the formatted input string and the previous response ID (if used). - */ - private prepareGpt5Input( - systemPrompt: string, - messages: Anthropic.Messages.MessageParam[], - metadata?: ApiHandlerCreateMessageMetadata, - ): { formattedInput: string; previousResponseId?: string } { - // Respect explicit suppression signal for continuity (e.g. immediately after condense) - const isFirstMessage = messages.length === 1 && messages[0].role === "user" - const allowFallback = !metadata?.suppressPreviousResponseId - - const previousResponseId = - metadata?.previousResponseId ?? (allowFallback && !isFirstMessage ? this.lastResponseId : undefined) - - if (previousResponseId) { - const lastUserMessage = [...messages].reverse().find((msg) => msg.role === "user") - const formattedInput = lastUserMessage ? this.formatSingleMessageForResponsesAPI(lastUserMessage) : "" - return { formattedInput, previousResponseId } - } else { - const formattedInput = this.formatInputForResponsesAPI(systemPrompt, messages) - return { formattedInput } + ): Promise> { + // The OpenAI SDK doesn't have direct support for the Responses API yet, + // but we can access it through the underlying client request method if available. + // For now, we'll use the Chat Completions API with GPT-5 specific formatting + // to maintain compatibility while the Responses API SDK support is being added. + + // Convert Responses API params to Chat Completions format + // GPT-5 models use "developer" role for system messages + const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "developer", content: params.input }] + + // Build the request parameters + const requestParams: any = { + model: params.model, + messages, + stream: true, + stream_options: { include_usage: true }, } - } - /** - * Handles the streaming response from the GPT-5 Responses API. - * - * This function iterates through the Server-Sent Events (SSE) stream, parses each event, - * and yields structured data chunks (`ApiStream`). It handles a wide variety of event types, - * including text deltas, reasoning, usage data, and various status/tool events. 
- * - * The following event types are intentionally ignored as they are not currently consumed - * by the client application: - * - Audio events (`response.audio.*`) - * - Most tool call events (e.g., `response.function_call_arguments.*`, `response.mcp_call.*`, etc.) - * as the client does not yet support rendering these tool interactions. - * - Status events (`response.created`, `response.in_progress`, etc.) as they are informational - * and do not affect the final output. - */ - private async *handleGpt5StreamResponse(body: ReadableStream, model: OpenAiNativeModel): ApiStream { - const reader = body.getReader() - const decoder = new TextDecoder() - let buffer = "" - let hasContent = false - let totalInputTokens = 0 - let totalOutputTokens = 0 - - try { - while (true) { - const { done, value } = await reader.read() - if (done) break - - buffer += decoder.decode(value, { stream: true }) - const lines = buffer.split("\n") - buffer = lines.pop() || "" - - for (const line of lines) { - if (line.startsWith("data: ")) { - const data = line.slice(6).trim() - if (data === "[DONE]") { - continue - } - - try { - const parsed = JSON.parse(data) - - // Store response ID for conversation continuity - if (parsed.response?.id) { - this.resolveResponseId(parsed.response.id) - } - - // Delegate standard event types to the shared processor to avoid duplication - if (parsed?.type && this.gpt5CoreHandledTypes.has(parsed.type)) { - for await (const outChunk of this.processGpt5Event(parsed, model)) { - // Track whether we've emitted any content so fallback handling can decide appropriately - if (outChunk.type === "text" || outChunk.type === "reasoning") { - hasContent = true - } - yield outChunk - } - continue - } - - // Check if this is a complete response (non-streaming format) - if (parsed.response && parsed.response.output && Array.isArray(parsed.response.output)) { - // Handle complete response in the initial event - for (const outputItem of parsed.response.output) { - if (outputItem.type === "text" && outputItem.content) { - for (const content of outputItem.content) { - if (content.type === "text" && content.text) { - hasContent = true - yield { - type: "text", - text: content.text, - } - } - } - } - // Additionally handle reasoning summaries if present (non-streaming summary output) - if (outputItem.type === "reasoning" && Array.isArray(outputItem.summary)) { - for (const summary of outputItem.summary) { - if (summary?.type === "summary_text" && typeof summary.text === "string") { - hasContent = true - yield { - type: "reasoning", - text: summary.text, - } - } - } - } - } - // Check for usage in the complete response - if (parsed.response.usage) { - const usageData = this.normalizeGpt5Usage(parsed.response.usage, model) - if (usageData) { - yield usageData - } - } - } - // Handle streaming delta events for text content - else if ( - parsed.type === "response.text.delta" || - parsed.type === "response.output_text.delta" - ) { - // Primary streaming event for text deltas - if (parsed.delta) { - hasContent = true - yield { - type: "text", - text: parsed.delta, - } - } - } else if ( - parsed.type === "response.text.done" || - parsed.type === "response.output_text.done" - ) { - // Text streaming completed - final text already streamed via deltas - } - // Handle reasoning delta events - else if ( - parsed.type === "response.reasoning.delta" || - parsed.type === "response.reasoning_text.delta" - ) { - // Streaming reasoning content - if (parsed.delta) { - hasContent = true - yield { - type: "reasoning", - 
text: parsed.delta, - } - } - } else if ( - parsed.type === "response.reasoning.done" || - parsed.type === "response.reasoning_text.done" - ) { - // Reasoning streaming completed - } - // Handle reasoning summary events - else if ( - parsed.type === "response.reasoning_summary.delta" || - parsed.type === "response.reasoning_summary_text.delta" - ) { - // Streaming reasoning summary - if (parsed.delta) { - hasContent = true - yield { - type: "reasoning", - text: parsed.delta, - } - } - } else if ( - parsed.type === "response.reasoning_summary.done" || - parsed.type === "response.reasoning_summary_text.done" - ) { - // Reasoning summary completed - } - // Handle refusal delta events - else if (parsed.type === "response.refusal.delta") { - // Model is refusing to answer - if (parsed.delta) { - hasContent = true - yield { - type: "text", - text: `[Refusal] ${parsed.delta}`, - } - } - } else if (parsed.type === "response.refusal.done") { - // Refusal completed - } - // Handle audio delta events (for multimodal responses) - else if (parsed.type === "response.audio.delta") { - // Audio streaming - we'll skip for now as we focus on text - // Could be handled in future for voice responses - } else if (parsed.type === "response.audio.done") { - // Audio completed - } - // Handle audio transcript delta events - else if (parsed.type === "response.audio_transcript.delta") { - // Audio transcript streaming - if (parsed.delta) { - hasContent = true - yield { - type: "text", - text: parsed.delta, - } - } - } else if (parsed.type === "response.audio_transcript.done") { - // Audio transcript completed - } - // Handle content part events (for structured content) - else if (parsed.type === "response.content_part.added") { - // New content part added - could be text, image, etc. 
- if (parsed.part?.type === "text" && parsed.part.text) { - hasContent = true - yield { - type: "text", - text: parsed.part.text, - } - } - } else if (parsed.type === "response.content_part.done") { - // Content part completed - } - // Handle output item events (alternative format) - else if (parsed.type === "response.output_item.added") { - // This is where the actual content comes through in some test cases - if (parsed.item) { - if (parsed.item.type === "text" && parsed.item.text) { - hasContent = true - yield { type: "text", text: parsed.item.text } - } else if (parsed.item.type === "reasoning" && parsed.item.text) { - hasContent = true - yield { type: "reasoning", text: parsed.item.text } - } else if (parsed.item.type === "message" && parsed.item.content) { - // Handle message type items - for (const content of parsed.item.content) { - if (content.type === "text" && content.text) { - hasContent = true - yield { type: "text", text: content.text } - } - } - } - } - } else if (parsed.type === "response.output_item.done") { - // Output item completed - } - // Handle function/tool call events - else if (parsed.type === "response.function_call_arguments.delta") { - // Function call arguments streaming - // We could yield this as a special type if needed for tool usage - } else if (parsed.type === "response.function_call_arguments.done") { - // Function call completed - } - // Handle MCP (Model Context Protocol) tool events - else if (parsed.type === "response.mcp_call_arguments.delta") { - // MCP tool call arguments streaming - } else if (parsed.type === "response.mcp_call_arguments.done") { - // MCP tool call completed - } else if (parsed.type === "response.mcp_call.in_progress") { - // MCP tool call in progress - } else if ( - parsed.type === "response.mcp_call.completed" || - parsed.type === "response.mcp_call.failed" - ) { - // MCP tool call status events - } else if (parsed.type === "response.mcp_list_tools.in_progress") { - // MCP list tools in progress - } else if ( - parsed.type === "response.mcp_list_tools.completed" || - parsed.type === "response.mcp_list_tools.failed" - ) { - // MCP list tools status events - } - // Handle web search events - else if (parsed.type === "response.web_search_call.searching") { - // Web search in progress - } else if (parsed.type === "response.web_search_call.in_progress") { - // Processing web search results - } else if (parsed.type === "response.web_search_call.completed") { - // Web search completed - } - // Handle code interpreter events - else if (parsed.type === "response.code_interpreter_call_code.delta") { - // Code interpreter code streaming - if (parsed.delta) { - // Could yield as a special code type if needed - } - } else if (parsed.type === "response.code_interpreter_call_code.done") { - // Code interpreter code completed - } else if (parsed.type === "response.code_interpreter_call.interpreting") { - // Code interpreter running - } else if (parsed.type === "response.code_interpreter_call.in_progress") { - // Code execution in progress - } else if (parsed.type === "response.code_interpreter_call.completed") { - // Code interpreter completed - } - // Handle file search events - else if (parsed.type === "response.file_search_call.searching") { - // File search in progress - } else if (parsed.type === "response.file_search_call.in_progress") { - // Processing file search results - } else if (parsed.type === "response.file_search_call.completed") { - // File search completed - } - // Handle image generation events - else if (parsed.type === 
"response.image_gen_call.generating") { - // Image generation in progress - } else if (parsed.type === "response.image_gen_call.in_progress") { - // Processing image generation - } else if (parsed.type === "response.image_gen_call.partial_image") { - // Image partially generated - } else if (parsed.type === "response.image_gen_call.completed") { - // Image generation completed - } - // Handle computer use events - else if ( - parsed.type === "response.computer_tool_call.output_item" || - parsed.type === "response.computer_tool_call.output_screenshot" - ) { - // Computer use tool events - } - // Handle annotation events - else if ( - parsed.type === "response.output_text_annotation.added" || - parsed.type === "response.text_annotation.added" - ) { - // Text annotation events - could be citations, references, etc. - } - // Handle error events - else if (parsed.type === "response.error" || parsed.type === "error") { - // Error event from the API - if (parsed.error || parsed.message) { - throw new Error( - `Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}`, - ) - } - } - // Handle incomplete event - else if (parsed.type === "response.incomplete") { - // Response was incomplete - might need to handle specially - } - // Handle queued event - else if (parsed.type === "response.queued") { - // Response is queued - } - // Handle in_progress event - else if (parsed.type === "response.in_progress") { - // Response is being processed - } - // Handle failed event - else if (parsed.type === "response.failed") { - // Response failed - if (parsed.error || parsed.message) { - throw new Error( - `GPT-5 response failed: ${parsed.error?.message || parsed.message || "Unknown failure"}`, - ) - } - } else if (parsed.type === "response.completed" || parsed.type === "response.done") { - // Store response ID for conversation continuity - if (parsed.response?.id) { - this.resolveResponseId(parsed.response.id) - } - - // Check if the done event contains the complete output (as a fallback) - if ( - !hasContent && - parsed.response && - parsed.response.output && - Array.isArray(parsed.response.output) - ) { - for (const outputItem of parsed.response.output) { - if (outputItem.type === "message" && outputItem.content) { - for (const content of outputItem.content) { - if (content.type === "output_text" && content.text) { - hasContent = true - yield { - type: "text", - text: content.text, - } - } - } - } - // Also surface reasoning summaries if present in the final output - if (outputItem.type === "reasoning" && Array.isArray(outputItem.summary)) { - for (const summary of outputItem.summary) { - if ( - summary?.type === "summary_text" && - typeof summary.text === "string" - ) { - hasContent = true - yield { - type: "reasoning", - text: summary.text, - } - } - } - } - } - } - - // Usage for done/completed is already handled by processGpt5Event in SDK path. - // For SSE path, usage often arrives separately; avoid double-emitting here. - } - // These are structural or status events, we can just log them at a lower level or ignore. 
- else if ( - parsed.type === "response.created" || - parsed.type === "response.in_progress" || - parsed.type === "response.output_item.done" || - parsed.type === "response.content_part.added" || - parsed.type === "response.content_part.done" - ) { - // Status events - no action needed - } - // Fallback for older formats or unexpected responses - else if (parsed.choices?.[0]?.delta?.content) { - hasContent = true - yield { - type: "text", - text: parsed.choices[0].delta.content, - } - } - // Additional fallback: some events place text under 'item.text' even if type isn't matched above - else if ( - parsed.item && - typeof parsed.item.text === "string" && - parsed.item.text.length > 0 - ) { - hasContent = true - yield { - type: "text", - text: parsed.item.text, - } - } else if (parsed.usage) { - // Handle usage if it arrives in a separate, non-completed event - const usageData = this.normalizeGpt5Usage(parsed.usage, model) - if (usageData) { - yield usageData - } - } - } catch (e) { - // Only ignore JSON parsing errors, re-throw actual API errors - if (!(e instanceof SyntaxError)) { - throw e - } - } - } - // Also try to parse non-SSE formatted lines - else if (line.trim() && !line.startsWith(":")) { - try { - const parsed = JSON.parse(line) - - // Try to extract content from various possible locations - if (parsed.content || parsed.text || parsed.message) { - hasContent = true - yield { - type: "text", - text: parsed.content || parsed.text || parsed.message, - } - } - } catch { - // Not JSON, might be plain text - ignore - } - } - } - } - - // If we didn't get any content, don't throw - the API might have returned an empty response - // This can happen in certain edge cases and shouldn't break the flow - } catch (error) { - if (error instanceof Error) { - throw new Error(`Error processing GPT-5 response stream: ${error.message}`) + // Add reasoning effort if specified (supporting "minimal" for GPT-5) + if (params.reasoning?.effort) { + if (params.reasoning.effort === "minimal") { + // For minimal effort, we pass "minimal" as the reasoning_effort + requestParams.reasoning_effort = "minimal" + } else { + requestParams.reasoning_effort = params.reasoning.effort } - throw new Error("Unexpected error processing GPT-5 response stream") - } finally { - reader.releaseLock() } - } - /** - * Shared processor for GPT‑5 Responses API events. - * Used by both the official SDK streaming path and (optionally) by the SSE fallback. 
- */ - private async *processGpt5Event(event: any, model: OpenAiNativeModel): ApiStream { - // Persist response id for conversation continuity when available - if (event?.response?.id) { - this.resolveResponseId(event.response.id) + // Add verbosity control for GPT-5 models + // According to the docs, Chat Completions API also supports verbosity parameter + if (params.text?.verbosity) { + requestParams.verbosity = params.text.verbosity } - // Handle known streaming text deltas - if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") { - if (event?.delta) { - yield { type: "text", text: event.delta } - } - return - } + const stream = (await this.client.chat.completions.create( + requestParams, + )) as unknown as AsyncIterable - // Handle reasoning deltas (including summary variants) - if ( - event?.type === "response.reasoning.delta" || - event?.type === "response.reasoning_text.delta" || - event?.type === "response.reasoning_summary.delta" || - event?.type === "response.reasoning_summary_text.delta" - ) { - if (event?.delta) { - yield { type: "reasoning", text: event.delta } - } - return - } + // Convert the stream to GPT-5 response format + return this.convertChatStreamToGpt5Format(stream) + } - // Handle refusal deltas - if (event?.type === "response.refusal.delta") { - if (event?.delta) { - yield { type: "text", text: `[Refusal] ${event.delta}` } - } - return - } + private async *convertChatStreamToGpt5Format( + stream: AsyncIterable, + ): AsyncIterable { + for await (const chunk of stream) { + const delta = chunk.choices[0]?.delta - // Handle output item additions (SDK or Responses API alternative format) - if (event?.type === "response.output_item.added") { - const item = event?.item - if (item) { - if (item.type === "text" && item.text) { - yield { type: "text", text: item.text } - } else if (item.type === "reasoning" && item.text) { - yield { type: "reasoning", text: item.text } - } else if (item.type === "message" && Array.isArray(item.content)) { - for (const content of item.content) { - // Some implementations send 'text'; others send 'output_text' - if ((content?.type === "text" || content?.type === "output_text") && content?.text) { - yield { type: "text", text: content.text } - } - } + if (delta?.content) { + yield { + type: "text", + text: delta.content, } } - return - } - // Completion events that may carry usage - if (event?.type === "response.done" || event?.type === "response.completed") { - const usage = event?.response?.usage || event?.usage || undefined - const usageData = this.normalizeGpt5Usage(usage, model) - if (usageData) { - yield usageData - } - return - } - - // Fallbacks for older formats or unexpected objects - if (event?.choices?.[0]?.delta?.content) { - yield { type: "text", text: event.choices[0].delta.content } - return - } - - if (event?.usage) { - const usageData = this.normalizeGpt5Usage(event.usage, model) - if (usageData) { - yield usageData + if (chunk.usage) { + yield { + type: "usage", + usage: { + input_tokens: chunk.usage.prompt_tokens || 0, + output_tokens: chunk.usage.completion_tokens || 0, + total_tokens: chunk.usage.total_tokens || 0, + }, + } } } } - private getGpt5ReasoningEffort(model: OpenAiNativeModel): ReasoningEffortWithMinimal | undefined { - const { reasoning, info } = model + private async *handleGpt5StreamResponse( + stream: AsyncIterable, + model: OpenAiNativeModel, + ): ApiStream { + for await (const chunk of stream) { + if (chunk.type === "text" && chunk.text) { + yield { + type: "text", + 
text: chunk.text, + } + } else if (chunk.type === "usage" && chunk.usage) { + const inputTokens = chunk.usage.input_tokens + const outputTokens = chunk.usage.output_tokens + const cacheReadTokens = 0 + const cacheWriteTokens = 0 + const totalCost = calculateApiCostOpenAI( + model.info, + inputTokens, + outputTokens, + cacheWriteTokens, + cacheReadTokens, + ) - // Check if reasoning effort is configured - if (reasoning && "reasoning_effort" in reasoning) { - const effort = reasoning.reasoning_effort as string - // Support all effort levels including "minimal" for GPT-5 - if (effort === "minimal" || effort === "low" || effort === "medium" || effort === "high") { - return effort as ReasoningEffortWithMinimal + yield { + type: "usage", + inputTokens, + outputTokens, + cacheWriteTokens, + cacheReadTokens, + totalCost, + } } } - - // Centralize default: use the model's default from types if available; otherwise undefined - return info.reasoningEffort as ReasoningEffortWithMinimal | undefined } private isGpt5Model(modelId: string): boolean { return modelId.startsWith("gpt-5") } - private isResponsesApiModel(modelId: string): boolean { - // Both GPT-5 and Codex Mini use the v1/responses endpoint - return modelId.startsWith("gpt-5") || modelId === "codex-mini-latest" - } - private async *handleStreamResponse( stream: AsyncIterable, model: OpenAiNativeModel, @@ -1163,28 +376,16 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } private async *yieldUsage(info: ModelInfo, usage: OpenAI.Completions.CompletionUsage | undefined): ApiStream { - const inputTokens = usage?.prompt_tokens || 0 + const inputTokens = usage?.prompt_tokens || 0 // sum of cache hits and misses const outputTokens = usage?.completion_tokens || 0 - - // Extract cache tokens from prompt_tokens_details - // According to OpenAI API, cached_tokens represents tokens read from cache - const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || undefined - - // Cache write tokens are not typically reported in the standard streaming response - // They would be in cache_creation_input_tokens if available - const cacheWriteTokens = (usage as any)?.cache_creation_input_tokens || undefined - - const totalCost = calculateApiCostOpenAI( - info, - inputTokens, - outputTokens, - cacheWriteTokens || 0, - cacheReadTokens || 0, - ) + const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0 + const cacheWriteTokens = 0 + const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens) + const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens) yield { type: "usage", - inputTokens: inputTokens, + inputTokens: nonCachedInputTokens, outputTokens: outputTokens, cacheWriteTokens: cacheWriteTokens, cacheReadTokens: cacheReadTokens, @@ -1205,17 +406,15 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio modelId: id, model: info, settings: this.options, - defaultTemperature: this.isGpt5Model(id) ? GPT5_DEFAULT_TEMPERATURE : OPENAI_NATIVE_DEFAULT_TEMPERATURE, + defaultTemperature: OPENAI_NATIVE_DEFAULT_TEMPERATURE, }) - // For models using the Responses API (GPT-5 and Codex Mini), ensure we support reasoning effort - if (this.isResponsesApiModel(id)) { - const effort = - (this.options.reasoningEffort as ReasoningEffortWithMinimal | undefined) ?? 
- (info.reasoningEffort as ReasoningEffortWithMinimal | undefined) - - if (effort) { - ;(params.reasoning as any) = { reasoning_effort: effort } + // For GPT-5 models, ensure we support minimal reasoning effort + if (this.isGpt5Model(id) && params.reasoning) { + // Allow "minimal" effort for GPT-5 models + const effort = this.options.reasoningEffort + if (effort === "low" || effort === "medium" || effort === "high") { + params.reasoning.reasoning_effort = effort } } @@ -1224,50 +423,25 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio return { id: id.startsWith("o3-mini") ? "o3-mini" : id, info, ...params, verbosity: params.verbosity } } - /** - * Gets the last GPT-5 response ID captured from the Responses API stream. - * Used for maintaining conversation continuity across requests. - * @returns The response ID, or undefined if not available yet - */ - getLastResponseId(): string | undefined { - return this.lastResponseId - } - - /** - * Sets the last GPT-5 response ID for conversation continuity. - * Typically only used in tests or special flows. - * @param responseId The GPT-5 response ID to store - */ - setResponseId(responseId: string): void { - this.lastResponseId = responseId - } - async completePrompt(prompt: string): Promise { try { const { id, temperature, reasoning, verbosity } = this.getModel() - const isResponsesApi = this.isResponsesApiModel(id) - if (isResponsesApi) { - // Models that use the Responses API (GPT-5 and Codex Mini) don't support non-streaming completion - throw new Error(`completePrompt is not supported for ${id}. Use createMessage (Responses API) instead.`) - } - - const params: any = { + const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming & { + verbosity?: VerbosityLevel + } = { model: id, messages: [{ role: "user", content: prompt }], + temperature, + ...(reasoning && reasoning), } - // Add temperature if supported - if (temperature !== undefined) { - params.temperature = temperature - } - - // Add reasoning parameters for models that support them - if (reasoning) { - Object.assign(params, reasoning) + // Add verbosity for GPT-5 models + if (this.isGpt5Model(id) && verbosity) { + params.verbosity = verbosity } - const response = await this.client.chat.completions.create(params) + const response = await this.client.chat.completions.create(params as any) return response.choices[0]?.message.content || "" } catch (error) { if (error instanceof Error) { diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index eed719cf0fb..85abcf1a690 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -305,7 +305,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ], stream: true, ...(isGrokXAI ? 
{} : { stream_options: { include_usage: true } }), - reasoning_effort: modelInfo.reasoningEffort as "low" | "medium" | "high" | undefined, + reasoning_effort: modelInfo.reasoningEffort, temperature: undefined, } @@ -330,7 +330,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl }, ...convertToOpenAiMessages(messages), ], - reasoning_effort: modelInfo.reasoningEffort as "low" | "medium" | "high" | undefined, + reasoning_effort: modelInfo.reasoningEffort, temperature: undefined, } diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts index d2e55fc8f01..8af0b9aa426 100644 --- a/src/api/providers/requesty.ts +++ b/src/api/providers/requesty.ts @@ -116,7 +116,7 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan model, max_tokens, temperature, - ...(reasoning_effort && reasoning_effort !== "minimal" && { reasoning_effort }), + ...(reasoning_effort && { reasoning_effort }), ...(thinking && { thinking }), stream: true, stream_options: { include_usage: true }, diff --git a/src/api/transform/model-params.ts b/src/api/transform/model-params.ts index 933697c0a53..cc30aa56053 100644 --- a/src/api/transform/model-params.ts +++ b/src/api/transform/model-params.ts @@ -2,7 +2,6 @@ import { type ModelInfo, type ProviderSettings, type VerbosityLevel, - type ReasoningEffortWithMinimal, ANTHROPIC_DEFAULT_MAX_TOKENS, } from "@roo-code/types" @@ -39,7 +38,7 @@ type GetModelParamsOptions = { type BaseModelParams = { maxTokens: number | undefined temperature: number | undefined - reasoningEffort: ReasoningEffortWithMinimal | undefined + reasoningEffort: "low" | "medium" | "high" | undefined reasoningBudget: number | undefined verbosity: VerbosityLevel | undefined } @@ -129,8 +128,7 @@ export function getModelParams({ temperature = 1.0 } else if (shouldUseReasoningEffort({ model, settings })) { // "Traditional" reasoning models use the `reasoningEffort` parameter. - const effort = customReasoningEffort ?? model.reasoningEffort - reasoningEffort = effort as ReasoningEffortWithMinimal + reasoningEffort = customReasoningEffort ?? model.reasoningEffort } const params: BaseModelParams = { maxTokens, temperature, reasoningEffort, reasoningBudget, verbosity } diff --git a/src/api/transform/reasoning.ts b/src/api/transform/reasoning.ts index 46ef029ea39..a173c59b19b 100644 --- a/src/api/transform/reasoning.ts +++ b/src/api/transform/reasoning.ts @@ -2,7 +2,7 @@ import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta" import OpenAI from "openai" import type { GenerateContentConfig } from "@google/genai" -import type { ModelInfo, ProviderSettings, ReasoningEffortWithMinimal } from "@roo-code/types" +import type { ModelInfo, ProviderSettings } from "@roo-code/types" import { shouldUseReasoningBudget, shouldUseReasoningEffort } from "../../shared/api" @@ -23,7 +23,7 @@ export type GeminiReasoningParams = GenerateContentConfig["thinkingConfig"] export type GetModelReasoningOptions = { model: ModelInfo reasoningBudget: number | undefined - reasoningEffort: ReasoningEffortWithMinimal | undefined + reasoningEffort: ReasoningEffort | undefined settings: ProviderSettings } @@ -36,9 +36,7 @@ export const getOpenRouterReasoning = ({ shouldUseReasoningBudget({ model, settings }) ? { max_tokens: reasoningBudget } : shouldUseReasoningEffort({ model, settings }) - ? reasoningEffort !== "minimal" - ? { effort: reasoningEffort } - : undefined + ? 
{ effort: reasoningEffort } : undefined export const getAnthropicReasoning = ({ @@ -52,19 +50,8 @@ export const getOpenAiReasoning = ({ model, reasoningEffort, settings, -}: GetModelReasoningOptions): OpenAiReasoningParams | undefined => { - if (!shouldUseReasoningEffort({ model, settings })) { - return undefined - } - - // If model has reasoning effort capability, return object even if effort is undefined - // This preserves the reasoning_effort field in the API call - if (reasoningEffort === "minimal") { - return undefined - } - - return { reasoning_effort: reasoningEffort } -} +}: GetModelReasoningOptions): OpenAiReasoningParams | undefined => + shouldUseReasoningEffort({ model, settings }) ? { reasoning_effort: reasoningEffort } : undefined export const getGeminiReasoning = ({ model, diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 5e96b6fb167..34cc255cd85 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -252,8 +252,6 @@ export class Task extends EventEmitter implements TaskLike { didCompleteReadingStream = false assistantMessageParser?: AssistantMessageParser isAssistantMessageParserEnabled = false - private lastUsedInstructions?: string - private skipPrevResponseIdOnce: boolean = false constructor({ provider, @@ -852,7 +850,6 @@ export class Task extends EventEmitter implements TaskLike { progressStatus?: ToolProgressStatus, options: { isNonInteractive?: boolean - metadata?: Record } = {}, contextCondense?: ContextCondense, ): Promise { @@ -890,7 +887,6 @@ export class Task extends EventEmitter implements TaskLike { images, partial, contextCondense, - metadata: options.metadata, }) } } else { @@ -906,9 +902,6 @@ export class Task extends EventEmitter implements TaskLike { lastMessage.images = images lastMessage.partial = false lastMessage.progressStatus = progressStatus - if (options.metadata) { - ;(lastMessage as any).metadata = options.metadata - } // Instead of streaming partialMessage events, we do a save // and post like normal to persist to disk. @@ -924,15 +917,7 @@ export class Task extends EventEmitter implements TaskLike { this.lastMessageTs = sayTs } - await this.addToClineMessages({ - ts: sayTs, - type: "say", - say: type, - text, - images, - contextCondense, - metadata: options.metadata, - }) + await this.addToClineMessages({ ts: sayTs, type: "say", say: type, text, images, contextCondense }) } } } else { @@ -1777,8 +1762,6 @@ export class Task extends EventEmitter implements TaskLike { presentAssistantMessage(this) } - await this.persistGpt5Metadata(reasoningMessage) - updateApiReqMsg() await this.saveClineMessages() await this.providerRef.deref()?.postStateToWebview() @@ -1997,7 +1980,6 @@ export class Task extends EventEmitter implements TaskLike { Task.lastGlobalApiRequestTime = Date.now() const systemPrompt = await this.getSystemPrompt() - this.lastUsedInstructions = systemPrompt const { contextTokens } = this.getTokenUsage() if (contextTokens) { @@ -2036,10 +2018,6 @@ export class Task extends EventEmitter implements TaskLike { if (truncateResult.error) { await this.say("condense_context_error", truncateResult.error) } else if (truncateResult.summary) { - // A condense operation occurred; for the next GPT‑5 API call we should NOT - // send previous_response_id so the request reflects the fresh condensed context. 
- this.skipPrevResponseIdOnce = true - const { summary, cost, prevContextTokens, newContextTokens = 0 } = truncateResult const contextCondense: ContextCondense = { summary, cost, newContextTokens, prevContextTokens } await this.say( @@ -2056,7 +2034,7 @@ export class Task extends EventEmitter implements TaskLike { } const messagesSinceLastSummary = getMessagesSinceLastSummary(this.apiConversationHistory) - let cleanConversationHistory = maybeRemoveImageBlocks(messagesSinceLastSummary, this.api).map( + const cleanConversationHistory = maybeRemoveImageBlocks(messagesSinceLastSummary, this.api).map( ({ role, content }) => ({ role, content }), ) @@ -2072,41 +2050,9 @@ export class Task extends EventEmitter implements TaskLike { throw new Error("Auto-approval limit reached and user did not approve continuation") } - // Determine GPT‑5 previous_response_id from last persisted assistant turn (if available), - // unless a condense just occurred (skip once after condense). - let previousResponseId: string | undefined = undefined - try { - const modelId = this.api.getModel().id - if (modelId && modelId.startsWith("gpt-5") && !this.skipPrevResponseIdOnce) { - // Find the last assistant message that has a previous_response_id stored - const idx = findLastIndex( - this.clineMessages, - (m) => - m.type === "say" && - (m as any).say === "text" && - (m as any).metadata?.gpt5?.previous_response_id, - ) - if (idx !== -1) { - // Use the previous_response_id from the last assistant message for this request - previousResponseId = ((this.clineMessages[idx] as any).metadata.gpt5.previous_response_id || - undefined) as string | undefined - } - } - } catch { - // non-fatal - } - const metadata: ApiHandlerCreateMessageMetadata = { mode: mode, taskId: this.taskId, - ...(previousResponseId ? { previousResponseId } : {}), - // If a condense just occurred, explicitly suppress continuity fallback for the next call - ...(this.skipPrevResponseIdOnce ? { suppressPreviousResponseId: true } : {}), - } - - // Reset skip flag after applying (it only affects the immediate next call) - if (this.skipPrevResponseIdOnce) { - this.skipPrevResponseIdOnce = false } const stream = this.api.createMessage(systemPrompt, cleanConversationHistory, metadata) @@ -2252,35 +2198,6 @@ export class Task extends EventEmitter implements TaskLike { } } - /** - * Persist GPT-5 per-turn metadata (previous_response_id, instructions, reasoning_summary) - * onto the last complete assistant say("text") message. - */ - private async persistGpt5Metadata(reasoningMessage?: string): Promise { - try { - const modelId = this.api.getModel().id - if (!modelId || !modelId.startsWith("gpt-5")) return - - const lastResponseId: string | undefined = (this.api as any)?.getLastResponseId?.() - const idx = findLastIndex( - this.clineMessages, - (m) => m.type === "say" && (m as any).say === "text" && m.partial !== true, - ) - if (idx !== -1) { - const msg = this.clineMessages[idx] as any - msg.metadata = msg.metadata ?? {} - msg.metadata.gpt5 = { - ...(msg.metadata.gpt5 ?? {}), - previous_response_id: lastResponseId, - instructions: this.lastUsedInstructions, - reasoning_summary: (reasoningMessage ?? 
"").trim() || undefined, - } - } - } catch { - // Non-fatal error in metadata persistence - } - } - // Getters public get cwd() { diff --git a/src/shared/api.ts b/src/shared/api.ts index e9b57af3c17..014b903453e 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -6,15 +6,8 @@ import { } from "@roo-code/types" // ApiHandlerOptions -// Extend ProviderSettings (minus apiProvider) with handler-specific toggles. -export type ApiHandlerOptions = Omit & { - /** - * When true and using GPT‑5 Responses API, include reasoning.summary: "auto" - * so the API returns reasoning summaries (we already parse and surface them). - * Defaults to true; set to false to disable summaries. - */ - enableGpt5ReasoningSummary?: boolean -} + +export type ApiHandlerOptions = Omit // RouterName diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx index 70a58f03bf8..74ba885d25d 100644 --- a/webview-ui/src/components/settings/ApiOptions.tsx +++ b/webview-ui/src/components/settings/ApiOptions.tsx @@ -576,12 +576,6 @@ const ApiOptions = ({ if (value !== "custom-arn" && selectedProvider === "bedrock") { setApiConfigurationField("awsCustomArn", "") } - - // Clear reasoning effort when switching models to allow the new model's default to take effect - // This is especially important for GPT-5 models which default to "medium" - if (selectedProvider === "openai-native") { - setApiConfigurationField("reasoningEffort", undefined) - } }}> @@ -623,14 +617,11 @@ const ApiOptions = ({ modelInfo={selectedModelInfo} /> - {/* Gate Verbosity UI by capability flag */} - {selectedModelInfo?.supportsVerbosity && ( - - )} + {!fromWelcomeView && ( diff --git a/webview-ui/src/components/settings/ThinkingBudget.tsx b/webview-ui/src/components/settings/ThinkingBudget.tsx index a3e2d428b4a..a49ec79efc2 100644 --- a/webview-ui/src/components/settings/ThinkingBudget.tsx +++ b/webview-ui/src/components/settings/ThinkingBudget.tsx @@ -1,12 +1,7 @@ import { useEffect } from "react" import { Checkbox } from "vscrui" -import { - type ProviderSettings, - type ModelInfo, - type ReasoningEffortWithMinimal, - reasoningEfforts, -} from "@roo-code/types" +import { type ProviderSettings, type ModelInfo, type ReasoningEffort, reasoningEfforts } from "@roo-code/types" import { DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS, @@ -32,35 +27,10 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod const isGemini25Pro = selectedModelId && selectedModelId.includes("gemini-2.5-pro") const minThinkingTokens = isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024 - // Check if this is a GPT-5 model to show "minimal" option - // Only show minimal for OpenAI Native provider GPT-5 models - const isOpenAiNativeProvider = apiConfiguration.apiProvider === "openai-native" - const isGpt5Model = isOpenAiNativeProvider && selectedModelId && selectedModelId.startsWith("gpt-5") - // Add "minimal" option for GPT-5 models - // Spread to convert readonly tuple into a mutable array, then expose as readonly for safety - const baseEfforts = [...reasoningEfforts] as ReasoningEffortWithMinimal[] - const availableReasoningEfforts: ReadonlyArray = isGpt5Model - ? 
(["minimal", ...baseEfforts] as ReasoningEffortWithMinimal[]) - : baseEfforts - - // Default reasoning effort - use model's default if available - // GPT-5 models have "medium" as their default in the model configuration - const modelDefaultReasoningEffort = modelInfo?.reasoningEffort as ReasoningEffortWithMinimal | undefined - const defaultReasoningEffort: ReasoningEffortWithMinimal = modelDefaultReasoningEffort || "medium" - const currentReasoningEffort: ReasoningEffortWithMinimal = - (apiConfiguration.reasoningEffort as ReasoningEffortWithMinimal | undefined) || defaultReasoningEffort - const isReasoningBudgetSupported = !!modelInfo && modelInfo.supportsReasoningBudget const isReasoningBudgetRequired = !!modelInfo && modelInfo.requiredReasoningBudget const isReasoningEffortSupported = !!modelInfo && modelInfo.supportsReasoningEffort - // Set default reasoning effort when model supports it and no value is set - useEffect(() => { - if (isReasoningEffortSupported && !apiConfiguration.reasoningEffort && defaultReasoningEffort) { - setApiConfigurationField("reasoningEffort", defaultReasoningEffort) - } - }, [isReasoningEffortSupported, apiConfiguration.reasoningEffort, defaultReasoningEffort, setApiConfigurationField]) - const enableReasoningEffort = apiConfiguration.enableReasoningEffort const customMaxOutputTokens = apiConfiguration.modelMaxTokens || DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS const customMaxThinkingTokens = @@ -139,21 +109,13 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod