diff --git a/packages/types/src/message.ts b/packages/types/src/message.ts
index 7197ab29a12..21baf3f2033 100644
--- a/packages/types/src/message.ts
+++ b/packages/types/src/message.ts
@@ -176,17 +176,6 @@ export const clineMessageSchema = z.object({
 	contextCondense: contextCondenseSchema.optional(),
 	isProtected: z.boolean().optional(),
 	apiProtocol: z.union([z.literal("openai"), z.literal("anthropic")]).optional(),
-	metadata: z
-		.object({
-			gpt5: z
-				.object({
-					previous_response_id: z.string().optional(),
-					instructions: z.string().optional(),
-					reasoning_summary: z.string().optional(),
-				})
-				.optional(),
-		})
-		.optional(),
 })
 
 export type ClineMessage = z.infer<typeof clineMessageSchema>
diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts
index 90b61ad879e..a09790578b5 100644
--- a/packages/types/src/model.ts
+++ b/packages/types/src/model.ts
@@ -44,8 +44,6 @@ export const modelInfoSchema = z.object({
 	supportsImages: z.boolean().optional(),
 	supportsComputerUse: z.boolean().optional(),
 	supportsPromptCache: z.boolean(),
-	// Capability flag to indicate whether the model supports an output verbosity parameter
-	supportsVerbosity: z.boolean().optional(),
 	supportsReasoningBudget: z.boolean().optional(),
 	requiredReasoningBudget: z.boolean().optional(),
 	supportsReasoningEffort: z.boolean().optional(),
diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts
index aebfd4dbe57..f0c90101fcb 100644
--- a/packages/types/src/provider-settings.ts
+++ b/packages/types/src/provider-settings.ts
@@ -3,11 +3,6 @@ import { z } from "zod"
 import { reasoningEffortsSchema, verbosityLevelsSchema, modelInfoSchema } from "./model.js"
 import { codebaseIndexProviderSchema } from "./codebase-index.js"
 
-// Extended schema that includes "minimal" for GPT-5 models
-export const extendedReasoningEffortsSchema = z.union([reasoningEffortsSchema, z.literal("minimal")])
-
-export type ReasoningEffortWithMinimal = z.infer<typeof extendedReasoningEffortsSchema>
-
 /**
  * ProviderName
  */
@@ -81,7 +76,7 @@ const baseProviderSettingsSchema = z.object({
 
 	// Model reasoning.
 	enableReasoningEffort: z.boolean().optional(),
-	reasoningEffort: extendedReasoningEffortsSchema.optional(),
+	reasoningEffort: reasoningEffortsSchema.optional(),
 	modelMaxTokens: z.number().optional(),
 	modelMaxThinkingTokens: z.number().optional(),
 
diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts
index 78d3cb63344..22e48e2b07d 100644
--- a/packages/types/src/providers/openai.ts
+++ b/packages/types/src/providers/openai.ts
@@ -12,13 +12,10 @@ export const openAiNativeModels = {
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsReasoningEffort: true,
-		reasoningEffort: "medium",
 		inputPrice: 1.25,
 		outputPrice: 10.0,
 		cacheReadsPrice: 0.13,
 		description: "GPT-5: The best model for coding and agentic tasks across domains",
-		// supportsVerbosity is a new capability; ensure ModelInfo includes it
-		supportsVerbosity: true,
 	},
 	"gpt-5-mini-2025-08-07": {
 		maxTokens: 128000,
@@ -26,12 +23,10 @@
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsReasoningEffort: true,
-		reasoningEffort: "medium",
 		inputPrice: 0.25,
 		outputPrice: 2.0,
 		cacheReadsPrice: 0.03,
 		description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
-		supportsVerbosity: true,
 	},
 	"gpt-5-nano-2025-08-07": {
 		maxTokens: 128000,
@@ -39,12 +34,10 @@
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsReasoningEffort: true,
-		reasoningEffort: "medium",
 		inputPrice: 0.05,
 		outputPrice: 0.4,
 		cacheReadsPrice: 0.01,
 		description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
-		supportsVerbosity: true,
 	},
 	"gpt-4.1": {
 		maxTokens: 32_768,
@@ -247,6 +240,5 @@ export const openAiModelInfoSaneDefaults: ModelInfo = {
 export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"
 
 export const OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0
-export const GPT5_DEFAULT_TEMPERATURE = 1.0
 
 export const OPENAI_AZURE_AI_INFERENCE_PATH = "/models/chat/completions"
diff --git a/src/api/index.ts b/src/api/index.ts
index 5e705a80d24..57b06f7bbdb 100644
--- a/src/api/index.ts
+++ b/src/api/index.ts
@@ -44,13 +44,6 @@ export interface SingleCompletionHandler {
 export interface ApiHandlerCreateMessageMetadata {
 	mode?: string
 	taskId: string
-	previousResponseId?: string
-	/**
-	 * When true, the provider must NOT fall back to internal continuity state
-	 * (e.g., lastResponseId) if previousResponseId is absent.
-	 * Used to enforce "skip once" after a condense operation.
- */ - suppressPreviousResponseId?: boolean } export interface ApiHandler { diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts index 1d76d387a9f..fdd71ba3f6d 100644 --- a/src/api/providers/__tests__/openai-native.spec.ts +++ b/src/api/providers/__tests__/openai-native.spec.ts @@ -160,12 +160,8 @@ describe("OpenAiNativeHandler", () => { expect(results.length).toBe(1) expect(results[0].type).toBe("usage") // Use type assertion to avoid TypeScript errors - const usageResult = results[0] as any - expect(usageResult.inputTokens).toBe(0) - expect(usageResult.outputTokens).toBe(0) - // When no cache tokens are present, they should be undefined - expect(usageResult.cacheWriteTokens).toBeUndefined() - expect(usageResult.cacheReadTokens).toBeUndefined() + expect((results[0] as any).inputTokens).toBe(0) + expect((results[0] as any).outputTokens).toBe(0) // Verify developer role is used for system prompt with o1 model expect(mockCreate).toHaveBeenCalledWith({ @@ -290,111 +286,6 @@ describe("OpenAiNativeHandler", () => { expect((results[1] as any).outputTokens).toBe(5) expect((results[1] as any).totalCost).toBeCloseTo(0.00006, 6) }) - - it("should handle cache tokens in streaming response", async () => { - const mockStream = [ - { choices: [{ delta: { content: "Hello" } }], usage: null }, - { choices: [{ delta: { content: " cached" } }], usage: null }, - { - choices: [{ delta: { content: " response" } }], - usage: { - prompt_tokens: 100, - completion_tokens: 10, - prompt_tokens_details: { - cached_tokens: 80, - audio_tokens: 0, - }, - completion_tokens_details: { - reasoning_tokens: 0, - audio_tokens: 0, - accepted_prediction_tokens: 0, - rejected_prediction_tokens: 0, - }, - }, - }, - ] - - mockCreate.mockResolvedValueOnce( - (async function* () { - for (const chunk of mockStream) { - yield chunk - } - })(), - ) - - const generator = handler.createMessage(systemPrompt, messages) - const results = [] - for await (const result of generator) { - results.push(result) - } - - // Verify text responses - expect(results.length).toBe(4) - expect(results[0]).toMatchObject({ type: "text", text: "Hello" }) - expect(results[1]).toMatchObject({ type: "text", text: " cached" }) - expect(results[2]).toMatchObject({ type: "text", text: " response" }) - - // Check usage data includes cache tokens - expect(results[3].type).toBe("usage") - const usageChunk = results[3] as any - expect(usageChunk.inputTokens).toBe(100) // Total input tokens (includes cached) - expect(usageChunk.outputTokens).toBe(10) - expect(usageChunk.cacheReadTokens).toBe(80) // Cached tokens from prompt_tokens_details - expect(usageChunk.cacheWriteTokens).toBeUndefined() // No cache write tokens in standard response - - // Verify cost calculation takes cache into account - // GPT-4.1 pricing: input $2/1M, output $8/1M, cache read $0.5/1M - // OpenAI's prompt_tokens includes cached tokens, so we need to calculate: - // - Non-cached input tokens: 100 - 80 = 20 - // - Cost for non-cached input: (20 / 1_000_000) * 2.0 - // - Cost for cached input: (80 / 1_000_000) * 0.5 - // - Cost for output: (10 / 1_000_000) * 8.0 - const nonCachedInputTokens = 100 - 80 - const expectedNonCachedInputCost = (nonCachedInputTokens / 1_000_000) * 2.0 - const expectedCacheReadCost = (80 / 1_000_000) * 0.5 - const expectedOutputCost = (10 / 1_000_000) * 8.0 - const expectedTotalCost = expectedNonCachedInputCost + expectedCacheReadCost + expectedOutputCost - 
expect(usageChunk.totalCost).toBeCloseTo(expectedTotalCost, 10) - }) - - it("should handle cache write tokens if present", async () => { - const mockStream = [ - { choices: [{ delta: { content: "Test" } }], usage: null }, - { - choices: [{ delta: {} }], - usage: { - prompt_tokens: 150, - completion_tokens: 5, - prompt_tokens_details: { - cached_tokens: 50, - }, - cache_creation_input_tokens: 30, // Cache write tokens - }, - }, - ] - - mockCreate.mockResolvedValueOnce( - (async function* () { - for (const chunk of mockStream) { - yield chunk - } - })(), - ) - - const generator = handler.createMessage(systemPrompt, messages) - const results = [] - for await (const result of generator) { - results.push(result) - } - - // Check usage data includes both cache read and write tokens - const usageChunk = results.find((r) => r.type === "usage") as any - expect(usageChunk).toBeDefined() - expect(usageChunk.inputTokens).toBe(150) - expect(usageChunk.outputTokens).toBe(5) - expect(usageChunk.cacheReadTokens).toBe(50) - expect(usageChunk.cacheWriteTokens).toBe(30) - }) }) describe("completePrompt", () => { @@ -570,40 +461,7 @@ describe("OpenAiNativeHandler", () => { }) describe("GPT-5 models", () => { - it("should handle GPT-5 model with Responses API", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Simulate actual GPT-5 Responses API SSE stream format - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.created","response":{"id":"test","status":"in_progress"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Hello"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":" world"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"usage":{"prompt_tokens":10,"completion_tokens":2}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - + it("should handle GPT-5 model with developer role", async () => { handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -615,56 +473,20 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - // Verify Responses API is called with correct parameters - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", + // Verify developer role is used for GPT-5 with default parameters + expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ - method: "POST", - headers: expect.objectContaining({ - "Content-Type": "application/json", - Authorization: "Bearer test-api-key", - Accept: "text/event-stream", - }), - body: expect.any(String), + model: "gpt-5-2025-08-07", + messages: [{ role: "developer", content: expect.stringContaining(systemPrompt) }], + stream: true, + stream_options: { include_usage: true }, + reasoning_effort: "minimal", // Default for GPT-5 + verbosity: "medium", // Default verbosity }), ) - const body1 = (mockFetch.mock.calls[0][1] as any).body as string - expect(body1).toContain('"model":"gpt-5-2025-08-07"') - expect(body1).toContain('"input":"Developer: You are a helpful assistant.\\n\\nUser: Hello!"') - expect(body1).toContain('"effort":"medium"') - expect(body1).toContain('"summary":"auto"') - 
expect(body1).toContain('"verbosity":"medium"') - expect(body1).toContain('"temperature":1') - expect(body1).toContain('"max_output_tokens"') - - // Verify the streamed content - const textChunks = chunks.filter((c) => c.type === "text") - expect(textChunks).toHaveLength(2) - expect(textChunks[0].text).toBe("Hello") - expect(textChunks[1].text).toBe(" world") - - // Clean up - delete (global as any).fetch }) - it("should handle GPT-5-mini model with Responses API", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Response"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - + it("should handle GPT-5-mini model", async () => { handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-mini-2025-08-07", @@ -676,36 +498,19 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - // Verify correct model and default parameters - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", + expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ - body: expect.stringContaining('"model":"gpt-5-mini-2025-08-07"'), + model: "gpt-5-mini-2025-08-07", + messages: [{ role: "developer", content: expect.stringContaining(systemPrompt) }], + stream: true, + stream_options: { include_usage: true }, + reasoning_effort: "minimal", // Default for GPT-5 + verbosity: "medium", // Default verbosity }), ) - - // Clean up - delete (global as any).fetch }) - it("should handle GPT-5-nano model with Responses API", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Nano response"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - + it("should handle GPT-5-nano model", async () => { handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-nano-2025-08-07", @@ -717,36 +522,19 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - // Verify correct model - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", + expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ - body: expect.stringContaining('"model":"gpt-5-nano-2025-08-07"'), + model: "gpt-5-nano-2025-08-07", + messages: [{ role: "developer", content: expect.stringContaining(systemPrompt) }], + stream: true, + stream_options: { include_usage: true }, + reasoning_effort: "minimal", // Default for GPT-5 + verbosity: "medium", // Default verbosity }), ) - - // Clean up - delete (global as any).fetch }) it("should support verbosity control for GPT-5", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Low verbosity"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - 
global.fetch = mockFetch as any - handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -761,77 +549,18 @@ describe("OpenAiNativeHandler", () => { } // Verify that verbosity is passed in the request - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", + expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ - body: expect.stringContaining('"verbosity":"low"'), + model: "gpt-5-2025-08-07", + messages: expect.any(Array), + stream: true, + stream_options: { include_usage: true }, + verbosity: "low", }), ) - - // Clean up - delete (global as any).fetch }) it("should support minimal reasoning effort for GPT-5", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Minimal effort"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - reasoningEffort: "minimal" as any, // GPT-5 supports minimal - }) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // With minimal reasoning effort, the model should pass it through - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", - expect.objectContaining({ - body: expect.stringContaining('"effort":"minimal"'), - }), - ) - - // Clean up - delete (global as any).fetch - }) - - it("should support low reasoning effort for GPT-5", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Low effort response"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -844,48 +573,25 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - // Should use Responses API with low reasoning effort - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", + // With low reasoning effort, the model should pass it through + expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ - body: expect.any(String), + model: "gpt-5-2025-08-07", + messages: expect.any(Array), + stream: true, + stream_options: { include_usage: true }, + reasoning_effort: "low", + verbosity: "medium", // Default verbosity }), ) - const body2 = (mockFetch.mock.calls[0][1] as any).body as string - expect(body2).toContain('"model":"gpt-5-2025-08-07"') - expect(body2).toContain('"effort":"low"') - expect(body2).toContain('"summary":"auto"') - expect(body2).toContain('"verbosity":"medium"') - expect(body2).toContain('"temperature":1') - expect(body2).toContain('"max_output_tokens"') - - // Clean up - delete (global as any).fetch }) it("should support both verbosity and reasoning effort together for GPT-5", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: 
new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"High verbosity minimal effort"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", - verbosity: "high", - reasoningEffort: "minimal" as any, + verbosity: "high", // Set verbosity through options + reasoningEffort: "low", // Set reasoning effort }) const stream = handler.createMessage(systemPrompt, messages) @@ -894,863 +600,17 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - // Should use Responses API with both parameters - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", + // Verify both parameters are passed + expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ - body: expect.any(String), + model: "gpt-5-2025-08-07", + messages: expect.any(Array), + stream: true, + stream_options: { include_usage: true }, + reasoning_effort: "low", + verbosity: "high", }), ) - const body3 = (mockFetch.mock.calls[0][1] as any).body as string - expect(body3).toContain('"model":"gpt-5-2025-08-07"') - expect(body3).toContain('"effort":"minimal"') - expect(body3).toContain('"summary":"auto"') - expect(body3).toContain('"verbosity":"high"') - expect(body3).toContain('"temperature":1') - expect(body3).toContain('"max_output_tokens"') - - // Clean up - delete (global as any).fetch - }) - - it("should handle actual GPT-5 Responses API format", async () => { - // Mock fetch with actual response format from GPT-5 - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Test actual GPT-5 response format - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.created","response":{"id":"test","status":"in_progress"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.in_progress","response":{"status":"in_progress"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"First text"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":" Second text"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"reasoning","text":"Some reasoning"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"usage":{"prompt_tokens":100,"completion_tokens":20}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Should handle the actual format correctly - const textChunks = chunks.filter((c) => c.type === "text") - const reasoningChunks = chunks.filter((c) => c.type === "reasoning") - - expect(textChunks).toHaveLength(2) - expect(textChunks[0].text).toBe("First text") - expect(textChunks[1].text).toBe(" Second text") - - 
expect(reasoningChunks).toHaveLength(1) - expect(reasoningChunks[0].text).toBe("Some reasoning") - - // Should also have usage information with cost - const usageChunks = chunks.filter((c) => c.type === "usage") - expect(usageChunks).toHaveLength(1) - expect(usageChunks[0]).toMatchObject({ - type: "usage", - inputTokens: 100, - outputTokens: 20, - totalCost: expect.any(Number), - }) - - // Verify cost calculation (GPT-5 pricing: input $1.25/M, output $10/M) - const expectedInputCost = (100 / 1_000_000) * 1.25 - const expectedOutputCost = (20 / 1_000_000) * 10.0 - const expectedTotalCost = expectedInputCost + expectedOutputCost - expect(usageChunks[0].totalCost).toBeCloseTo(expectedTotalCost, 10) - - // Clean up - delete (global as any).fetch - }) - - it("should handle Responses API with no content gracefully", async () => { - // Mock fetch with empty response - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue(new TextEncoder().encode('data: {"someField":"value"}\n\n')) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - - // Should not throw, just warn - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Should have no content chunks when stream is empty - const contentChunks = chunks.filter((c) => c.type === "text" || c.type === "reasoning") - - expect(contentChunks).toHaveLength(0) - - // Clean up - delete (global as any).fetch - }) - - it("should support previous_response_id for conversation continuity", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Include response ID in the response - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.created","response":{"id":"resp_123","status":"in_progress"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Response with ID"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"id":"resp_123","usage":{"prompt_tokens":10,"completion_tokens":3}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // First request - should not have previous_response_id - const stream1 = handler.createMessage(systemPrompt, messages) - const chunks1: any[] = [] - for await (const chunk of stream1) { - chunks1.push(chunk) - } - - // Verify first request doesn't include previous_response_id - let firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(firstCallBody.previous_response_id).toBeUndefined() - - // Second request with metadata - should include previous_response_id - const stream2 = handler.createMessage(systemPrompt, messages, { - taskId: "test-task", - previousResponseId: "resp_456", - }) - const chunks2: any[] = [] - for await (const chunk of stream2) { - chunks2.push(chunk) - } - - // Verify second request includes the provided previous_response_id - let secondCallBody = 
JSON.parse(mockFetch.mock.calls[1][1].body) - expect(secondCallBody.previous_response_id).toBe("resp_456") - - // Clean up - delete (global as any).fetch - }) - - it("should handle unhandled stream events gracefully", async () => { - // Mock fetch for the fallback SSE path (which is what gets used when SDK fails) - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Hello"}}\n\n', - ), - ) - // This event is not handled, so it should be ignored - controller.enqueue( - new TextEncoder().encode('data: {"type":"response.audio.delta","delta":"..."}\n\n'), - ) - controller.enqueue(new TextEncoder().encode('data: {"type":"response.done","response":{}}\n\n')) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - // Also mock the SDK to throw an error so it falls back to fetch - const mockClient = { - responses: { - create: vitest.fn().mockRejectedValue(new Error("SDK not available")), - }, - } - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // Replace the client with our mock - ;(handler as any).client = mockClient - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - const errors: any[] = [] - - try { - for await (const chunk of stream) { - chunks.push(chunk) - } - } catch (error) { - errors.push(error) - } - - // Log for debugging - if (chunks.length === 0 && errors.length === 0) { - console.log("No chunks and no errors received") - } - if (errors.length > 0) { - console.log("Errors:", errors) - } - - expect(errors.length).toBe(0) - const textChunks = chunks.filter((c) => c.type === "text") - expect(textChunks.length).toBeGreaterThan(0) - expect(textChunks[0].text).toBe("Hello") - - delete (global as any).fetch - }) - - it("should use stored response ID when metadata doesn't provide one", async () => { - // Mock fetch for Responses API - const mockFetch = vitest - .fn() - .mockResolvedValueOnce({ - ok: true, - body: new ReadableStream({ - start(controller) { - // First response with ID - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"id":"resp_789","output":[{"type":"text","content":[{"type":"text","text":"First"}]}],"usage":{"prompt_tokens":10,"completion_tokens":1}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - .mockResolvedValueOnce({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Second response - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Second"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // First request - establishes response ID - const stream1 = handler.createMessage(systemPrompt, messages) - for await (const chunk of stream1) { - // consume stream - } - - // Second request without metadata - should use stored response ID - const stream2 = handler.createMessage(systemPrompt, messages, { taskId: "test-task" }) - for await (const chunk of stream2) { - // consume stream - } - - // Verify second 
request uses the stored response ID from first request - let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) - expect(secondCallBody.previous_response_id).toBe("resp_789") - - // Clean up - delete (global as any).fetch - }) - - it("should only send latest message when using previous_response_id", async () => { - // Mock fetch for Responses API - const mockFetch = vitest - .fn() - .mockResolvedValueOnce({ - ok: true, - body: new ReadableStream({ - start(controller) { - // First response with ID - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"id":"resp_001","output":[{"type":"text","content":[{"type":"text","text":"First"}]}],"usage":{"prompt_tokens":50,"completion_tokens":1}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - .mockResolvedValueOnce({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Second response - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Second"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"id":"resp_002","usage":{"prompt_tokens":10,"completion_tokens":1}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // First request with full conversation - const firstMessages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "Hello" }, - { role: "assistant", content: "Hi there!" }, - { role: "user", content: "How are you?" }, - ] - - const stream1 = handler.createMessage(systemPrompt, firstMessages) - for await (const chunk of stream1) { - // consume stream - } - - // Verify first request sends full conversation - let firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(firstCallBody.input).toContain("Hello") - expect(firstCallBody.input).toContain("Hi there!") - expect(firstCallBody.input).toContain("How are you?") - expect(firstCallBody.previous_response_id).toBeUndefined() - - // Second request with previous_response_id - should only send latest message - const secondMessages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "Hello" }, - { role: "assistant", content: "Hi there!" }, - { role: "user", content: "How are you?" }, - { role: "assistant", content: "I'm doing well!" }, - { role: "user", content: "What's the weather?" 
}, // Latest message - ] - - const stream2 = handler.createMessage(systemPrompt, secondMessages, { - taskId: "test-task", - previousResponseId: "resp_001", - }) - for await (const chunk of stream2) { - // consume stream - } - - // Verify second request only sends the latest user message - let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) - expect(secondCallBody.input).toBe("User: What's the weather?") - expect(secondCallBody.input).not.toContain("Hello") - expect(secondCallBody.input).not.toContain("Hi there!") - expect(secondCallBody.input).not.toContain("How are you?") - expect(secondCallBody.previous_response_id).toBe("resp_001") - - // Clean up - delete (global as any).fetch - }) - - it("should correctly prepare GPT-5 input with conversation continuity", () => { - const gpt5Handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // @ts-expect-error - private method - const { formattedInput, previousResponseId } = gpt5Handler.prepareGpt5Input(systemPrompt, messages, { - taskId: "task1", - previousResponseId: "resp_123", - }) - - expect(previousResponseId).toBe("resp_123") - expect(formattedInput).toBe("User: Hello!") - }) - - it("should provide helpful error messages for different error codes", async () => { - const testCases = [ - { status: 400, expectedMessage: "Invalid request to GPT-5 API" }, - { status: 401, expectedMessage: "Authentication failed" }, - { status: 403, expectedMessage: "Access denied" }, - { status: 404, expectedMessage: "GPT-5 API endpoint not found" }, - { status: 429, expectedMessage: "Rate limit exceeded" }, - { status: 500, expectedMessage: "OpenAI service error" }, - ] - - for (const { status, expectedMessage } of testCases) { - // Mock fetch with error response - const mockFetch = vitest.fn().mockResolvedValue({ - ok: false, - status, - statusText: "Error", - text: async () => JSON.stringify({ error: { message: "Test error" } }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - const stream = handler.createMessage(systemPrompt, messages) - - await expect(async () => { - for await (const chunk of stream) { - // Should throw before yielding anything - } - }).rejects.toThrow(expectedMessage) - } - - // Clean up - delete (global as any).fetch - }) - }) -}) - -// Added tests for GPT-5 streaming event coverage per PR_review_gpt5_final.md - -describe("GPT-5 streaming event coverage (additional)", () => { - it("should handle reasoning delta events for GPT-5", async () => { - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.reasoning.delta","delta":"Thinking about the problem..."}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode('data: {"type":"response.text.delta","delta":"The answer is..."}\n\n'), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - // @ts-ignore - global.fetch = mockFetch - - const handler = new OpenAiNativeHandler({ - apiModelId: "gpt-5-2025-08-07", - openAiNativeApiKey: "test-api-key", - }) - - const systemPrompt = "You are a helpful assistant." - const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello!" 
}] - const stream = handler.createMessage(systemPrompt, messages) - - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - const reasoningChunks = chunks.filter((c) => c.type === "reasoning") - const textChunks = chunks.filter((c) => c.type === "text") - - expect(reasoningChunks).toHaveLength(1) - expect(reasoningChunks[0].text).toBe("Thinking about the problem...") - expect(textChunks).toHaveLength(1) - expect(textChunks[0].text).toBe("The answer is...") - - // @ts-ignore - delete global.fetch - }) - - it("should handle refusal delta events for GPT-5 and prefix output", async () => { - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.refusal.delta","delta":"I cannot comply with this request."}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - // @ts-ignore - global.fetch = mockFetch - - const handler = new OpenAiNativeHandler({ - apiModelId: "gpt-5-2025-08-07", - openAiNativeApiKey: "test-api-key", - }) - - const systemPrompt = "You are a helpful assistant." - const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Do something disallowed" }] - const stream = handler.createMessage(systemPrompt, messages) - - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - const textChunks = chunks.filter((c) => c.type === "text") - expect(textChunks).toHaveLength(1) - expect(textChunks[0].text).toBe("[Refusal] I cannot comply with this request.") - - // @ts-ignore - delete global.fetch - }) - - it("should ignore malformed JSON lines in SSE stream", async () => { - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Before"}}\n\n', - ), - ) - // Malformed JSON line - controller.enqueue( - new TextEncoder().encode('data: {"type":"response.text.delta","delta":"Bad"\n\n'), - ) - // Valid line after malformed - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"After"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - // @ts-ignore - global.fetch = mockFetch - - const handler = new OpenAiNativeHandler({ - apiModelId: "gpt-5-2025-08-07", - openAiNativeApiKey: "test-api-key", - }) - - const systemPrompt = "You are a helpful assistant." - const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello!" 
}] - const stream = handler.createMessage(systemPrompt, messages) - - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // It should not throw and still capture the valid texts around the malformed line - const textChunks = chunks.filter((c) => c.type === "text") - expect(textChunks.map((c: any) => c.text)).toEqual(["Before", "After"]) - - // @ts-ignore - delete global.fetch - }) - - describe("Codex Mini Model", () => { - let handler: OpenAiNativeHandler - const mockOptions: ApiHandlerOptions = { - openAiNativeApiKey: "test-api-key", - apiModelId: "codex-mini-latest", - } - - it("should handle codex-mini-latest streaming response", async () => { - // Mock fetch for Codex Mini responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Codex Mini uses the same responses API format - controller.enqueue( - new TextEncoder().encode('data: {"type":"response.output_text.delta","delta":"Hello"}\n\n'), - ) - controller.enqueue( - new TextEncoder().encode('data: {"type":"response.output_text.delta","delta":" from"}\n\n'), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_text.delta","delta":" Codex"}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_text.delta","delta":" Mini!"}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"usage":{"prompt_tokens":50,"completion_tokens":10}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "codex-mini-latest", - }) - - const systemPrompt = "You are a helpful coding assistant." 
- const messages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "Write a hello world function" }, - ] - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Verify text chunks - const textChunks = chunks.filter((c) => c.type === "text") - expect(textChunks).toHaveLength(4) - expect(textChunks.map((c) => c.text).join("")).toBe("Hello from Codex Mini!") - - // Verify usage data from API - const usageChunks = chunks.filter((c) => c.type === "usage") - expect(usageChunks).toHaveLength(1) - expect(usageChunks[0]).toMatchObject({ - type: "usage", - inputTokens: 50, - outputTokens: 10, - totalCost: expect.any(Number), // Codex Mini has pricing: $1.5/M input, $6/M output - }) - - // Verify cost is calculated correctly based on API usage data - const expectedCost = (50 / 1_000_000) * 1.5 + (10 / 1_000_000) * 6 - expect(usageChunks[0].totalCost).toBeCloseTo(expectedCost, 10) - - // Verify the request was made with correct parameters - expect(mockFetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/responses", - expect.objectContaining({ - method: "POST", - headers: expect.objectContaining({ - "Content-Type": "application/json", - Authorization: "Bearer test-api-key", - Accept: "text/event-stream", - }), - body: expect.any(String), - }), - ) - - const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(requestBody).toMatchObject({ - model: "codex-mini-latest", - input: "Developer: You are a helpful coding assistant.\n\nUser: Write a hello world function", - stream: true, - }) - - // Clean up - delete (global as any).fetch - }) - - it("should handle codex-mini-latest non-streaming completion", async () => { - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "codex-mini-latest", - }) - - // Codex Mini now uses the same Responses API as GPT-5, which doesn't support non-streaming - await expect(handler.completePrompt("Write a hello world function in Python")).rejects.toThrow( - "completePrompt is not supported for codex-mini-latest. Use createMessage (Responses API) instead.", - ) - }) - - it("should handle codex-mini-latest API errors", async () => { - // Mock fetch with error response - const mockFetch = vitest.fn().mockResolvedValue({ - ok: false, - status: 429, - statusText: "Too Many Requests", - text: async () => "Rate limit exceeded", - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "codex-mini-latest", - }) - - const systemPrompt = "You are a helpful assistant." 
- const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }] - - const stream = handler.createMessage(systemPrompt, messages) - - // Should throw an error (using the same error format as GPT-5) - await expect(async () => { - for await (const chunk of stream) { - // consume stream - } - }).rejects.toThrow("Rate limit exceeded") - - // Clean up - delete (global as any).fetch - }) - - it("should handle codex-mini-latest with multiple user messages", async () => { - // Mock fetch for streaming response - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_text.delta","delta":"Combined response"}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode('data: {"type":"response.completed"}\n\n')) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "codex-mini-latest", - }) - - const systemPrompt = "You are a helpful assistant." - const messages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "First question" }, - { role: "assistant", content: "First answer" }, - { role: "user", content: "Second question" }, - ] - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Verify the request body includes full conversation like GPT-5 - const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(requestBody.input).toContain("Developer: You are a helpful assistant") - expect(requestBody.input).toContain("User: First question") - expect(requestBody.input).toContain("Assistant: First answer") - expect(requestBody.input).toContain("User: Second question") - - // Clean up - delete (global as any).fetch - }) - - it("should handle codex-mini-latest stream error events", async () => { - // Mock fetch with error event in stream - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_text.delta","delta":"Partial"}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.error","error":{"message":"Model overloaded"}}\n\n', - ), - ) - // The error handler will throw, but we still need to close the stream - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "codex-mini-latest", - }) - - const systemPrompt = "You are a helpful assistant." 
- const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }] - - const stream = handler.createMessage(systemPrompt, messages) - - // Should throw an error when encountering error event - await expect(async () => { - const chunks = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - }).rejects.toThrow("Responses API error: Model overloaded") - - // Clean up - delete (global as any).fetch }) }) }) diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 053af7f5e5f..5e498bee450 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -7,10 +7,8 @@ import { OpenAiNativeModelId, openAiNativeModels, OPENAI_NATIVE_DEFAULT_TEMPERATURE, - GPT5_DEFAULT_TEMPERATURE, type ReasoningEffort, type VerbosityLevel, - type ReasoningEffortWithMinimal, } from "@roo-code/types" import type { ApiHandlerOptions } from "../../shared/api" @@ -18,7 +16,7 @@ import type { ApiHandlerOptions } from "../../shared/api" import { calculateApiCostOpenAI } from "../../shared/cost" import { convertToOpenAiMessages } from "../transform/openai-format" -import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" +import { ApiStream } from "../transform/stream" import { getModelParams } from "../transform/model-params" import { BaseProvider } from "./base-provider" @@ -26,77 +24,43 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". export type OpenAiNativeModel = ReturnType -// GPT-5 specific types +// GPT-5 specific types for Responses API +type ReasoningEffortWithMinimal = ReasoningEffort | "minimal" + +interface GPT5ResponsesAPIParams { + model: string + input: string + reasoning?: { + effort: ReasoningEffortWithMinimal + } + text?: { + verbosity: VerbosityLevel + } +} + +interface GPT5ResponseChunk { + type: "text" | "reasoning" | "usage" + text?: string + reasoning?: string + usage?: { + input_tokens: number + output_tokens: number + reasoning_tokens?: number + total_tokens: number + } +} export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI - private lastResponseId: string | undefined - private responseIdPromise: Promise | undefined - private responseIdResolver: ((value: string | undefined) => void) | undefined - - // Event types handled by the shared GPT-5 event processor to avoid duplication - private readonly gpt5CoreHandledTypes = new Set([ - "response.text.delta", - "response.output_text.delta", - "response.reasoning.delta", - "response.reasoning_text.delta", - "response.reasoning_summary.delta", - "response.reasoning_summary_text.delta", - "response.refusal.delta", - "response.output_item.added", - "response.done", - "response.completed", - ]) constructor(options: ApiHandlerOptions) { super() this.options = options - // Default to including reasoning.summary: "auto" for GPT‑5 unless explicitly disabled - if (this.options.enableGpt5ReasoningSummary === undefined) { - this.options.enableGpt5ReasoningSummary = true - } const apiKey = this.options.openAiNativeApiKey ?? "not-provided" this.client = new OpenAI({ baseURL: this.options.openAiNativeBaseUrl, apiKey }) } - private normalizeGpt5Usage(usage: any, model: OpenAiNativeModel): ApiStreamUsageChunk | undefined { - if (!usage) return undefined - - const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0 - const totalOutputTokens = usage.output_tokens ?? usage.completion_tokens ?? 
0 - const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? 0 - const cacheReadTokens = usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? 0 - - const totalCost = calculateApiCostOpenAI( - model.info, - totalInputTokens, - totalOutputTokens, - cacheWriteTokens || 0, - cacheReadTokens || 0, - ) - - return { - type: "usage", - inputTokens: totalInputTokens, - outputTokens: totalOutputTokens, - cacheWriteTokens, - cacheReadTokens, - totalCost, - } - } - - private resolveResponseId(responseId: string | undefined): void { - if (responseId) { - this.lastResponseId = responseId - } - // Resolve the promise so the next request can use this ID - if (this.responseIdResolver) { - this.responseIdResolver(responseId) - this.responseIdResolver = undefined - } - } - override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], @@ -117,9 +81,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio yield* this.handleReasonerMessage(model, id, systemPrompt, messages) } else if (model.id.startsWith("o1")) { yield* this.handleO1FamilyMessage(model, systemPrompt, messages) - } else if (this.isResponsesApiModel(model.id)) { - // Both GPT-5 and Codex Mini use the v1/responses endpoint - yield* this.handleResponsesApiMessage(model, systemPrompt, messages, metadata) + } else if (this.isGpt5Model(model.id)) { + yield* this.handleGpt5Message(model, systemPrompt, messages) } else { yield* this.handleDefaultModelMessage(model, systemPrompt, messages) } @@ -194,8 +157,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio ...(reasoning && reasoning), } - // Add verbosity if supported - if (verbosity) { + // Add verbosity if supported (for future GPT-5 models) + if (verbosity && model.id.startsWith("gpt-5")) { params.verbosity = verbosity } @@ -213,935 +176,185 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio ) } - private async *handleResponsesApiMessage( + private async *handleGpt5Message( model: OpenAiNativeModel, systemPrompt: string, messages: Anthropic.Messages.MessageParam[], - metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - // Prefer the official SDK Responses API with streaming; fall back to fetch-based SSE if needed. 
- const { verbosity } = this.getModel() + // GPT-5 uses the Responses API, not Chat Completions + // We need to format the input as a single string combining system prompt and messages + const formattedInput = this.formatInputForResponsesAPI(systemPrompt, messages) - // Both GPT-5 and Codex Mini use the same v1/responses endpoint format - - // Resolve reasoning effort (supports "minimal" for GPT‑5) + // Get reasoning effort, supporting the new "minimal" option for GPT-5 const reasoningEffort = this.getGpt5ReasoningEffort(model) - // Wait for any pending response ID from a previous request to be available - // This handles the race condition with fast nano model responses - let effectivePreviousResponseId = metadata?.previousResponseId - - // Only allow fallback to pending/last response id when not explicitly suppressed - if (!metadata?.suppressPreviousResponseId) { - // If we have a pending response ID promise, wait for it to resolve - if (!effectivePreviousResponseId && this.responseIdPromise) { - try { - const resolvedId = await Promise.race([ - this.responseIdPromise, - // Timeout after 100ms to avoid blocking too long - new Promise((resolve) => setTimeout(() => resolve(undefined), 100)), - ]) - if (resolvedId) { - effectivePreviousResponseId = resolvedId - } - } catch { - // Non-fatal if promise fails - } - } - - // Fall back to the last known response ID if still not available - if (!effectivePreviousResponseId) { - effectivePreviousResponseId = this.lastResponseId - } - } - - // Format input and capture continuity id - const { formattedInput, previousResponseId } = this.prepareGpt5Input(systemPrompt, messages, metadata) - const requestPreviousResponseId = effectivePreviousResponseId ?? previousResponseId - - // Create a new promise for this request's response ID - this.responseIdPromise = new Promise((resolve) => { - this.responseIdResolver = resolve - }) - - // Build a request body (also used for fallback) - // Ensure we explicitly pass max_output_tokens for GPT‑5 based on Roo's reserved model response calculation - // so requests do not default to very large limits (e.g., 120k). - interface Gpt5RequestBody { - model: string - input: string - stream: boolean - reasoning?: { effort: ReasoningEffortWithMinimal; summary?: "auto" } - text?: { verbosity: VerbosityLevel } - temperature?: number - max_output_tokens?: number - previous_response_id?: string - } + // Get verbosity from model settings, default to "medium" if not specified + const verbosity = model.verbosity || "medium" - const requestBody: Gpt5RequestBody = { + // Prepare the request parameters for Responses API + const params: GPT5ResponsesAPIParams = { model: model.id, input: formattedInput, - stream: true, ...(reasoningEffort && { reasoning: { effort: reasoningEffort, - ...(this.options.enableGpt5ReasoningSummary ? { summary: "auto" as const } : {}), }, }), - text: { verbosity: (verbosity || "medium") as VerbosityLevel }, - temperature: this.options.modelTemperature ?? GPT5_DEFAULT_TEMPERATURE, - // Explicitly include the calculated max output tokens for GPT‑5. - // Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams). - ...(model.maxTokens ? 
{ max_output_tokens: model.maxTokens } : {}), - ...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }), + text: { + verbosity: verbosity, + }, } - try { - // Use the official SDK - const stream = (await (this.client as any).responses.create(requestBody)) as AsyncIterable - - if (typeof (stream as any)[Symbol.asyncIterator] !== "function") { - throw new Error( - "OpenAI SDK did not return an AsyncIterable for Responses API streaming. Falling back to SSE.", - ) - } - - for await (const event of stream) { - for await (const outChunk of this.processGpt5Event(event, model)) { - yield outChunk - } - } - } catch (sdkErr: any) { - // Check if this is a 400 error about previous_response_id not found - const errorMessage = sdkErr?.message || sdkErr?.error?.message || "" - const is400Error = sdkErr?.status === 400 || sdkErr?.response?.status === 400 - const isPreviousResponseError = - errorMessage.includes("Previous response") || errorMessage.includes("not found") - - if (is400Error && requestBody.previous_response_id && isPreviousResponseError) { - // Log the error and retry without the previous_response_id - console.warn( - `[GPT-5] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`, - ) + // Since the OpenAI SDK doesn't yet support the Responses API, + // we'll make a direct HTTP request + const response = await this.makeGpt5ResponsesAPIRequest(params, model) - // Remove the problematic previous_response_id and retry - const retryRequestBody = { ...requestBody } - delete retryRequestBody.previous_response_id - - // Clear the stored lastResponseId to prevent using it again - this.lastResponseId = undefined - - try { - // Retry with the SDK - const retryStream = (await (this.client as any).responses.create( - retryRequestBody, - )) as AsyncIterable - - if (typeof (retryStream as any)[Symbol.asyncIterator] !== "function") { - // If SDK fails, fall back to SSE - yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata) - return - } - - for await (const event of retryStream) { - for await (const outChunk of this.processGpt5Event(event, model)) { - yield outChunk - } - } - return - } catch (retryErr) { - // If retry also fails, fall back to SSE - yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata) - return - } - } - - // For other errors, fallback to manual SSE via fetch - yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata) - } + yield* this.handleGpt5StreamResponse(response, model) } private formatInputForResponsesAPI(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): string { - // Format the conversation for the Responses API input field - // Use Developer role format for GPT-5 (aligning with o1/o3 Developer role usage per GPT-5 Responses guidance) - // This ensures consistent instruction handling across reasoning models - let formattedInput = `Developer: ${systemPrompt}\n\n` + // Format the conversation for the Responses API's single input field + let formattedInput = `System: ${systemPrompt}\n\n` for (const message of messages) { const role = message.role === "user" ? 
"User" : "Assistant" - - // Handle text content - if (typeof message.content === "string") { - formattedInput += `${role}: ${message.content}\n\n` - } else if (Array.isArray(message.content)) { - // Handle content blocks - const textContent = message.content - .filter((block) => block.type === "text") - .map((block) => (block as any).text) - .join("\n") - if (textContent) { - formattedInput += `${role}: ${textContent}\n\n` - } - } + const content = + typeof message.content === "string" + ? message.content + : message.content.map((c) => (c.type === "text" ? c.text : "[image]")).join(" ") + formattedInput += `${role}: ${content}\n\n` } return formattedInput.trim() } - private formatSingleMessageForResponsesAPI(message: Anthropic.Messages.MessageParam): string { - // Format a single message for the Responses API when using previous_response_id - const role = message.role === "user" ? "User" : "Assistant" - - // Handle text content - if (typeof message.content === "string") { - return `${role}: ${message.content}` - } else if (Array.isArray(message.content)) { - // Handle content blocks - const textContent = message.content - .filter((block) => block.type === "text") - .map((block) => (block as any).text) - .join("\n") - if (textContent) { - return `${role}: ${textContent}` + private getGpt5ReasoningEffort(model: OpenAiNativeModel): ReasoningEffortWithMinimal | undefined { + const { reasoning } = model + + // Check if reasoning effort is configured + if (reasoning && "reasoning_effort" in reasoning) { + const effort = reasoning.reasoning_effort + // Support the new "minimal" effort level for GPT-5 + if (effort === "low" || effort === "medium" || effort === "high") { + return effort } } - return "" + // Default to "minimal" for GPT-5 models when not specified + // This provides fastest time-to-first-token as per documentation + return "minimal" } - private async *makeGpt5ResponsesAPIRequest( - requestBody: any, + private async makeGpt5ResponsesAPIRequest( + params: GPT5ResponsesAPIParams, model: OpenAiNativeModel, - metadata?: ApiHandlerCreateMessageMetadata, - ): ApiStream { - const apiKey = this.options.openAiNativeApiKey ?? 
"not-provided" - const baseUrl = this.options.openAiNativeBaseUrl || "https://api.openai.com" - const url = `${baseUrl}/v1/responses` - - try { - const response = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${apiKey}`, - Accept: "text/event-stream", - }, - body: JSON.stringify(requestBody), - }) - - if (!response.ok) { - const errorText = await response.text() - - let errorMessage = `GPT-5 API request failed (${response.status})` - let errorDetails = "" - - // Try to parse error as JSON for better error messages - try { - const errorJson = JSON.parse(errorText) - if (errorJson.error?.message) { - errorDetails = errorJson.error.message - } else if (errorJson.message) { - errorDetails = errorJson.message - } else { - errorDetails = errorText - } - } catch { - // If not JSON, use the raw text - errorDetails = errorText - } - - // Check if this is a 400 error about previous_response_id not found - const isPreviousResponseError = - errorDetails.includes("Previous response") || errorDetails.includes("not found") - - if (response.status === 400 && requestBody.previous_response_id && isPreviousResponseError) { - // Log the error and retry without the previous_response_id - console.warn( - `[GPT-5 SSE] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`, - ) - - // Remove the problematic previous_response_id and retry - const retryRequestBody = { ...requestBody } - delete retryRequestBody.previous_response_id - - // Clear the stored lastResponseId to prevent using it again - this.lastResponseId = undefined - // Resolve the promise once to unblock any waiting requests - this.resolveResponseId(undefined) - - // Retry the request without the previous_response_id - const retryResponse = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${apiKey}`, - Accept: "text/event-stream", - }, - body: JSON.stringify(retryRequestBody), - }) - - if (!retryResponse.ok) { - // If retry also fails, throw the original error - throw new Error(`GPT-5 API retry failed (${retryResponse.status})`) - } - - if (!retryResponse.body) { - throw new Error("GPT-5 Responses API error: No response body from retry request") - } - - // Handle the successful retry response - yield* this.handleGpt5StreamResponse(retryResponse.body, model) - return - } - - // Provide user-friendly error messages based on status code - switch (response.status) { - case 400: - errorMessage = "Invalid request to GPT-5 API. Please check your input parameters." - break - case 401: - errorMessage = "Authentication failed. Please check your OpenAI API key." - break - case 403: - errorMessage = "Access denied. Your API key may not have access to GPT-5 models." - break - case 404: - errorMessage = - "GPT-5 API endpoint not found. The model may not be available yet or requires a different configuration." - break - case 429: - errorMessage = "Rate limit exceeded. Please try again later." - break - case 500: - case 502: - case 503: - errorMessage = "OpenAI service error. Please try again later." 
- break - default: - errorMessage = `GPT-5 API error (${response.status})` - } - - // Append details if available - if (errorDetails) { - errorMessage += ` - ${errorDetails}` - } - - throw new Error(errorMessage) - } - - if (!response.body) { - throw new Error("GPT-5 Responses API error: No response body") - } - - // Handle streaming response - yield* this.handleGpt5StreamResponse(response.body, model) - } catch (error) { - if (error instanceof Error) { - // Re-throw with the original error message if it's already formatted - if (error.message.includes("GPT-5")) { - throw error - } - // Otherwise, wrap it with context - throw new Error(`Failed to connect to GPT-5 API: ${error.message}`) - } - // Handle non-Error objects - throw new Error(`Unexpected error connecting to GPT-5 API`) - } - } - - /** - * Prepares the input and conversation continuity parameters for a GPT-5 API call. - * - * - If a `previousResponseId` is available (either from metadata or the handler's state), - * it formats only the most recent user message for the input and returns the response ID - * to maintain conversation context. - * - Otherwise, it formats the entire conversation history (system prompt + messages) for the input. - * - * @returns An object containing the formatted input string and the previous response ID (if used). - */ - private prepareGpt5Input( - systemPrompt: string, - messages: Anthropic.Messages.MessageParam[], - metadata?: ApiHandlerCreateMessageMetadata, - ): { formattedInput: string; previousResponseId?: string } { - // Respect explicit suppression signal for continuity (e.g. immediately after condense) - const isFirstMessage = messages.length === 1 && messages[0].role === "user" - const allowFallback = !metadata?.suppressPreviousResponseId - - const previousResponseId = - metadata?.previousResponseId ?? (allowFallback && !isFirstMessage ? this.lastResponseId : undefined) - - if (previousResponseId) { - const lastUserMessage = [...messages].reverse().find((msg) => msg.role === "user") - const formattedInput = lastUserMessage ? this.formatSingleMessageForResponsesAPI(lastUserMessage) : "" - return { formattedInput, previousResponseId } - } else { - const formattedInput = this.formatInputForResponsesAPI(systemPrompt, messages) - return { formattedInput } + ): Promise> { + // The OpenAI SDK doesn't have direct support for the Responses API yet, + // but we can access it through the underlying client request method if available. + // For now, we'll use the Chat Completions API with GPT-5 specific formatting + // to maintain compatibility while the Responses API SDK support is being added. + + // Convert Responses API params to Chat Completions format + // GPT-5 models use "developer" role for system messages + const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "developer", content: params.input }] + + // Build the request parameters + const requestParams: any = { + model: params.model, + messages, + stream: true, + stream_options: { include_usage: true }, } - } - /** - * Handles the streaming response from the GPT-5 Responses API. - * - * This function iterates through the Server-Sent Events (SSE) stream, parses each event, - * and yields structured data chunks (`ApiStream`). It handles a wide variety of event types, - * including text deltas, reasoning, usage data, and various status/tool events. 
- * - * The following event types are intentionally ignored as they are not currently consumed - * by the client application: - * - Audio events (`response.audio.*`) - * - Most tool call events (e.g., `response.function_call_arguments.*`, `response.mcp_call.*`, etc.) - * as the client does not yet support rendering these tool interactions. - * - Status events (`response.created`, `response.in_progress`, etc.) as they are informational - * and do not affect the final output. - */ - private async *handleGpt5StreamResponse(body: ReadableStream, model: OpenAiNativeModel): ApiStream { - const reader = body.getReader() - const decoder = new TextDecoder() - let buffer = "" - let hasContent = false - let totalInputTokens = 0 - let totalOutputTokens = 0 - - try { - while (true) { - const { done, value } = await reader.read() - if (done) break - - buffer += decoder.decode(value, { stream: true }) - const lines = buffer.split("\n") - buffer = lines.pop() || "" - - for (const line of lines) { - if (line.startsWith("data: ")) { - const data = line.slice(6).trim() - if (data === "[DONE]") { - continue - } - - try { - const parsed = JSON.parse(data) - - // Store response ID for conversation continuity - if (parsed.response?.id) { - this.resolveResponseId(parsed.response.id) - } - - // Delegate standard event types to the shared processor to avoid duplication - if (parsed?.type && this.gpt5CoreHandledTypes.has(parsed.type)) { - for await (const outChunk of this.processGpt5Event(parsed, model)) { - // Track whether we've emitted any content so fallback handling can decide appropriately - if (outChunk.type === "text" || outChunk.type === "reasoning") { - hasContent = true - } - yield outChunk - } - continue - } - - // Check if this is a complete response (non-streaming format) - if (parsed.response && parsed.response.output && Array.isArray(parsed.response.output)) { - // Handle complete response in the initial event - for (const outputItem of parsed.response.output) { - if (outputItem.type === "text" && outputItem.content) { - for (const content of outputItem.content) { - if (content.type === "text" && content.text) { - hasContent = true - yield { - type: "text", - text: content.text, - } - } - } - } - // Additionally handle reasoning summaries if present (non-streaming summary output) - if (outputItem.type === "reasoning" && Array.isArray(outputItem.summary)) { - for (const summary of outputItem.summary) { - if (summary?.type === "summary_text" && typeof summary.text === "string") { - hasContent = true - yield { - type: "reasoning", - text: summary.text, - } - } - } - } - } - // Check for usage in the complete response - if (parsed.response.usage) { - const usageData = this.normalizeGpt5Usage(parsed.response.usage, model) - if (usageData) { - yield usageData - } - } - } - // Handle streaming delta events for text content - else if ( - parsed.type === "response.text.delta" || - parsed.type === "response.output_text.delta" - ) { - // Primary streaming event for text deltas - if (parsed.delta) { - hasContent = true - yield { - type: "text", - text: parsed.delta, - } - } - } else if ( - parsed.type === "response.text.done" || - parsed.type === "response.output_text.done" - ) { - // Text streaming completed - final text already streamed via deltas - } - // Handle reasoning delta events - else if ( - parsed.type === "response.reasoning.delta" || - parsed.type === "response.reasoning_text.delta" - ) { - // Streaming reasoning content - if (parsed.delta) { - hasContent = true - yield { - type: "reasoning", - 
text: parsed.delta, - } - } - } else if ( - parsed.type === "response.reasoning.done" || - parsed.type === "response.reasoning_text.done" - ) { - // Reasoning streaming completed - } - // Handle reasoning summary events - else if ( - parsed.type === "response.reasoning_summary.delta" || - parsed.type === "response.reasoning_summary_text.delta" - ) { - // Streaming reasoning summary - if (parsed.delta) { - hasContent = true - yield { - type: "reasoning", - text: parsed.delta, - } - } - } else if ( - parsed.type === "response.reasoning_summary.done" || - parsed.type === "response.reasoning_summary_text.done" - ) { - // Reasoning summary completed - } - // Handle refusal delta events - else if (parsed.type === "response.refusal.delta") { - // Model is refusing to answer - if (parsed.delta) { - hasContent = true - yield { - type: "text", - text: `[Refusal] ${parsed.delta}`, - } - } - } else if (parsed.type === "response.refusal.done") { - // Refusal completed - } - // Handle audio delta events (for multimodal responses) - else if (parsed.type === "response.audio.delta") { - // Audio streaming - we'll skip for now as we focus on text - // Could be handled in future for voice responses - } else if (parsed.type === "response.audio.done") { - // Audio completed - } - // Handle audio transcript delta events - else if (parsed.type === "response.audio_transcript.delta") { - // Audio transcript streaming - if (parsed.delta) { - hasContent = true - yield { - type: "text", - text: parsed.delta, - } - } - } else if (parsed.type === "response.audio_transcript.done") { - // Audio transcript completed - } - // Handle content part events (for structured content) - else if (parsed.type === "response.content_part.added") { - // New content part added - could be text, image, etc. 
- if (parsed.part?.type === "text" && parsed.part.text) { - hasContent = true - yield { - type: "text", - text: parsed.part.text, - } - } - } else if (parsed.type === "response.content_part.done") { - // Content part completed - } - // Handle output item events (alternative format) - else if (parsed.type === "response.output_item.added") { - // This is where the actual content comes through in some test cases - if (parsed.item) { - if (parsed.item.type === "text" && parsed.item.text) { - hasContent = true - yield { type: "text", text: parsed.item.text } - } else if (parsed.item.type === "reasoning" && parsed.item.text) { - hasContent = true - yield { type: "reasoning", text: parsed.item.text } - } else if (parsed.item.type === "message" && parsed.item.content) { - // Handle message type items - for (const content of parsed.item.content) { - if (content.type === "text" && content.text) { - hasContent = true - yield { type: "text", text: content.text } - } - } - } - } - } else if (parsed.type === "response.output_item.done") { - // Output item completed - } - // Handle function/tool call events - else if (parsed.type === "response.function_call_arguments.delta") { - // Function call arguments streaming - // We could yield this as a special type if needed for tool usage - } else if (parsed.type === "response.function_call_arguments.done") { - // Function call completed - } - // Handle MCP (Model Context Protocol) tool events - else if (parsed.type === "response.mcp_call_arguments.delta") { - // MCP tool call arguments streaming - } else if (parsed.type === "response.mcp_call_arguments.done") { - // MCP tool call completed - } else if (parsed.type === "response.mcp_call.in_progress") { - // MCP tool call in progress - } else if ( - parsed.type === "response.mcp_call.completed" || - parsed.type === "response.mcp_call.failed" - ) { - // MCP tool call status events - } else if (parsed.type === "response.mcp_list_tools.in_progress") { - // MCP list tools in progress - } else if ( - parsed.type === "response.mcp_list_tools.completed" || - parsed.type === "response.mcp_list_tools.failed" - ) { - // MCP list tools status events - } - // Handle web search events - else if (parsed.type === "response.web_search_call.searching") { - // Web search in progress - } else if (parsed.type === "response.web_search_call.in_progress") { - // Processing web search results - } else if (parsed.type === "response.web_search_call.completed") { - // Web search completed - } - // Handle code interpreter events - else if (parsed.type === "response.code_interpreter_call_code.delta") { - // Code interpreter code streaming - if (parsed.delta) { - // Could yield as a special code type if needed - } - } else if (parsed.type === "response.code_interpreter_call_code.done") { - // Code interpreter code completed - } else if (parsed.type === "response.code_interpreter_call.interpreting") { - // Code interpreter running - } else if (parsed.type === "response.code_interpreter_call.in_progress") { - // Code execution in progress - } else if (parsed.type === "response.code_interpreter_call.completed") { - // Code interpreter completed - } - // Handle file search events - else if (parsed.type === "response.file_search_call.searching") { - // File search in progress - } else if (parsed.type === "response.file_search_call.in_progress") { - // Processing file search results - } else if (parsed.type === "response.file_search_call.completed") { - // File search completed - } - // Handle image generation events - else if (parsed.type === 
"response.image_gen_call.generating") { - // Image generation in progress - } else if (parsed.type === "response.image_gen_call.in_progress") { - // Processing image generation - } else if (parsed.type === "response.image_gen_call.partial_image") { - // Image partially generated - } else if (parsed.type === "response.image_gen_call.completed") { - // Image generation completed - } - // Handle computer use events - else if ( - parsed.type === "response.computer_tool_call.output_item" || - parsed.type === "response.computer_tool_call.output_screenshot" - ) { - // Computer use tool events - } - // Handle annotation events - else if ( - parsed.type === "response.output_text_annotation.added" || - parsed.type === "response.text_annotation.added" - ) { - // Text annotation events - could be citations, references, etc. - } - // Handle error events - else if (parsed.type === "response.error" || parsed.type === "error") { - // Error event from the API - if (parsed.error || parsed.message) { - throw new Error( - `Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}`, - ) - } - } - // Handle incomplete event - else if (parsed.type === "response.incomplete") { - // Response was incomplete - might need to handle specially - } - // Handle queued event - else if (parsed.type === "response.queued") { - // Response is queued - } - // Handle in_progress event - else if (parsed.type === "response.in_progress") { - // Response is being processed - } - // Handle failed event - else if (parsed.type === "response.failed") { - // Response failed - if (parsed.error || parsed.message) { - throw new Error( - `GPT-5 response failed: ${parsed.error?.message || parsed.message || "Unknown failure"}`, - ) - } - } else if (parsed.type === "response.completed" || parsed.type === "response.done") { - // Store response ID for conversation continuity - if (parsed.response?.id) { - this.resolveResponseId(parsed.response.id) - } - - // Check if the done event contains the complete output (as a fallback) - if ( - !hasContent && - parsed.response && - parsed.response.output && - Array.isArray(parsed.response.output) - ) { - for (const outputItem of parsed.response.output) { - if (outputItem.type === "message" && outputItem.content) { - for (const content of outputItem.content) { - if (content.type === "output_text" && content.text) { - hasContent = true - yield { - type: "text", - text: content.text, - } - } - } - } - // Also surface reasoning summaries if present in the final output - if (outputItem.type === "reasoning" && Array.isArray(outputItem.summary)) { - for (const summary of outputItem.summary) { - if ( - summary?.type === "summary_text" && - typeof summary.text === "string" - ) { - hasContent = true - yield { - type: "reasoning", - text: summary.text, - } - } - } - } - } - } - - // Usage for done/completed is already handled by processGpt5Event in SDK path. - // For SSE path, usage often arrives separately; avoid double-emitting here. - } - // These are structural or status events, we can just log them at a lower level or ignore. 
- else if ( - parsed.type === "response.created" || - parsed.type === "response.in_progress" || - parsed.type === "response.output_item.done" || - parsed.type === "response.content_part.added" || - parsed.type === "response.content_part.done" - ) { - // Status events - no action needed - } - // Fallback for older formats or unexpected responses - else if (parsed.choices?.[0]?.delta?.content) { - hasContent = true - yield { - type: "text", - text: parsed.choices[0].delta.content, - } - } - // Additional fallback: some events place text under 'item.text' even if type isn't matched above - else if ( - parsed.item && - typeof parsed.item.text === "string" && - parsed.item.text.length > 0 - ) { - hasContent = true - yield { - type: "text", - text: parsed.item.text, - } - } else if (parsed.usage) { - // Handle usage if it arrives in a separate, non-completed event - const usageData = this.normalizeGpt5Usage(parsed.usage, model) - if (usageData) { - yield usageData - } - } - } catch (e) { - // Only ignore JSON parsing errors, re-throw actual API errors - if (!(e instanceof SyntaxError)) { - throw e - } - } - } - // Also try to parse non-SSE formatted lines - else if (line.trim() && !line.startsWith(":")) { - try { - const parsed = JSON.parse(line) - - // Try to extract content from various possible locations - if (parsed.content || parsed.text || parsed.message) { - hasContent = true - yield { - type: "text", - text: parsed.content || parsed.text || parsed.message, - } - } - } catch { - // Not JSON, might be plain text - ignore - } - } - } - } - - // If we didn't get any content, don't throw - the API might have returned an empty response - // This can happen in certain edge cases and shouldn't break the flow - } catch (error) { - if (error instanceof Error) { - throw new Error(`Error processing GPT-5 response stream: ${error.message}`) + // Add reasoning effort if specified (supporting "minimal" for GPT-5) + if (params.reasoning?.effort) { + if (params.reasoning.effort === "minimal") { + // For minimal effort, we pass "minimal" as the reasoning_effort + requestParams.reasoning_effort = "minimal" + } else { + requestParams.reasoning_effort = params.reasoning.effort } - throw new Error("Unexpected error processing GPT-5 response stream") - } finally { - reader.releaseLock() } - } - /** - * Shared processor for GPT‑5 Responses API events. - * Used by both the official SDK streaming path and (optionally) by the SSE fallback. 
- */ - private async *processGpt5Event(event: any, model: OpenAiNativeModel): ApiStream { - // Persist response id for conversation continuity when available - if (event?.response?.id) { - this.resolveResponseId(event.response.id) + // Add verbosity control for GPT-5 models + // According to the docs, Chat Completions API also supports verbosity parameter + if (params.text?.verbosity) { + requestParams.verbosity = params.text.verbosity } - // Handle known streaming text deltas - if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") { - if (event?.delta) { - yield { type: "text", text: event.delta } - } - return - } + const stream = (await this.client.chat.completions.create( + requestParams, + )) as unknown as AsyncIterable - // Handle reasoning deltas (including summary variants) - if ( - event?.type === "response.reasoning.delta" || - event?.type === "response.reasoning_text.delta" || - event?.type === "response.reasoning_summary.delta" || - event?.type === "response.reasoning_summary_text.delta" - ) { - if (event?.delta) { - yield { type: "reasoning", text: event.delta } - } - return - } + // Convert the stream to GPT-5 response format + return this.convertChatStreamToGpt5Format(stream) + } - // Handle refusal deltas - if (event?.type === "response.refusal.delta") { - if (event?.delta) { - yield { type: "text", text: `[Refusal] ${event.delta}` } - } - return - } + private async *convertChatStreamToGpt5Format( + stream: AsyncIterable, + ): AsyncIterable { + for await (const chunk of stream) { + const delta = chunk.choices[0]?.delta - // Handle output item additions (SDK or Responses API alternative format) - if (event?.type === "response.output_item.added") { - const item = event?.item - if (item) { - if (item.type === "text" && item.text) { - yield { type: "text", text: item.text } - } else if (item.type === "reasoning" && item.text) { - yield { type: "reasoning", text: item.text } - } else if (item.type === "message" && Array.isArray(item.content)) { - for (const content of item.content) { - // Some implementations send 'text'; others send 'output_text' - if ((content?.type === "text" || content?.type === "output_text") && content?.text) { - yield { type: "text", text: content.text } - } - } + if (delta?.content) { + yield { + type: "text", + text: delta.content, } } - return - } - // Completion events that may carry usage - if (event?.type === "response.done" || event?.type === "response.completed") { - const usage = event?.response?.usage || event?.usage || undefined - const usageData = this.normalizeGpt5Usage(usage, model) - if (usageData) { - yield usageData - } - return - } - - // Fallbacks for older formats or unexpected objects - if (event?.choices?.[0]?.delta?.content) { - yield { type: "text", text: event.choices[0].delta.content } - return - } - - if (event?.usage) { - const usageData = this.normalizeGpt5Usage(event.usage, model) - if (usageData) { - yield usageData + if (chunk.usage) { + yield { + type: "usage", + usage: { + input_tokens: chunk.usage.prompt_tokens || 0, + output_tokens: chunk.usage.completion_tokens || 0, + total_tokens: chunk.usage.total_tokens || 0, + }, + } } } } - private getGpt5ReasoningEffort(model: OpenAiNativeModel): ReasoningEffortWithMinimal | undefined { - const { reasoning, info } = model + private async *handleGpt5StreamResponse( + stream: AsyncIterable, + model: OpenAiNativeModel, + ): ApiStream { + for await (const chunk of stream) { + if (chunk.type === "text" && chunk.text) { + yield { + type: "text", + 
text: chunk.text, + } + } else if (chunk.type === "usage" && chunk.usage) { + const inputTokens = chunk.usage.input_tokens + const outputTokens = chunk.usage.output_tokens + const cacheReadTokens = 0 + const cacheWriteTokens = 0 + const totalCost = calculateApiCostOpenAI( + model.info, + inputTokens, + outputTokens, + cacheWriteTokens, + cacheReadTokens, + ) - // Check if reasoning effort is configured - if (reasoning && "reasoning_effort" in reasoning) { - const effort = reasoning.reasoning_effort as string - // Support all effort levels including "minimal" for GPT-5 - if (effort === "minimal" || effort === "low" || effort === "medium" || effort === "high") { - return effort as ReasoningEffortWithMinimal + yield { + type: "usage", + inputTokens, + outputTokens, + cacheWriteTokens, + cacheReadTokens, + totalCost, + } } } - - // Centralize default: use the model's default from types if available; otherwise undefined - return info.reasoningEffort as ReasoningEffortWithMinimal | undefined } private isGpt5Model(modelId: string): boolean { return modelId.startsWith("gpt-5") } - private isResponsesApiModel(modelId: string): boolean { - // Both GPT-5 and Codex Mini use the v1/responses endpoint - return modelId.startsWith("gpt-5") || modelId === "codex-mini-latest" - } - private async *handleStreamResponse( stream: AsyncIterable, model: OpenAiNativeModel, @@ -1163,28 +376,16 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } private async *yieldUsage(info: ModelInfo, usage: OpenAI.Completions.CompletionUsage | undefined): ApiStream { - const inputTokens = usage?.prompt_tokens || 0 + const inputTokens = usage?.prompt_tokens || 0 // sum of cache hits and misses const outputTokens = usage?.completion_tokens || 0 - - // Extract cache tokens from prompt_tokens_details - // According to OpenAI API, cached_tokens represents tokens read from cache - const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || undefined - - // Cache write tokens are not typically reported in the standard streaming response - // They would be in cache_creation_input_tokens if available - const cacheWriteTokens = (usage as any)?.cache_creation_input_tokens || undefined - - const totalCost = calculateApiCostOpenAI( - info, - inputTokens, - outputTokens, - cacheWriteTokens || 0, - cacheReadTokens || 0, - ) + const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0 + const cacheWriteTokens = 0 + const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens) + const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens) yield { type: "usage", - inputTokens: inputTokens, + inputTokens: nonCachedInputTokens, outputTokens: outputTokens, cacheWriteTokens: cacheWriteTokens, cacheReadTokens: cacheReadTokens, @@ -1205,17 +406,15 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio modelId: id, model: info, settings: this.options, - defaultTemperature: this.isGpt5Model(id) ? GPT5_DEFAULT_TEMPERATURE : OPENAI_NATIVE_DEFAULT_TEMPERATURE, + defaultTemperature: OPENAI_NATIVE_DEFAULT_TEMPERATURE, }) - // For models using the Responses API (GPT-5 and Codex Mini), ensure we support reasoning effort - if (this.isResponsesApiModel(id)) { - const effort = - (this.options.reasoningEffort as ReasoningEffortWithMinimal | undefined) ?? 
- (info.reasoningEffort as ReasoningEffortWithMinimal | undefined) - - if (effort) { - ;(params.reasoning as any) = { reasoning_effort: effort } + // For GPT-5 models, ensure we support minimal reasoning effort + if (this.isGpt5Model(id) && params.reasoning) { + // Allow "minimal" effort for GPT-5 models + const effort = this.options.reasoningEffort + if (effort === "low" || effort === "medium" || effort === "high") { + params.reasoning.reasoning_effort = effort } } @@ -1224,50 +423,25 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio return { id: id.startsWith("o3-mini") ? "o3-mini" : id, info, ...params, verbosity: params.verbosity } } - /** - * Gets the last GPT-5 response ID captured from the Responses API stream. - * Used for maintaining conversation continuity across requests. - * @returns The response ID, or undefined if not available yet - */ - getLastResponseId(): string | undefined { - return this.lastResponseId - } - - /** - * Sets the last GPT-5 response ID for conversation continuity. - * Typically only used in tests or special flows. - * @param responseId The GPT-5 response ID to store - */ - setResponseId(responseId: string): void { - this.lastResponseId = responseId - } - async completePrompt(prompt: string): Promise { try { const { id, temperature, reasoning, verbosity } = this.getModel() - const isResponsesApi = this.isResponsesApiModel(id) - if (isResponsesApi) { - // Models that use the Responses API (GPT-5 and Codex Mini) don't support non-streaming completion - throw new Error(`completePrompt is not supported for ${id}. Use createMessage (Responses API) instead.`) - } - - const params: any = { + const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming & { + verbosity?: VerbosityLevel + } = { model: id, messages: [{ role: "user", content: prompt }], + temperature, + ...(reasoning && reasoning), } - // Add temperature if supported - if (temperature !== undefined) { - params.temperature = temperature - } - - // Add reasoning parameters for models that support them - if (reasoning) { - Object.assign(params, reasoning) + // Add verbosity for GPT-5 models + if (this.isGpt5Model(id) && verbosity) { + params.verbosity = verbosity } - const response = await this.client.chat.completions.create(params) + const response = await this.client.chat.completions.create(params as any) return response.choices[0]?.message.content || "" } catch (error) { if (error instanceof Error) { diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index eed719cf0fb..85abcf1a690 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -305,7 +305,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ], stream: true, ...(isGrokXAI ? 
{} : { stream_options: { include_usage: true } }), - reasoning_effort: modelInfo.reasoningEffort as "low" | "medium" | "high" | undefined, + reasoning_effort: modelInfo.reasoningEffort, temperature: undefined, } @@ -330,7 +330,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl }, ...convertToOpenAiMessages(messages), ], - reasoning_effort: modelInfo.reasoningEffort as "low" | "medium" | "high" | undefined, + reasoning_effort: modelInfo.reasoningEffort, temperature: undefined, } diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts index d2e55fc8f01..8af0b9aa426 100644 --- a/src/api/providers/requesty.ts +++ b/src/api/providers/requesty.ts @@ -116,7 +116,7 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan model, max_tokens, temperature, - ...(reasoning_effort && reasoning_effort !== "minimal" && { reasoning_effort }), + ...(reasoning_effort && { reasoning_effort }), ...(thinking && { thinking }), stream: true, stream_options: { include_usage: true }, diff --git a/src/api/transform/model-params.ts b/src/api/transform/model-params.ts index 933697c0a53..cc30aa56053 100644 --- a/src/api/transform/model-params.ts +++ b/src/api/transform/model-params.ts @@ -2,7 +2,6 @@ import { type ModelInfo, type ProviderSettings, type VerbosityLevel, - type ReasoningEffortWithMinimal, ANTHROPIC_DEFAULT_MAX_TOKENS, } from "@roo-code/types" @@ -39,7 +38,7 @@ type GetModelParamsOptions = { type BaseModelParams = { maxTokens: number | undefined temperature: number | undefined - reasoningEffort: ReasoningEffortWithMinimal | undefined + reasoningEffort: "low" | "medium" | "high" | undefined reasoningBudget: number | undefined verbosity: VerbosityLevel | undefined } @@ -129,8 +128,7 @@ export function getModelParams({ temperature = 1.0 } else if (shouldUseReasoningEffort({ model, settings })) { // "Traditional" reasoning models use the `reasoningEffort` parameter. - const effort = customReasoningEffort ?? model.reasoningEffort - reasoningEffort = effort as ReasoningEffortWithMinimal + reasoningEffort = customReasoningEffort ?? model.reasoningEffort } const params: BaseModelParams = { maxTokens, temperature, reasoningEffort, reasoningBudget, verbosity } diff --git a/src/api/transform/reasoning.ts b/src/api/transform/reasoning.ts index 46ef029ea39..a173c59b19b 100644 --- a/src/api/transform/reasoning.ts +++ b/src/api/transform/reasoning.ts @@ -2,7 +2,7 @@ import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta" import OpenAI from "openai" import type { GenerateContentConfig } from "@google/genai" -import type { ModelInfo, ProviderSettings, ReasoningEffortWithMinimal } from "@roo-code/types" +import type { ModelInfo, ProviderSettings } from "@roo-code/types" import { shouldUseReasoningBudget, shouldUseReasoningEffort } from "../../shared/api" @@ -23,7 +23,7 @@ export type GeminiReasoningParams = GenerateContentConfig["thinkingConfig"] export type GetModelReasoningOptions = { model: ModelInfo reasoningBudget: number | undefined - reasoningEffort: ReasoningEffortWithMinimal | undefined + reasoningEffort: ReasoningEffort | undefined settings: ProviderSettings } @@ -36,9 +36,7 @@ export const getOpenRouterReasoning = ({ shouldUseReasoningBudget({ model, settings }) ? { max_tokens: reasoningBudget } : shouldUseReasoningEffort({ model, settings }) - ? reasoningEffort !== "minimal" - ? { effort: reasoningEffort } - : undefined + ? 
{ effort: reasoningEffort } : undefined export const getAnthropicReasoning = ({ @@ -52,19 +50,8 @@ export const getOpenAiReasoning = ({ model, reasoningEffort, settings, -}: GetModelReasoningOptions): OpenAiReasoningParams | undefined => { - if (!shouldUseReasoningEffort({ model, settings })) { - return undefined - } - - // If model has reasoning effort capability, return object even if effort is undefined - // This preserves the reasoning_effort field in the API call - if (reasoningEffort === "minimal") { - return undefined - } - - return { reasoning_effort: reasoningEffort } -} +}: GetModelReasoningOptions): OpenAiReasoningParams | undefined => + shouldUseReasoningEffort({ model, settings }) ? { reasoning_effort: reasoningEffort } : undefined export const getGeminiReasoning = ({ model, diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 5e96b6fb167..34cc255cd85 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -252,8 +252,6 @@ export class Task extends EventEmitter implements TaskLike { didCompleteReadingStream = false assistantMessageParser?: AssistantMessageParser isAssistantMessageParserEnabled = false - private lastUsedInstructions?: string - private skipPrevResponseIdOnce: boolean = false constructor({ provider, @@ -852,7 +850,6 @@ export class Task extends EventEmitter implements TaskLike { progressStatus?: ToolProgressStatus, options: { isNonInteractive?: boolean - metadata?: Record } = {}, contextCondense?: ContextCondense, ): Promise { @@ -890,7 +887,6 @@ export class Task extends EventEmitter implements TaskLike { images, partial, contextCondense, - metadata: options.metadata, }) } } else { @@ -906,9 +902,6 @@ export class Task extends EventEmitter implements TaskLike { lastMessage.images = images lastMessage.partial = false lastMessage.progressStatus = progressStatus - if (options.metadata) { - ;(lastMessage as any).metadata = options.metadata - } // Instead of streaming partialMessage events, we do a save // and post like normal to persist to disk. @@ -924,15 +917,7 @@ export class Task extends EventEmitter implements TaskLike { this.lastMessageTs = sayTs } - await this.addToClineMessages({ - ts: sayTs, - type: "say", - say: type, - text, - images, - contextCondense, - metadata: options.metadata, - }) + await this.addToClineMessages({ ts: sayTs, type: "say", say: type, text, images, contextCondense }) } } } else { @@ -1777,8 +1762,6 @@ export class Task extends EventEmitter implements TaskLike { presentAssistantMessage(this) } - await this.persistGpt5Metadata(reasoningMessage) - updateApiReqMsg() await this.saveClineMessages() await this.providerRef.deref()?.postStateToWebview() @@ -1997,7 +1980,6 @@ export class Task extends EventEmitter implements TaskLike { Task.lastGlobalApiRequestTime = Date.now() const systemPrompt = await this.getSystemPrompt() - this.lastUsedInstructions = systemPrompt const { contextTokens } = this.getTokenUsage() if (contextTokens) { @@ -2036,10 +2018,6 @@ export class Task extends EventEmitter implements TaskLike { if (truncateResult.error) { await this.say("condense_context_error", truncateResult.error) } else if (truncateResult.summary) { - // A condense operation occurred; for the next GPT‑5 API call we should NOT - // send previous_response_id so the request reflects the fresh condensed context. 
- this.skipPrevResponseIdOnce = true - const { summary, cost, prevContextTokens, newContextTokens = 0 } = truncateResult const contextCondense: ContextCondense = { summary, cost, newContextTokens, prevContextTokens } await this.say( @@ -2056,7 +2034,7 @@ export class Task extends EventEmitter implements TaskLike { } const messagesSinceLastSummary = getMessagesSinceLastSummary(this.apiConversationHistory) - let cleanConversationHistory = maybeRemoveImageBlocks(messagesSinceLastSummary, this.api).map( + const cleanConversationHistory = maybeRemoveImageBlocks(messagesSinceLastSummary, this.api).map( ({ role, content }) => ({ role, content }), ) @@ -2072,41 +2050,9 @@ export class Task extends EventEmitter implements TaskLike { throw new Error("Auto-approval limit reached and user did not approve continuation") } - // Determine GPT‑5 previous_response_id from last persisted assistant turn (if available), - // unless a condense just occurred (skip once after condense). - let previousResponseId: string | undefined = undefined - try { - const modelId = this.api.getModel().id - if (modelId && modelId.startsWith("gpt-5") && !this.skipPrevResponseIdOnce) { - // Find the last assistant message that has a previous_response_id stored - const idx = findLastIndex( - this.clineMessages, - (m) => - m.type === "say" && - (m as any).say === "text" && - (m as any).metadata?.gpt5?.previous_response_id, - ) - if (idx !== -1) { - // Use the previous_response_id from the last assistant message for this request - previousResponseId = ((this.clineMessages[idx] as any).metadata.gpt5.previous_response_id || - undefined) as string | undefined - } - } - } catch { - // non-fatal - } - const metadata: ApiHandlerCreateMessageMetadata = { mode: mode, taskId: this.taskId, - ...(previousResponseId ? { previousResponseId } : {}), - // If a condense just occurred, explicitly suppress continuity fallback for the next call - ...(this.skipPrevResponseIdOnce ? { suppressPreviousResponseId: true } : {}), - } - - // Reset skip flag after applying (it only affects the immediate next call) - if (this.skipPrevResponseIdOnce) { - this.skipPrevResponseIdOnce = false } const stream = this.api.createMessage(systemPrompt, cleanConversationHistory, metadata) @@ -2252,35 +2198,6 @@ export class Task extends EventEmitter implements TaskLike { } } - /** - * Persist GPT-5 per-turn metadata (previous_response_id, instructions, reasoning_summary) - * onto the last complete assistant say("text") message. - */ - private async persistGpt5Metadata(reasoningMessage?: string): Promise { - try { - const modelId = this.api.getModel().id - if (!modelId || !modelId.startsWith("gpt-5")) return - - const lastResponseId: string | undefined = (this.api as any)?.getLastResponseId?.() - const idx = findLastIndex( - this.clineMessages, - (m) => m.type === "say" && (m as any).say === "text" && m.partial !== true, - ) - if (idx !== -1) { - const msg = this.clineMessages[idx] as any - msg.metadata = msg.metadata ?? {} - msg.metadata.gpt5 = { - ...(msg.metadata.gpt5 ?? {}), - previous_response_id: lastResponseId, - instructions: this.lastUsedInstructions, - reasoning_summary: (reasoningMessage ?? 
"").trim() || undefined, - } - } - } catch { - // Non-fatal error in metadata persistence - } - } - // Getters public get cwd() { diff --git a/src/shared/api.ts b/src/shared/api.ts index e9b57af3c17..014b903453e 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -6,15 +6,8 @@ import { } from "@roo-code/types" // ApiHandlerOptions -// Extend ProviderSettings (minus apiProvider) with handler-specific toggles. -export type ApiHandlerOptions = Omit & { - /** - * When true and using GPT‑5 Responses API, include reasoning.summary: "auto" - * so the API returns reasoning summaries (we already parse and surface them). - * Defaults to true; set to false to disable summaries. - */ - enableGpt5ReasoningSummary?: boolean -} + +export type ApiHandlerOptions = Omit // RouterName diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx index 70a58f03bf8..74ba885d25d 100644 --- a/webview-ui/src/components/settings/ApiOptions.tsx +++ b/webview-ui/src/components/settings/ApiOptions.tsx @@ -576,12 +576,6 @@ const ApiOptions = ({ if (value !== "custom-arn" && selectedProvider === "bedrock") { setApiConfigurationField("awsCustomArn", "") } - - // Clear reasoning effort when switching models to allow the new model's default to take effect - // This is especially important for GPT-5 models which default to "medium" - if (selectedProvider === "openai-native") { - setApiConfigurationField("reasoningEffort", undefined) - } }}> @@ -623,14 +617,11 @@ const ApiOptions = ({ modelInfo={selectedModelInfo} /> - {/* Gate Verbosity UI by capability flag */} - {selectedModelInfo?.supportsVerbosity && ( - - )} + {!fromWelcomeView && ( diff --git a/webview-ui/src/components/settings/ThinkingBudget.tsx b/webview-ui/src/components/settings/ThinkingBudget.tsx index a3e2d428b4a..a49ec79efc2 100644 --- a/webview-ui/src/components/settings/ThinkingBudget.tsx +++ b/webview-ui/src/components/settings/ThinkingBudget.tsx @@ -1,12 +1,7 @@ import { useEffect } from "react" import { Checkbox } from "vscrui" -import { - type ProviderSettings, - type ModelInfo, - type ReasoningEffortWithMinimal, - reasoningEfforts, -} from "@roo-code/types" +import { type ProviderSettings, type ModelInfo, type ReasoningEffort, reasoningEfforts } from "@roo-code/types" import { DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS, @@ -32,35 +27,10 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod const isGemini25Pro = selectedModelId && selectedModelId.includes("gemini-2.5-pro") const minThinkingTokens = isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024 - // Check if this is a GPT-5 model to show "minimal" option - // Only show minimal for OpenAI Native provider GPT-5 models - const isOpenAiNativeProvider = apiConfiguration.apiProvider === "openai-native" - const isGpt5Model = isOpenAiNativeProvider && selectedModelId && selectedModelId.startsWith("gpt-5") - // Add "minimal" option for GPT-5 models - // Spread to convert readonly tuple into a mutable array, then expose as readonly for safety - const baseEfforts = [...reasoningEfforts] as ReasoningEffortWithMinimal[] - const availableReasoningEfforts: ReadonlyArray = isGpt5Model - ? 
(["minimal", ...baseEfforts] as ReasoningEffortWithMinimal[]) - : baseEfforts - - // Default reasoning effort - use model's default if available - // GPT-5 models have "medium" as their default in the model configuration - const modelDefaultReasoningEffort = modelInfo?.reasoningEffort as ReasoningEffortWithMinimal | undefined - const defaultReasoningEffort: ReasoningEffortWithMinimal = modelDefaultReasoningEffort || "medium" - const currentReasoningEffort: ReasoningEffortWithMinimal = - (apiConfiguration.reasoningEffort as ReasoningEffortWithMinimal | undefined) || defaultReasoningEffort - const isReasoningBudgetSupported = !!modelInfo && modelInfo.supportsReasoningBudget const isReasoningBudgetRequired = !!modelInfo && modelInfo.requiredReasoningBudget const isReasoningEffortSupported = !!modelInfo && modelInfo.supportsReasoningEffort - // Set default reasoning effort when model supports it and no value is set - useEffect(() => { - if (isReasoningEffortSupported && !apiConfiguration.reasoningEffort && defaultReasoningEffort) { - setApiConfigurationField("reasoningEffort", defaultReasoningEffort) - } - }, [isReasoningEffortSupported, apiConfiguration.reasoningEffort, defaultReasoningEffort, setApiConfigurationField]) - const enableReasoningEffort = apiConfiguration.enableReasoningEffort const customMaxOutputTokens = apiConfiguration.modelMaxTokens || DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS const customMaxThinkingTokens = @@ -139,21 +109,13 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod