From 03d7374b566e01af8d17fb785ed8d91b5f6303f3 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Tue, 9 Dec 2025 19:08:16 -0700 Subject: [PATCH 01/12] feat(deepseek): implement interleaved thinking mode for deepseek-reasoner - Add thinking parameter support for deepseek-reasoner model - Handle streaming reasoning_content from DeepSeek API - Add tool call conversion (tool_use -> tool_calls) for thinking mode - Add tool result conversion (tool_result -> tool messages) - Extract reasoning from content blocks for API continuations - Add getReasoningContent() method for accumulated reasoning - Add comprehensive tests for interleaved thinking mode --- src/api/providers/__tests__/deepseek.spec.ts | 219 +++++++++++++++- src/api/providers/deepseek.ts | 144 ++++++++++- src/api/transform/__tests__/r1-format.spec.ts | 216 ++++++++++++++++ src/api/transform/r1-format.ts | 242 +++++++++++++----- 4 files changed, 749 insertions(+), 72 deletions(-) diff --git a/src/api/providers/__tests__/deepseek.spec.ts b/src/api/providers/__tests__/deepseek.spec.ts index 5e5a677590d..7a882d8a7ab 100644 --- a/src/api/providers/__tests__/deepseek.spec.ts +++ b/src/api/providers/__tests__/deepseek.spec.ts @@ -29,23 +29,75 @@ vi.mock("openai", () => { } } + // Check if this is a reasoning_content test by looking at model + const isReasonerModel = options.model?.includes("deepseek-reasoner") + const isToolCallTest = options.tools?.length > 0 + // Return async iterator for streaming return { [Symbol.asyncIterator]: async function* () { - yield { - choices: [ - { - delta: { content: "Test response" }, - index: 0, - }, - ], - usage: null, + // For reasoner models, emit reasoning_content first + if (isReasonerModel) { + yield { + choices: [ + { + delta: { reasoning_content: "Let me think about this..." }, + index: 0, + }, + ], + usage: null, + } + yield { + choices: [ + { + delta: { reasoning_content: " I'll analyze step by step." }, + index: 0, + }, + ], + usage: null, + } + } + + // For tool call tests with reasoner, emit tool call + if (isReasonerModel && isToolCallTest) { + yield { + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_123", + function: { + name: "get_weather", + arguments: '{"location":"SF"}', + }, + }, + ], + }, + index: 0, + }, + ], + usage: null, + } + } else { + yield { + choices: [ + { + delta: { content: "Test response" }, + index: 0, + }, + ], + usage: null, + } } + yield { choices: [ { delta: {}, index: 0, + finish_reason: isToolCallTest ? "tool_calls" : "stop", }, ], usage: { @@ -317,4 +369,155 @@ describe("DeepSeekHandler", () => { expect(result.cacheReadTokens).toBeUndefined() }) }) + + describe("interleaved thinking mode", () => { + const systemPrompt = "You are a helpful assistant." 
+ const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "text" as const, + text: "Hello!", + }, + ], + }, + ] + + it("should handle reasoning_content in streaming responses for deepseek-reasoner", async () => { + const reasonerHandler = new DeepSeekHandler({ + ...mockOptions, + apiModelId: "deepseek-reasoner", + }) + + const stream = reasonerHandler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Should have reasoning chunks + const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning") + expect(reasoningChunks.length).toBeGreaterThan(0) + expect(reasoningChunks[0].text).toBe("Let me think about this...") + expect(reasoningChunks[1].text).toBe(" I'll analyze step by step.") + }) + + it("should accumulate reasoning content via getReasoningContent()", async () => { + const reasonerHandler = new DeepSeekHandler({ + ...mockOptions, + apiModelId: "deepseek-reasoner", + }) + + // Before any API call, reasoning content should be undefined + expect(reasonerHandler.getReasoningContent()).toBeUndefined() + + const stream = reasonerHandler.createMessage(systemPrompt, messages) + for await (const _chunk of stream) { + // Consume the stream + } + + // After streaming, reasoning content should be accumulated + const reasoningContent = reasonerHandler.getReasoningContent() + expect(reasoningContent).toBe("Let me think about this... I'll analyze step by step.") + }) + + it("should pass thinking parameter for deepseek-reasoner model", async () => { + const reasonerHandler = new DeepSeekHandler({ + ...mockOptions, + apiModelId: "deepseek-reasoner", + }) + + const stream = reasonerHandler.createMessage(systemPrompt, messages) + for await (const _chunk of stream) { + // Consume the stream + } + + // Verify that the thinking parameter was passed to the API + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + thinking: { type: "enabled" }, + }), + ) + }) + + it("should NOT pass thinking parameter for deepseek-chat model", async () => { + const chatHandler = new DeepSeekHandler({ + ...mockOptions, + apiModelId: "deepseek-chat", + }) + + const stream = chatHandler.createMessage(systemPrompt, messages) + for await (const _chunk of stream) { + // Consume the stream + } + + // Verify that the thinking parameter was NOT passed to the API + const callArgs = mockCreate.mock.calls[0][0] + expect(callArgs.thinking).toBeUndefined() + }) + + it("should handle tool calls with reasoning_content", async () => { + const reasonerHandler = new DeepSeekHandler({ + ...mockOptions, + apiModelId: "deepseek-reasoner", + }) + + const tools: any[] = [ + { + type: "function", + function: { + name: "get_weather", + description: "Get weather", + parameters: { type: "object", properties: {} }, + }, + }, + ] + + const stream = reasonerHandler.createMessage(systemPrompt, messages, { taskId: "test", tools }) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Should have reasoning chunks + const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning") + expect(reasoningChunks.length).toBeGreaterThan(0) + + // Should have tool call chunks + const toolCallChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial") + expect(toolCallChunks.length).toBeGreaterThan(0) + expect(toolCallChunks[0].name).toBe("get_weather") + + // Reasoning content should be accumulated for potential continuation + const 
reasoningContent = reasonerHandler.getReasoningContent() + expect(reasoningContent).toBeDefined() + }) + + it("should reset reasoning content for each new request", async () => { + const reasonerHandler = new DeepSeekHandler({ + ...mockOptions, + apiModelId: "deepseek-reasoner", + }) + + // First request + const stream1 = reasonerHandler.createMessage(systemPrompt, messages) + for await (const _chunk of stream1) { + // Consume the stream + } + + const reasoningContent1 = reasonerHandler.getReasoningContent() + expect(reasoningContent1).toBeDefined() + + // Second request should reset the reasoning content + const stream2 = reasonerHandler.createMessage(systemPrompt, messages) + for await (const _chunk of stream2) { + // Consume the stream + } + + // The reasoning content should be fresh from the second request + const reasoningContent2 = reasonerHandler.getReasoningContent() + expect(reasoningContent2).toBe("Let me think about this... I'll analyze step by step.") + }) + }) }) diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index de119de6dba..670bb943b9a 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -1,13 +1,26 @@ -import { deepSeekModels, deepSeekDefaultModelId } from "@roo-code/types" +import { Anthropic } from "@anthropic-ai/sdk" +import OpenAI from "openai" + +import { deepSeekModels, deepSeekDefaultModelId, DEEP_SEEK_DEFAULT_TEMPERATURE } from "@roo-code/types" import type { ApiHandlerOptions } from "../../shared/api" -import type { ApiStreamUsageChunk } from "../transform/stream" +import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { getModelParams } from "../transform/model-params" +import { convertToR1Format } from "../transform/r1-format" +import { XmlMatcher } from "../../utils/xml-matcher" import { OpenAiHandler } from "./openai" +import type { ApiHandlerCreateMessageMetadata } from "../index" + +// Custom interface for DeepSeek params to support thinking mode +type DeepSeekChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStreaming & { + thinking?: { type: "enabled" | "disabled" } +} export class DeepSeekHandler extends OpenAiHandler { + private currentReasoningContent: string = "" + constructor(options: ApiHandlerOptions) { super({ ...options, @@ -19,6 +32,15 @@ export class DeepSeekHandler extends OpenAiHandler { }) } + /** + * Returns the accumulated reasoning content from the last API call. + * This is used for interleaved thinking with tool calls - the reasoning_content + * needs to be passed back to the API in subsequent requests within the same turn. + */ + getReasoningContent(): string | undefined { + return this.currentReasoningContent || undefined + } + override getModel() { const id = this.options.apiModelId ?? deepSeekDefaultModelId const info = deepSeekModels[id as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId] @@ -26,8 +48,124 @@ export class DeepSeekHandler extends OpenAiHandler { return { id, info, ...params } } + override async *createMessage( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, + ): ApiStream { + const modelId = this.options.apiModelId ?? 
deepSeekDefaultModelId + const { info: modelInfo } = this.getModel() + + // Check if this is a thinking-enabled model (deepseek-reasoner) + const isThinkingModel = modelId.includes("deepseek-reasoner") + + // Reset reasoning content accumulator for this request + this.currentReasoningContent = "" + + // Convert messages to R1 format (merges consecutive same-role messages) + // This is required for DeepSeek which does not support successive messages with the same role + const convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) + + const requestOptions: DeepSeekChatCompletionParams = { + model: modelId, + temperature: this.options.modelTemperature ?? DEEP_SEEK_DEFAULT_TEMPERATURE, + messages: convertedMessages, + stream: true as const, + stream_options: { include_usage: true }, + // Enable thinking mode for deepseek-reasoner or when tools are used with thinking model + ...(isThinkingModel && { thinking: { type: "enabled" } }), + ...(metadata?.tools && { tools: this.convertToolsForOpenAI(metadata.tools) }), + ...(metadata?.tool_choice && { tool_choice: metadata.tool_choice }), + ...(metadata?.toolProtocol === "native" && { + parallel_tool_calls: metadata.parallelToolCalls ?? false, + }), + } + + // Add max_tokens if needed + this.addMaxTokensIfNeeded(requestOptions, modelInfo) + + let stream + try { + stream = await this.getClient().chat.completions.create(requestOptions) + } catch (error) { + const { handleOpenAIError } = await import("./utils/openai-error-handler") + throw handleOpenAIError(error, "DeepSeek") + } + + // XmlMatcher for <think> tags (used by some DeepSeek models) + const matcher = new XmlMatcher( + "think", + (chunk) => + ({ + type: chunk.matched ? "reasoning" : "text", + text: chunk.data, + }) as const, + ) + + let lastUsage + + for await (const chunk of stream) { + const delta = chunk.choices?.[0]?.delta ?? {} + + // Handle regular content with <think> tag detection + if (delta.content) { + for (const matchedChunk of matcher.update(delta.content)) { + yield matchedChunk + } + } + + // Handle reasoning_content from DeepSeek's interleaved thinking + // This is the proper way DeepSeek sends thinking content in streaming + if ("reasoning_content" in delta && delta.reasoning_content) { + const reasoningText = (delta.reasoning_content as string) || "" + // Accumulate reasoning content for potential tool call continuation + this.currentReasoningContent += reasoningText + yield { + type: "reasoning", + text: reasoningText, + } + } + + // Handle tool calls + if (delta.tool_calls) { + for (const toolCall of delta.tool_calls) { + yield { + type: "tool_call_partial", + index: toolCall.index, + id: toolCall.id, + name: toolCall.function?.name, + arguments: toolCall.function?.arguments, + } + } + } + + if (chunk.usage) { + lastUsage = chunk.usage + } + } + + // Flush any remaining content from the XML matcher + for (const matchedChunk of matcher.final()) { + yield matchedChunk + } + + if (lastUsage) { + yield this.processUsageMetrics(lastUsage, modelInfo) + } + } + + /** + * Get the OpenAI client instance for making API calls. + * This is needed because the client is private in the parent class. + */ + private getClient(): OpenAI { + // Access the client through the parent class + // @ts-ignore - accessing private member for necessary functionality + return this.client + } + // Override to handle DeepSeek's usage metrics, including caching.
- protected override processUsageMetrics(usage: any): ApiStreamUsageChunk { + protected override processUsageMetrics(usage: any, _modelInfo?: any): ApiStreamUsageChunk { return { type: "usage", inputTokens: usage?.prompt_tokens || 0, diff --git a/src/api/transform/__tests__/r1-format.spec.ts b/src/api/transform/__tests__/r1-format.spec.ts index 80e641d94d8..edfe9dc5d14 100644 --- a/src/api/transform/__tests__/r1-format.spec.ts +++ b/src/api/transform/__tests__/r1-format.spec.ts @@ -179,4 +179,220 @@ describe("convertToR1Format", () => { expect(convertToR1Format(input)).toEqual(expected) }) + + describe("tool calls support for DeepSeek interleaved thinking", () => { + it("should convert assistant messages with tool_use to OpenAI format", () => { + const input: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "What's the weather?" }, + { + role: "assistant", + content: [ + { type: "text", text: "Let me check the weather for you." }, + { + type: "tool_use", + id: "call_123", + name: "get_weather", + input: { location: "San Francisco" }, + }, + ], + }, + ] + + const result = convertToR1Format(input) + + expect(result).toHaveLength(2) + expect(result[0]).toEqual({ role: "user", content: "What's the weather?" }) + expect(result[1]).toMatchObject({ + role: "assistant", + content: "Let me check the weather for you.", + tool_calls: [ + { + id: "call_123", + type: "function", + function: { + name: "get_weather", + arguments: '{"location":"San Francisco"}', + }, + }, + ], + }) + }) + + it("should convert user messages with tool_result to OpenAI tool messages", () => { + const input: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "What's the weather?" }, + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "call_123", + name: "get_weather", + input: { location: "San Francisco" }, + }, + ], + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "call_123", + content: "72°F and sunny", + }, + ], + }, + ] + + const result = convertToR1Format(input) + + expect(result).toHaveLength(3) + expect(result[0]).toEqual({ role: "user", content: "What's the weather?" 
}) + expect(result[1]).toMatchObject({ + role: "assistant", + content: null, + tool_calls: expect.any(Array), + }) + expect(result[2]).toEqual({ + role: "tool", + tool_call_id: "call_123", + content: "72°F and sunny", + }) + }) + + it("should handle tool_result with array content", () => { + const input: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "call_456", + content: [ + { type: "text", text: "Line 1" }, + { type: "text", text: "Line 2" }, + ], + }, + ], + }, + ] + + const result = convertToR1Format(input) + + expect(result).toHaveLength(1) + expect(result[0]).toEqual({ + role: "tool", + tool_call_id: "call_456", + content: "Line 1\nLine 2", + }) + }) + + it("should preserve reasoning_content on assistant messages", () => { + const input = [ + { role: "user" as const, content: "Think about this" }, + { + role: "assistant" as const, + content: "Here's my answer", + reasoning_content: "Let me analyze step by step...", + }, + ] + + const result = convertToR1Format(input as Anthropic.Messages.MessageParam[]) + + expect(result).toHaveLength(2) + expect((result[1] as any).reasoning_content).toBe("Let me analyze step by step...") + }) + + it("should handle mixed tool_result and text in user message", () => { + const input: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "call_789", + content: "Tool result", + }, + { + type: "text", + text: "Please continue", + }, + ], + }, + ] + + const result = convertToR1Format(input) + + // Should produce two messages: tool message first, then user message + expect(result).toHaveLength(2) + expect(result[0]).toEqual({ + role: "tool", + tool_call_id: "call_789", + content: "Tool result", + }) + expect(result[1]).toEqual({ + role: "user", + content: "Please continue", + }) + }) + + it("should handle multiple tool calls in single assistant message", () => { + const input: Anthropic.Messages.MessageParam[] = [ + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "call_1", + name: "tool_a", + input: { param: "a" }, + }, + { + type: "tool_use", + id: "call_2", + name: "tool_b", + input: { param: "b" }, + }, + ], + }, + ] + + const result = convertToR1Format(input) + + expect(result).toHaveLength(1) + expect((result[0] as any).tool_calls).toHaveLength(2) + expect((result[0] as any).tool_calls[0].id).toBe("call_1") + expect((result[0] as any).tool_calls[1].id).toBe("call_2") + }) + + it("should not merge assistant messages that have tool calls", () => { + const input: Anthropic.Messages.MessageParam[] = [ + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "call_1", + name: "tool_a", + input: {}, + }, + ], + }, + { + role: "assistant", + content: "Follow up response", + }, + ] + + const result = convertToR1Format(input) + + // Should NOT merge because first message has tool calls + expect(result).toHaveLength(2) + expect((result[0] as any).tool_calls).toBeDefined() + expect(result[1]).toEqual({ + role: "assistant", + content: "Follow up response", + }) + }) + }) }) diff --git a/src/api/transform/r1-format.ts b/src/api/transform/r1-format.ts index 51a4b94dbc4..ebd491c1c94 100644 --- a/src/api/transform/r1-format.ts +++ b/src/api/transform/r1-format.ts @@ -5,94 +5,214 @@ type ContentPartText = OpenAI.Chat.ChatCompletionContentPartText type ContentPartImage = OpenAI.Chat.ChatCompletionContentPartImage type UserMessage = OpenAI.Chat.ChatCompletionUserMessageParam type AssistantMessage = 
OpenAI.Chat.ChatCompletionAssistantMessageParam +type ToolMessage = OpenAI.Chat.ChatCompletionToolMessageParam type Message = OpenAI.Chat.ChatCompletionMessageParam type AnthropicMessage = Anthropic.Messages.MessageParam +/** + * Extended assistant message type to support DeepSeek's interleaved thinking. + * DeepSeek's API returns reasoning_content alongside content and tool_calls, + * and requires it to be passed back in subsequent requests within the same turn. + */ +export type DeepSeekAssistantMessage = AssistantMessage & { + reasoning_content?: string +} + /** * Converts Anthropic messages to OpenAI format while merging consecutive messages with the same role. * This is required for DeepSeek Reasoner which does not support successive messages with the same role. * + * For DeepSeek's interleaved thinking mode: + * - Preserves reasoning_content on assistant messages for tool call continuations + * - Tool result messages are converted to OpenAI tool messages + * - reasoning_content from previous assistant messages is preserved until a new user turn + * * @param messages Array of Anthropic messages * @returns Array of OpenAI messages where consecutive messages with the same role are combined */ export function convertToR1Format(messages: AnthropicMessage[]): Message[] { - return messages.reduce((merged, message) => { - const lastMessage = merged[merged.length - 1] - let messageContent: string | (ContentPartText | ContentPartImage)[] = "" - let hasImages = false + const result: Message[] = [] - // Convert content to appropriate format - if (Array.isArray(message.content)) { - const textParts: string[] = [] - const imageParts: ContentPartImage[] = [] + for (const message of messages) { + // Check if the message has reasoning_content (for DeepSeek interleaved thinking) + const messageWithReasoning = message as AnthropicMessage & { reasoning_content?: string } + const reasoningContent = messageWithReasoning.reasoning_content - message.content.forEach((part) => { - if (part.type === "text") { - textParts.push(part.text) + if (message.role === "user") { + // Handle user messages - may contain tool_result blocks + if (Array.isArray(message.content)) { + const textParts: string[] = [] + const imageParts: ContentPartImage[] = [] + const toolResults: { tool_use_id: string; content: string }[] = [] + + for (const part of message.content) { + if (part.type === "text") { + textParts.push(part.text) + } else if (part.type === "image") { + imageParts.push({ + type: "image_url", + image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` }, + }) + } else if (part.type === "tool_result") { + // Convert tool_result to OpenAI tool message format + let content: string + if (typeof part.content === "string") { + content = part.content + } else if (Array.isArray(part.content)) { + content = + part.content + ?.map((c) => { + if (c.type === "text") return c.text + if (c.type === "image") return "(image)" + return "" + }) + .join("\n") ?? 
"" + } else { + content = "" + } + toolResults.push({ + tool_use_id: part.tool_use_id, + content, + }) + } } - if (part.type === "image") { - hasImages = true - imageParts.push({ - type: "image_url", - image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` }, - }) + + // Add tool messages first (they must follow assistant tool_use) + for (const toolResult of toolResults) { + const toolMessage: ToolMessage = { + role: "tool", + tool_call_id: toolResult.tool_use_id, + content: toolResult.content, + } + result.push(toolMessage) } - }) - if (hasImages) { - const parts: (ContentPartText | ContentPartImage)[] = [] - if (textParts.length > 0) { - parts.push({ type: "text", text: textParts.join("\n") }) + // Then add user message with text/image content if any + if (textParts.length > 0 || imageParts.length > 0) { + let content: UserMessage["content"] + if (imageParts.length > 0) { + const parts: (ContentPartText | ContentPartImage)[] = [] + if (textParts.length > 0) { + parts.push({ type: "text", text: textParts.join("\n") }) + } + parts.push(...imageParts) + content = parts + } else { + content = textParts.join("\n") + } + + // Check if we can merge with the last message + const lastMessage = result[result.length - 1] + if (lastMessage?.role === "user") { + // Merge with existing user message + if (typeof lastMessage.content === "string" && typeof content === "string") { + lastMessage.content += `\n${content}` + } else { + const lastContent = Array.isArray(lastMessage.content) + ? lastMessage.content + : [{ type: "text" as const, text: lastMessage.content || "" }] + const newContent = Array.isArray(content) + ? content + : [{ type: "text" as const, text: content }] + lastMessage.content = [...lastContent, ...newContent] as UserMessage["content"] + } + } else { + result.push({ role: "user", content }) + } } - parts.push(...imageParts) - messageContent = parts } else { - messageContent = textParts.join("\n") + // Simple string content + const lastMessage = result[result.length - 1] + if (lastMessage?.role === "user") { + if (typeof lastMessage.content === "string") { + lastMessage.content += `\n${message.content}` + } else { + ;(lastMessage.content as (ContentPartText | ContentPartImage)[]).push({ + type: "text", + text: message.content, + }) + } + } else { + result.push({ role: "user", content: message.content }) + } } - } else { - messageContent = message.content - } + } else if (message.role === "assistant") { + // Handle assistant messages - may contain tool_use blocks and reasoning blocks + if (Array.isArray(message.content)) { + const textParts: string[] = [] + const toolCalls: OpenAI.Chat.ChatCompletionMessageToolCall[] = [] + let extractedReasoning: string | undefined - // If last message has same role, merge the content - if (lastMessage?.role === message.role) { - if (typeof lastMessage.content === "string" && typeof messageContent === "string") { - lastMessage.content += `\n${messageContent}` - } - // If either has image content, convert both to array format - else { - const lastContent = Array.isArray(lastMessage.content) - ? 
lastMessage.content - : [{ type: "text" as const, text: lastMessage.content || "" }] + for (const part of message.content) { + if (part.type === "text") { + textParts.push(part.text) + } else if (part.type === "tool_use") { + toolCalls.push({ + id: part.id, + type: "function", + function: { + name: part.name, + arguments: JSON.stringify(part.input), + }, + }) + } else if ((part as any).type === "reasoning" && (part as any).text) { + // Extract reasoning from content blocks (Task stores it this way) + extractedReasoning = (part as any).text + } + } - const newContent = Array.isArray(messageContent) - ? messageContent - : [{ type: "text" as const, text: messageContent }] + // Use reasoning from content blocks if not provided at top level + const finalReasoning = reasoningContent || extractedReasoning - if (message.role === "assistant") { - const mergedContent = [...lastContent, ...newContent] as AssistantMessage["content"] - lastMessage.content = mergedContent - } else { - const mergedContent = [...lastContent, ...newContent] as UserMessage["content"] - lastMessage.content = mergedContent - } - } - } else { - // Add as new message with the correct type based on role - if (message.role === "assistant") { - const newMessage: AssistantMessage = { + const assistantMessage: DeepSeekAssistantMessage = { role: "assistant", - content: messageContent as AssistantMessage["content"], + content: textParts.length > 0 ? textParts.join("\n") : null, + ...(toolCalls.length > 0 && { tool_calls: toolCalls }), + // Preserve reasoning_content for DeepSeek interleaved thinking + ...(finalReasoning && { reasoning_content: finalReasoning }), + } + + // Check if we can merge with the last message (only if no tool calls) + const lastMessage = result[result.length - 1] + if (lastMessage?.role === "assistant" && !toolCalls.length && !(lastMessage as any).tool_calls) { + // Merge text content + if (typeof lastMessage.content === "string" && typeof assistantMessage.content === "string") { + lastMessage.content += `\n${assistantMessage.content}` + } else if (assistantMessage.content) { + const lastContent = lastMessage.content || "" + lastMessage.content = `${lastContent}\n${assistantMessage.content}` + } + // Preserve reasoning_content from the new message if present + if (reasoningContent) { + ;(lastMessage as DeepSeekAssistantMessage).reasoning_content = reasoningContent + } + } else { + result.push(assistantMessage) } - merged.push(newMessage) } else { - const newMessage: UserMessage = { - role: "user", - content: messageContent as UserMessage["content"], + // Simple string content + const lastMessage = result[result.length - 1] + if (lastMessage?.role === "assistant" && !(lastMessage as any).tool_calls) { + if (typeof lastMessage.content === "string") { + lastMessage.content += `\n${message.content}` + } else { + lastMessage.content = message.content + } + // Preserve reasoning_content from the new message if present + if (reasoningContent) { + ;(lastMessage as DeepSeekAssistantMessage).reasoning_content = reasoningContent + } + } else { + const assistantMessage: DeepSeekAssistantMessage = { + role: "assistant", + content: message.content, + ...(reasoningContent && { reasoning_content: reasoningContent }), + } + result.push(assistantMessage) } - merged.push(newMessage) } } + } - return merged - }, []) + return result } From 3afd7f30056c08729fb8f92d9bc03c13e6f3d8bb Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Tue, 9 Dec 2025 21:20:38 -0700 Subject: [PATCH 02/12] feat(deepseek): enable preserveReasoning for 
interleaved thinking mode Enable reasoning_content to be passed back to DeepSeek API during tool call continuations within the same turn. This is required for DeepSeek's interleaved thinking mode to work correctly with native tool calls. See: https://api-docs.deepseek.com/guides/thinking_mode - Add preserveReasoning: true to deepseek-reasoner model - Add tests verifying preserveReasoning flag --- packages/types/src/providers/deepseek.ts | 4 ++++ src/api/providers/__tests__/deepseek.spec.ts | 23 +++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts index a7380e57ea7..402f782de98 100644 --- a/packages/types/src/providers/deepseek.ts +++ b/packages/types/src/providers/deepseek.ts @@ -26,6 +26,10 @@ export const deepSeekModels = { supportsPromptCache: true, supportsNativeTools: true, defaultToolProtocol: "native", + // preserveReasoning enables interleaved thinking mode for tool calls: + // DeepSeek requires reasoning_content to be passed back during tool call + // continuation within the same turn. See: https://api-docs.deepseek.com/guides/thinking_mode + preserveReasoning: true, inputPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025 outputPrice: 0.42, // $0.42 per million tokens - Updated Dec 9, 2025 cacheWritesPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025 diff --git a/src/api/providers/__tests__/deepseek.spec.ts b/src/api/providers/__tests__/deepseek.spec.ts index 7a882d8a7ab..bba6ec02d8e 100644 --- a/src/api/providers/__tests__/deepseek.spec.ts +++ b/src/api/providers/__tests__/deepseek.spec.ts @@ -122,7 +122,7 @@ vi.mock("openai", () => { import OpenAI from "openai" import type { Anthropic } from "@anthropic-ai/sdk" -import { deepSeekDefaultModelId } from "@roo-code/types" +import { deepSeekDefaultModelId, type ModelInfo } from "@roo-code/types" import type { ApiHandlerOptions } from "../../../shared/api" @@ -226,6 +226,27 @@ describe("DeepSeekHandler", () => { expect(model.info.supportsPromptCache).toBe(true) }) + it("should have preserveReasoning enabled for deepseek-reasoner to support interleaved thinking", () => { + // This is critical for DeepSeek's interleaved thinking mode with tool calls. + // See: https://api-docs.deepseek.com/guides/thinking_mode + // The reasoning_content needs to be passed back during tool call continuation + // within the same turn for the model to continue reasoning properly. 
+ const handlerWithReasoner = new DeepSeekHandler({ + ...mockOptions, + apiModelId: "deepseek-reasoner", + }) + const model = handlerWithReasoner.getModel() + // Cast to ModelInfo to access preserveReasoning which is an optional property + expect((model.info as ModelInfo).preserveReasoning).toBe(true) + }) + + it("should NOT have preserveReasoning enabled for deepseek-chat", () => { + // deepseek-chat doesn't use thinking mode, so no need to preserve reasoning + const model = handler.getModel() + // Cast to ModelInfo to access preserveReasoning which is an optional property + expect((model.info as ModelInfo).preserveReasoning).toBeUndefined() + }) + it("should return provided model ID with default model info if model does not exist", () => { const handlerWithInvalidModel = new DeepSeekHandler({ ...mockOptions, From 1354bab14c2761d64309484c577773c7f6ff8163 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Tue, 9 Dec 2025 21:38:23 -0700 Subject: [PATCH 03/12] feat(deepseek): add included and excluded tools for deepseek-reasoner model --- packages/types/src/providers/deepseek.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts index 402f782de98..3b81e01209a 100644 --- a/packages/types/src/providers/deepseek.ts +++ b/packages/types/src/providers/deepseek.ts @@ -22,6 +22,8 @@ export const deepSeekModels = { "deepseek-reasoner": { maxTokens: 8192, // 8K max output contextWindow: 128_000, + includedTools: ["search_replace"], + excludedTools: ["apply_diff"], supportsImages: false, supportsPromptCache: true, supportsNativeTools: true, From 6996db89caa8311646730f74291e9fe5ccbd7b47 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Fri, 12 Dec 2025 17:01:22 -0700 Subject: [PATCH 04/12] fix: remove ts-ignore by making OpenAI client protected, fix reasoning merge - Change OpenAiHandler.client from private to protected to allow DeepSeekHandler to access it properly without @ts-ignore - Remove unused getClient() method from DeepSeekHandler - Fix r1-format.ts to use finalReasoning (includes extracted reasoning from content blocks) instead of reasoningContent (top-level only) when merging assistant messages --- src/api/providers/deepseek.ts | 12 +----------- src/api/providers/openai.ts | 2 +- src/api/transform/r1-format.ts | 4 ++-- 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index 670bb943b9a..d83c6ee7e4b 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -86,7 +86,7 @@ export class DeepSeekHandler extends OpenAiHandler { let stream try { - stream = await this.getClient().chat.completions.create(requestOptions) + stream = await this.client.chat.completions.create(requestOptions) } catch (error) { const { handleOpenAIError } = await import("./utils/openai-error-handler") throw handleOpenAIError(error, "DeepSeek") @@ -154,16 +154,6 @@ export class DeepSeekHandler extends OpenAiHandler { } } - /** - * Get the OpenAI client instance for making API calls. - * This is needed because the client is private in the parent class. - */ - private getClient(): OpenAI { - // Access the client through the parent class - // @ts-ignore - accessing private member for necessary functionality - return this.client - } - // Override to handle DeepSeek's usage metrics, including caching. 
protected override processUsageMetrics(usage: any, _modelInfo?: any): ApiStreamUsageChunk { return { diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 2a2065edd6e..b5edeffb483 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -31,7 +31,7 @@ import { handleOpenAIError } from "./utils/openai-error-handler" // compatible with the OpenAI API. We can also rename it to `OpenAIHandler`. export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions - private client: OpenAI + protected client: OpenAI private readonly providerName = "OpenAI" constructor(options: ApiHandlerOptions) { diff --git a/src/api/transform/r1-format.ts b/src/api/transform/r1-format.ts index ebd491c1c94..d4a7bef1ae7 100644 --- a/src/api/transform/r1-format.ts +++ b/src/api/transform/r1-format.ts @@ -183,8 +183,8 @@ export function convertToR1Format(messages: AnthropicMessage[]): Message[] { lastMessage.content = `${lastContent}\n${assistantMessage.content}` } // Preserve reasoning_content from the new message if present - if (reasoningContent) { - ;(lastMessage as DeepSeekAssistantMessage).reasoning_content = reasoningContent + if (finalReasoning) { + ;(lastMessage as DeepSeekAssistantMessage).reasoning_content = finalReasoning } } else { result.push(assistantMessage) From dea81e458d67dda410517b83f5b894ff758ca6f4 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Sat, 13 Dec 2025 10:26:01 -0700 Subject: [PATCH 05/12] feat(deepseek): comment out included and excluded tools for deepseek-reasoner model --- packages/types/src/providers/deepseek.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts index 3b81e01209a..9b439f0f956 100644 --- a/packages/types/src/providers/deepseek.ts +++ b/packages/types/src/providers/deepseek.ts @@ -22,8 +22,8 @@ export const deepSeekModels = { "deepseek-reasoner": { maxTokens: 8192, // 8K max output contextWindow: 128_000, - includedTools: ["search_replace"], - excludedTools: ["apply_diff"], + // includedTools: ["search_replace"], + // excludedTools: ["apply_diff"], supportsImages: false, supportsPromptCache: true, supportsNativeTools: true, From 7d1264528bbe52f25ca8656de612520dcfbcdaaf Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Sat, 13 Dec 2025 10:26:15 -0700 Subject: [PATCH 06/12] feat(deepseek): update includedTools for deepseek-reasoner model --- packages/types/src/providers/deepseek.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts index 9b439f0f956..bd5113a39d9 100644 --- a/packages/types/src/providers/deepseek.ts +++ b/packages/types/src/providers/deepseek.ts @@ -22,7 +22,7 @@ export const deepSeekModels = { "deepseek-reasoner": { maxTokens: 8192, // 8K max output contextWindow: 128_000, - // includedTools: ["search_replace"], + includedTools: ["edit_file"], // excludedTools: ["apply_diff"], supportsImages: false, supportsPromptCache: true, From c1f1560e00d4519deea899fb29ac46d56d5a415c Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Mon, 15 Dec 2025 11:02:07 -0700 Subject: [PATCH 07/12] feat(deepseek): add comments for preserveReasoning and remove included/excluded tools from deepseek-reasoner --- packages/types/src/providers/deepseek.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/types/src/providers/deepseek.ts 
b/packages/types/src/providers/deepseek.ts index bd5113a39d9..c21dec06c92 100644 --- a/packages/types/src/providers/deepseek.ts +++ b/packages/types/src/providers/deepseek.ts @@ -12,6 +12,10 @@ export const deepSeekModels = { supportsImages: false, supportsPromptCache: true, supportsNativeTools: true, + // preserveReasoning enables interleaved thinking mode for tool calls: + // DeepSeek requires reasoning_content to be passed back during tool call + // continuation within the same turn. See: https://api-docs.deepseek.com/guides/thinking_mode + preserveReasoning: true, defaultToolProtocol: "native", inputPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025 outputPrice: 0.42, // $0.42 per million tokens - Updated Dec 9, 2025 @@ -22,8 +26,6 @@ export const deepSeekModels = { "deepseek-reasoner": { maxTokens: 8192, // 8K max output contextWindow: 128_000, - includedTools: ["edit_file"], - // excludedTools: ["apply_diff"], supportsImages: false, supportsPromptCache: true, supportsNativeTools: true, From 8933c06be4f26292d6810498d76e6f08de0cbffc Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Mon, 15 Dec 2025 12:11:49 -0700 Subject: [PATCH 08/12] feat(deepseek): update preserveReasoning comments and remove redundant entries from deepseek-reasoner model --- packages/types/src/providers/deepseek.ts | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts index c21dec06c92..78025653e38 100644 --- a/packages/types/src/providers/deepseek.ts +++ b/packages/types/src/providers/deepseek.ts @@ -1,6 +1,9 @@ import type { ModelInfo } from "../model.js" // https://platform.deepseek.com/docs/api +// preserveReasoning enables interleaved thinking mode for tool calls: +// DeepSeek requires reasoning_content to be passed back during tool call +// continuation within the same turn. See: https://api-docs.deepseek.com/guides/thinking_mode export type DeepSeekModelId = keyof typeof deepSeekModels export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat" @@ -12,10 +15,6 @@ export const deepSeekModels = { supportsImages: false, supportsPromptCache: true, supportsNativeTools: true, - // preserveReasoning enables interleaved thinking mode for tool calls: - // DeepSeek requires reasoning_content to be passed back during tool call - // continuation within the same turn. See: https://api-docs.deepseek.com/guides/thinking_mode - preserveReasoning: true, defaultToolProtocol: "native", inputPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025 outputPrice: 0.42, // $0.42 per million tokens - Updated Dec 9, 2025 @@ -30,9 +29,6 @@ export const deepSeekModels = { supportsPromptCache: true, supportsNativeTools: true, defaultToolProtocol: "native", - // preserveReasoning enables interleaved thinking mode for tool calls: - // DeepSeek requires reasoning_content to be passed back during tool call - // continuation within the same turn. 
See: https://api-docs.deepseek.com/guides/thinking_mode preserveReasoning: true, inputPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025 outputPrice: 0.42, // $0.42 per million tokens - Updated Dec 9, 2025 From de647e7ba5e6624e6ab1bb9555c46cc01bc6e921 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Mon, 15 Dec 2025 20:16:44 +0000 Subject: [PATCH 09/12] fix(deepseek): add Azure AI Inference path handling for DeepSeek via Azure - Change _isAzureAiInference and _getUrlHost from private to protected in OpenAiHandler - Add OPENAI_AZURE_AI_INFERENCE_PATH import and path handling in DeepSeekHandler.createMessage() - Update test to expect path options argument --- src/api/providers/__tests__/deepseek.spec.ts | 2 ++ src/api/providers/deepseek.ts | 15 +++++++++++++-- src/api/providers/openai.ts | 4 ++-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/api/providers/__tests__/deepseek.spec.ts b/src/api/providers/__tests__/deepseek.spec.ts index bba6ec02d8e..cd0e8940a9c 100644 --- a/src/api/providers/__tests__/deepseek.spec.ts +++ b/src/api/providers/__tests__/deepseek.spec.ts @@ -455,10 +455,12 @@ describe("DeepSeekHandler", () => { } // Verify that the thinking parameter was passed to the API + // Note: mockCreate receives two arguments - request options and path options expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ thinking: { type: "enabled" }, }), + {}, // Empty path options for non-Azure URLs ) }) diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index d83c6ee7e4b..e060b6a6db0 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -1,7 +1,12 @@ import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" -import { deepSeekModels, deepSeekDefaultModelId, DEEP_SEEK_DEFAULT_TEMPERATURE } from "@roo-code/types" +import { + deepSeekModels, + deepSeekDefaultModelId, + DEEP_SEEK_DEFAULT_TEMPERATURE, + OPENAI_AZURE_AI_INFERENCE_PATH, +} from "@roo-code/types" import type { ApiHandlerOptions } from "../../shared/api" @@ -84,9 +89,15 @@ export class DeepSeekHandler extends OpenAiHandler { // Add max_tokens if needed this.addMaxTokensIfNeeded(requestOptions, modelInfo) + // Check if base URL is Azure AI Inference (for DeepSeek via Azure) + const isAzureAiInference = this._isAzureAiInference(this.options.deepSeekBaseUrl) + let stream try { - stream = await this.client.chat.completions.create(requestOptions) + stream = await this.client.chat.completions.create( + requestOptions, + isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, + ) } catch (error) { const { handleOpenAIError } = await import("./utils/openai-error-handler") throw handleOpenAIError(error, "DeepSeek") diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index b5edeffb483..b198fe11d37 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -478,7 +478,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } - private _getUrlHost(baseUrl?: string): string { + protected _getUrlHost(baseUrl?: string): string { try { return new URL(baseUrl ?? 
"").host } catch (error) { @@ -491,7 +491,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl return urlHost.includes("x.ai") } - private _isAzureAiInference(baseUrl?: string): boolean { + protected _isAzureAiInference(baseUrl?: string): boolean { const urlHost = this._getUrlHost(baseUrl) return urlHost.endsWith(".services.ai.azure.com") } From 76b8bfb9ae4be70819d8db9e3afe9c363dfcf372 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Mon, 15 Dec 2025 14:28:03 -0700 Subject: [PATCH 10/12] This temp got best results on evals --- packages/types/src/providers/deepseek.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts index 78025653e38..80c72ba7250 100644 --- a/packages/types/src/providers/deepseek.ts +++ b/packages/types/src/providers/deepseek.ts @@ -39,4 +39,4 @@ export const deepSeekModels = { } as const satisfies Record // https://api-docs.deepseek.com/quick_start/parameter_settings -export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0 +export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3 From b84b5b5eb3fa6e439c3fc3ad613f908fd558d0cd Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Tue, 16 Dec 2025 08:49:31 -0700 Subject: [PATCH 11/12] refactor(deepseek): remove unused getReasoningContent() method - Remove currentReasoningContent field and accumulation logic - Remove getReasoningContent() method (was never called outside tests) - Simplify reasoning_content handling to just yield without accumulating - Remove associated tests for removed functionality --- src/api/providers/__tests__/deepseek.spec.ts | 49 -------------------- src/api/providers/deepseek.ts | 19 +------- 2 files changed, 1 insertion(+), 67 deletions(-) diff --git a/src/api/providers/__tests__/deepseek.spec.ts b/src/api/providers/__tests__/deepseek.spec.ts index cd0e8940a9c..1aac662d9a8 100644 --- a/src/api/providers/__tests__/deepseek.spec.ts +++ b/src/api/providers/__tests__/deepseek.spec.ts @@ -424,25 +424,6 @@ describe("DeepSeekHandler", () => { expect(reasoningChunks[1].text).toBe(" I'll analyze step by step.") }) - it("should accumulate reasoning content via getReasoningContent()", async () => { - const reasonerHandler = new DeepSeekHandler({ - ...mockOptions, - apiModelId: "deepseek-reasoner", - }) - - // Before any API call, reasoning content should be undefined - expect(reasonerHandler.getReasoningContent()).toBeUndefined() - - const stream = reasonerHandler.createMessage(systemPrompt, messages) - for await (const _chunk of stream) { - // Consume the stream - } - - // After streaming, reasoning content should be accumulated - const reasoningContent = reasonerHandler.getReasoningContent() - expect(reasoningContent).toBe("Let me think about this... 
I'll analyze step by step.") - }) - it("should pass thinking parameter for deepseek-reasoner model", async () => { const reasonerHandler = new DeepSeekHandler({ ...mockOptions, @@ -511,36 +492,6 @@ describe("DeepSeekHandler", () => { const toolCallChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial") expect(toolCallChunks.length).toBeGreaterThan(0) expect(toolCallChunks[0].name).toBe("get_weather") - - // Reasoning content should be accumulated for potential continuation - const reasoningContent = reasonerHandler.getReasoningContent() - expect(reasoningContent).toBeDefined() - }) - - it("should reset reasoning content for each new request", async () => { - const reasonerHandler = new DeepSeekHandler({ - ...mockOptions, - apiModelId: "deepseek-reasoner", - }) - - // First request - const stream1 = reasonerHandler.createMessage(systemPrompt, messages) - for await (const _chunk of stream1) { - // Consume the stream - } - - const reasoningContent1 = reasonerHandler.getReasoningContent() - expect(reasoningContent1).toBeDefined() - - // Second request should reset the reasoning content - const stream2 = reasonerHandler.createMessage(systemPrompt, messages) - for await (const _chunk of stream2) { - // Consume the stream - } - - // The reasoning content should be fresh from the second request - const reasoningContent2 = reasonerHandler.getReasoningContent() - expect(reasoningContent2).toBe("Let me think about this... I'll analyze step by step.") }) }) }) diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index e060b6a6db0..a8765498d17 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -24,8 +24,6 @@ type DeepSeekChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStream } export class DeepSeekHandler extends OpenAiHandler { - private currentReasoningContent: string = "" - constructor(options: ApiHandlerOptions) { super({ ...options, @@ -37,15 +35,6 @@ export class DeepSeekHandler extends OpenAiHandler { }) } - /** - * Returns the accumulated reasoning content from the last API call. - * This is used for interleaved thinking with tool calls - the reasoning_content - * needs to be passed back to the API in subsequent requests within the same turn. - */ - getReasoningContent(): string | undefined { - return this.currentReasoningContent || undefined - } - override getModel() { const id = this.options.apiModelId ?? 
deepSeekDefaultModelId const info = deepSeekModels[id as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId] @@ -64,9 +53,6 @@ export class DeepSeekHandler extends OpenAiHandler { // Check if this is a thinking-enabled model (deepseek-reasoner) const isThinkingModel = modelId.includes("deepseek-reasoner") - // Reset reasoning content accumulator for this request - this.currentReasoningContent = "" - // Convert messages to R1 format (merges consecutive same-role messages) // This is required for DeepSeek which does not support successive messages with the same role const convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) @@ -128,12 +114,9 @@ export class DeepSeekHandler extends OpenAiHandler { // Handle reasoning_content from DeepSeek's interleaved thinking // This is the proper way DeepSeek sends thinking content in streaming if ("reasoning_content" in delta && delta.reasoning_content) { - const reasoningText = (delta.reasoning_content as string) || "" - // Accumulate reasoning content for potential tool call continuation - this.currentReasoningContent += reasoningText yield { type: "reasoning", - text: reasoningText, + text: (delta.reasoning_content as string) || "", } } From 31def3a291069416e8b75d660fe1d19b8c23c97e Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Tue, 16 Dec 2025 17:49:53 -0500 Subject: [PATCH 12/12] refactor(deepseek): remove unnecessary XmlMatcher for <think> tags The official DeepSeek API sends reasoning content via the dedicated 'reasoning_content' field, not embedded in content with <think> tags. XmlMatcher was unnecessary and only relevant for self-hosted models (which use native-ollama.ts, not deepseek.ts). --- src/api/providers/deepseek.ts | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index a8765498d17..01e747e11b1 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -13,7 +13,6 @@ import type { ApiHandlerOptions } from "../../shared/api" import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { getModelParams } from "../transform/model-params" import { convertToR1Format } from "../transform/r1-format" -import { XmlMatcher } from "../../utils/xml-matcher" import { OpenAiHandler } from "./openai" import type { ApiHandlerCreateMessageMetadata } from "../index" @@ -89,25 +88,16 @@ export class DeepSeekHandler extends OpenAiHandler { throw handleOpenAIError(error, "DeepSeek") } - // XmlMatcher for <think> tags (used by some DeepSeek models) - const matcher = new XmlMatcher( - "think", - (chunk) => - ({ - type: chunk.matched ? "reasoning" : "text", - text: chunk.data, - }) as const, - ) - let lastUsage for await (const chunk of stream) { const delta = chunk.choices?.[0]?.delta ?? {} - // Handle regular content with <think> tag detection + // Handle regular text content if (delta.content) { - for (const matchedChunk of matcher.update(delta.content)) { - yield matchedChunk + yield { + type: "text", + text: delta.content, } } @@ -138,11 +128,6 @@ export class DeepSeekHandler extends OpenAiHandler { } } - // Flush any remaining content from the XML matcher - for (const matchedChunk of matcher.final()) { - yield matchedChunk - } - if (lastUsage) { yield this.processUsageMetrics(lastUsage, modelInfo) }
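---
Note (illustration only, not part of any commit above): the interleaved-thinking round trip these patches implement looks roughly like the following when driven against the raw DeepSeek API. This is a minimal, hypothetical sketch — the message values and tool call id are invented, and the `thinking` field is a DeepSeek extension outside the OpenAI SDK types (hence the casts). Field names follow the thinking-mode guide linked in the commit messages: after the model streams reasoning_content plus a tool call, the assistant message is replayed with its reasoning_content intact alongside tool_calls, followed by the tool result, so deepseek-reasoner can resume reasoning within the same turn.

import OpenAI from "openai"

const client = new OpenAI({
	baseURL: "https://api.deepseek.com",
	apiKey: process.env.DEEPSEEK_API_KEY,
})

// Second request of a single turn: replay the assistant message that asked
// for the tool call (with reasoning_content preserved), then the tool result.
async function continueAfterToolCall(): Promise<void> {
	const request = {
		model: "deepseek-reasoner",
		stream: true,
		thinking: { type: "enabled" }, // DeepSeek extension, not in the OpenAI SDK types
		messages: [
			{ role: "user", content: "What's the weather in SF?" },
			{
				role: "assistant",
				content: null,
				// Preserved from the first streamed response of this turn
				reasoning_content: "The user wants current weather, so I should call get_weather.",
				tool_calls: [
					{
						id: "call_123",
						type: "function",
						function: { name: "get_weather", arguments: '{"location":"SF"}' },
					},
				],
			},
			// The tool result must directly follow the assistant tool_calls message
			{ role: "tool", tool_call_id: "call_123", content: "72°F and sunny" },
		],
	}

	// Cast past the SDK types because of the non-standard `thinking` field
	const stream = (await client.chat.completions.create(request as any)) as unknown as AsyncIterable<any>

	for await (const chunk of stream) {
		const delta = chunk.choices?.[0]?.delta ?? {}
		// reasoning_content and content arrive as separate delta fields
		if (delta.reasoning_content) process.stdout.write(`[thinking] ${delta.reasoning_content}`)
		if (delta.content) process.stdout.write(delta.content)
	}
}

continueAfterToolCall().catch(console.error)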