From f3192d78f6b956ab05da91244c1ead84f42ca002 Mon Sep 17 00:00:00 2001
From: Piotr Rogowski
Date: Fri, 31 Jan 2025 23:58:47 +0100
Subject: [PATCH 1/4] Parse text in openai response stream to extract
 reasoning from <think> tags

---
 src/api/providers/__tests__/openai.test.ts | 116 +++++++++++++++++++++
 src/api/providers/openai.ts                |  62 +++++++++--
 2 files changed, 171 insertions(+), 7 deletions(-)

diff --git a/src/api/providers/__tests__/openai.test.ts b/src/api/providers/__tests__/openai.test.ts
index 52d0c5c2bb2..11256102bee 100644
--- a/src/api/providers/__tests__/openai.test.ts
+++ b/src/api/providers/__tests__/openai.test.ts
@@ -143,6 +143,122 @@ describe("OpenAiHandler", () => {
 		expect(textChunks).toHaveLength(1)
 		expect(textChunks[0].text).toBe("Test response")
 	})
+
+	it("should handle thinking tags in a stream", async () => {
+		const openaiOptions = {
+			...mockOptions,
+			openAiCustomModelInfo: {
+				thinkTokensInResponse: true,
+				contextWindow: 128_000,
+				supportsImages: false,
+				supportsPromptCache: false,
+			},
+		}
+		const handler = new OpenAiHandler(openaiOptions)
+		mockCreate.mockImplementationOnce(async (options) => {
+			return {
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [
+							{
+								delta: { content: "<think>thoughts<" },
+								index: 1,
+							},
+						],
+						usage: null,
+					}
+					yield {
+						choices: [
+							{
+								delta: { content: "/think>" },
+								index: 2,
+							},
+						],
+						usage: null,
+					}
+					yield {
+						choices: [
+							{
+								delta: { content: "result" },
+								index: 2,
+							},
+						],
+						usage: null,
+					}
+				},
+			}
+		})
+
+		const stream = handler.createMessage(systemPrompt, messages)
+		const chunks: any[] = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		expect(chunks.length).toBeGreaterThan(0)
+		const textChunks = chunks.filter((chunk) => chunk.type === "text")
+		expect(textChunks).toHaveLength(1)
+		expect(textChunks[0].text).toBe("result")
+
+		const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
+		expect(reasoningChunks).toHaveLength(1)
+		expect(reasoningChunks[0].text).toBe("thoughts")
+	})
+
+	it("should handle thinking tags when not streaming", async () => {
+		const openaiOptions = {
+			...mockOptions,
+			openAiCustomModelInfo: {
+				thinkTokensInResponse: true,
+				contextWindow: 128_000,
+				supportsImages: false,
+				supportsPromptCache: false,
+			},
+			openAiStreamingEnabled: false,
+		}
+		const handler = new OpenAiHandler(openaiOptions)
+		mockCreate.mockImplementationOnce(async (options) => {
+			return {
+				id: "custom-test-completion",
+				choices: [
+					{
+						message: { role: "assistant", content: "<think>thoughts</think>result" },
+						finish_reason: "stop",
+						index: 0,
+					},
+				],
+				usage: {
+					prompt_tokens: 5,
+					completion_tokens: 7,
+					total_tokens: 12,
+				},
+			}
+		})
+
+		const stream = handler.createMessage(systemPrompt, messages)
+		const chunks: any[] = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		expect(chunks.length).toBeGreaterThan(0)
+		const textChunks = chunks.filter((chunk) => chunk.type === "text")
+		expect(textChunks).toHaveLength(1)
+		expect(textChunks[0].text).toBe("result")
+
+		const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
+		expect(reasoningChunks).toHaveLength(1)
+		expect(reasoningChunks[0].text).toBe("thoughts")
+	})
 })
 
 describe("error handling", () => {
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index 8431ffa4167..c9cb2edc2c4 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -10,7 +10,7 @@ import {
 import { ApiHandler, SingleCompletionHandler } from "../index"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { convertToR1Format } from "../transform/r1-format"
-import { ApiStream } from "../transform/stream"
+import { ApiStream, ApiStreamChunk } from "../transform/stream"
 
 export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 	protected options: ApiHandlerOptions
@@ -59,15 +59,15 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 				requestOptions.max_tokens = modelInfo.maxTokens
 			}
 
+			const thinkingParser = new ThinkingTokenSeparator()
 			const stream = await this.client.chat.completions.create(requestOptions)
 
 			for await (const chunk of stream) {
 				const delta = chunk.choices[0]?.delta ?? {}
 
 				if (delta.content) {
-					yield {
-						type: "text",
-						text: delta.content,
+					for (const parsedChunk of thinkingParser.parseChunk(delta.content)) {
+						yield parsedChunk
 					}
 				}
 
@@ -101,9 +101,9 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 
 			const response = await this.client.chat.completions.create(requestOptions)
 
-			yield {
-				type: "text",
-				text: response.choices[0]?.message.content || "",
+			const thinkingParser = new ThinkingTokenSeparator()
+			for (const parsedChunk of thinkingParser.parseChunk(response.choices[0]?.message.content || "")) {
+				yield parsedChunk
 			}
 			yield {
 				type: "usage",
@@ -137,3 +137,51 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 }
+
+class ThinkingTokenSeparator {
+	private insideThinking = false
+	private buffer = ""
+
+	public parseChunk(chunk: string): ApiStreamChunk[] {
+		let parsed: ApiStreamChunk[] = []
+		chunk = this.buffer + chunk
+		this.buffer = ""
+
+		const parseTag = (tag: string, thinking: boolean) => {
+			if (chunk.indexOf(tag) !== -1) {
+				const [before, after] = chunk.split(tag)
+				if (before.length > 0) {
+					parsed.push({ type: thinking ? "text" : "reasoning", text: before })
+				}
+				chunk = after
+				this.insideThinking = thinking
+			} else if (this.endsWithIncompleteString(chunk, tag)) {
+				this.buffer = chunk
+				chunk = ""
+			}
+		}
+
+		if (!this.insideThinking) {
+			parseTag("<think>", true)
+		}
+		if (this.insideThinking) {
+			parseTag("</think>", false)
+		}
+
+		if (chunk.length > 0) {
+			parsed.push({ type: this.insideThinking ? "reasoning" : "text", text: chunk })
+		}
+
+		return parsed
+	}
+
+	private endsWithIncompleteString(chunk: string, str: string): boolean {
+		// iterate from end of the str and check if we start matching from any point
+		for (let i = str.length - 1; i >= 1; i--) {
+			if (chunk.endsWith(str.slice(0, i))) {
+				return true
+			}
+		}
+		return false
+	}
+}

From e3163a4d14a6500982d343e74f464c24ccca884b Mon Sep 17 00:00:00 2001
From: Hank Beasley
Date: Fri, 31 Jan 2025 10:52:41 -0600
Subject: [PATCH 2/4] Implement token parsing for <think> tags in responses
 and add UI toggle for model settings

---
 src/api/providers/openai.ts                 | 13 +++--
 src/shared/api.ts                           |  1 +
 .../src/components/settings/ApiOptions.tsx  | 49 +++++++++++++++++++
 3 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index c9cb2edc2c4..b77909f339f 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -40,6 +40,9 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 		const modelId = this.options.openAiModelId ?? ""
 		const deepseekReasoner = modelId.includes("deepseek-reasoner")
+		const thinkingParser = modelInfo.thinkTokensInResponse
+			? new ThinkingTokenSeparator()
+			: new PassThroughTokenSeparator()
 
 		if (this.options.openAiStreamingEnabled ?? true) {
 			const systemMessage: OpenAI.Chat.ChatCompletionSystemMessageParam = {
@@ -59,9 +62,8 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 				requestOptions.max_tokens = modelInfo.maxTokens
 			}
 
-			const thinkingParser = new ThinkingTokenSeparator()
 			const stream = await this.client.chat.completions.create(requestOptions)
-
+
 			for await (const chunk of stream) {
 				const delta = chunk.choices[0]?.delta ?? {}
@@ -84,6 +86,7 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 						outputTokens: chunk.usage.completion_tokens || 0,
 					}
 				}
+
 			}
 		} else {
 			// o1 for instance doesnt support streaming, non-1 temp, or system prompt
@@ -101,7 +104,6 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 
 			const response = await this.client.chat.completions.create(requestOptions)
 
-			const thinkingParser = new ThinkingTokenSeparator()
 			for (const parsedChunk of thinkingParser.parseChunk(response.choices[0]?.message.content || "")) {
 				yield parsedChunk
 			}
@@ -138,6 +140,11 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 }
+class PassThroughTokenSeparator {
+	public parseChunk(chunk: string): ApiStreamChunk[] {
+		return [{ type: "text", text: chunk }]
+	}
+}
 class ThinkingTokenSeparator {
 	private insideThinking = false
 	private buffer = ""
diff --git a/src/shared/api.ts b/src/shared/api.ts
index 7da67fd3eb6..d1f7d929812 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -80,6 +80,7 @@ export interface ModelInfo {
 	cacheWritesPrice?: number
 	cacheReadsPrice?: number
 	description?: string
+	thinkTokensInResponse?: boolean
 }
 
 // Anthropic
diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx
index 4bdff0b061e..8d9ec47b2e7 100644
--- a/webview-ui/src/components/settings/ApiOptions.tsx
+++ b/webview-ui/src/components/settings/ApiOptions.tsx
@@ -893,6 +893,55 @@ const ApiOptions = ({ apiErrorMessage, modelIdErrorMessage }: ApiOptionsProps) =
 							<span style={{ fontSize: "12px", color: "var(--vscode-descriptionForeground)" }}>
 								This model feature is for computer use like sonnet 3.5 support
 							</span>
 						</div>
+
+						<div style={{ display: "flex", flexDirection: "column", marginTop: 10 }}>
+							<Checkbox
+								checked={apiConfiguration?.openAiCustomModelInfo?.thinkTokensInResponse ?? false}
+								onChange={(checked: boolean) => {
+									handleInputChange("openAiCustomModelInfo")({
+										target: {
+											value: {
+												...(apiConfiguration?.openAiCustomModelInfo ||
+													openAiModelInfoSaneDefaults),
+												thinkTokensInResponse: checked,
+											},
+										},
+									})
+								}}>
+								Model Uses <Think> Tags
+							</Checkbox>
+							<span style={{ fontSize: "12px", color: "var(--vscode-descriptionForeground)" }}>
+								Enable if the model outputs <Think> </Think>tags in a
+								chat response. Some Deek Seek R1 providers output tokens with these
+								tags.
+							</span>
+						</div>
 					</div>
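
Note: a minimal sketch (not part of the patch) of how the ThinkingTokenSeparator added above behaves when a tag is split across stream chunks. The chunk boundaries below are illustrative; any split works because endsWithIncompleteString keeps a possible tag prefix in the buffer until the next chunk arrives:

    const parser = new ThinkingTokenSeparator()
    parser.parseChunk("<thi")        // [] - "<thi" is held in the buffer as a possible "<think>" prefix
    parser.parseChunk("nk>thoughts") // [{ type: "reasoning", text: "thoughts" }]
    parser.parseChunk("</think>ok")  // [{ type: "text", text: "ok" }]

PassThroughTokenSeparator keeps the old single-text-chunk behavior, so the thinkTokensInResponse flag only selects which separator gets constructed.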
From b784715117a0369bea4146a2337ef1a5e3a86084 Mon Sep 17 00:00:00 2001
From: Hank Beasley
Date: Fri, 31 Jan 2025 11:29:39 -0600
Subject: [PATCH 3/4] Fix typo in tooltip text for DeepSeek R1 providers in
 ApiOptions component

---
 webview-ui/src/components/settings/ApiOptions.tsx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx
index 8d9ec47b2e7..aadf307ebb5 100644
--- a/webview-ui/src/components/settings/ApiOptions.tsx
+++ b/webview-ui/src/components/settings/ApiOptions.tsx
@@ -921,7 +921,7 @@ const ApiOptions = ({ apiErrorMessage, modelIdErrorMessage }: ApiOptionsProps) =
 							</Checkbox>
 							<span style={{ fontSize: "12px", color: "var(--vscode-descriptionForeground)" }}>
 								Enable if the model outputs <Think> </Think>tags in a
-								chat response. Some Deek Seek R1 providers output tokens with these
+								chat response. Some DeepSeek R1 providers output tokens with these
 								tags.
 							</span>
 						</div>

From 74308e6f07e9f9473f5830b666cee55c925836ae Mon Sep 17 00:00:00 2001
From: Hank Beasley
Date: Thu, 6 Feb 2025 10:24:02 -0600
Subject: [PATCH 4/4] merge flush from Szpadel

---
 src/api/providers/__tests__/openai.test.ts | 10 +++---
 src/api/providers/openai.ts                | 37 +++++++++++++---------
 2 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/src/api/providers/__tests__/openai.test.ts b/src/api/providers/__tests__/openai.test.ts
index 11256102bee..12a3c6003f8 100644
--- a/src/api/providers/__tests__/openai.test.ts
+++ b/src/api/providers/__tests__/openai.test.ts
@@ -188,8 +188,8 @@ describe("OpenAiHandler", () => {
 				yield {
 					choices: [
 						{
-							delta: { content: "result" },
-							index: 2,
+							delta: { content: "result<" },
+							index: 2,
 						},
 					],
 					usage: null,
@@ -205,7 +205,7 @@ describe("OpenAiHandler", () => {
 		expect(chunks.length).toBeGreaterThan(0)
 		const textChunks = chunks.filter((chunk) => chunk.type === "text")
 		expect(textChunks).toHaveLength(1)
-		expect(textChunks[0].text).toBe("result")
+		expect(textChunks[0].text).toBe("result<")
 
 		const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
 		expect(reasoningChunks).toHaveLength(1)
@@ -231,7 +231,7 @@ describe("OpenAiHandler", () => {
 					id: "custom-test-completion",
 					choices: [
 						{
-							message: { role: "assistant", content: "<think>thoughts</think>result" },
+							message: { role: "assistant", content: "<think>thoughts</think>result<" },
 							finish_reason: "stop",
 							index: 0,
 						},
@@ -248,7 +248,7 @@ describe("OpenAiHandler", () => {
 		expect(chunks.length).toBeGreaterThan(0)
 		const textChunks = chunks.filter((chunk) => chunk.type === "text")
 		expect(textChunks).toHaveLength(1)
-		expect(textChunks[0].text).toBe("result")
+		expect(textChunks[0].text).toBe("result<")
 
 		const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
 		expect(reasoningChunks).toHaveLength(1)
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index 97a80881b9c..0bd372ce13f 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -18,20 +18,10 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 
 	constructor(options: ApiHandlerOptions) {
 		this.options = options
-
-		let urlHost: string
-
-		try {
-			urlHost = new URL(this.options.openAiBaseUrl ?? "").host
-		} catch (error) {
-			// Likely an invalid `openAiBaseUrl`; we're still working on
-			// proper settings validation.
-			urlHost = ""
-		}
-
+		// Azure API shape slightly differs from the core API shape:
+		// https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai
+		const urlHost = new URL(this.options.openAiBaseUrl ?? "").host
 		if (urlHost === "azure.com" || urlHost.endsWith(".azure.com") || options.openAiUseAzure) {
-			// Azure API shape slightly differs from the core API shape:
-			// https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai
 			this.client = new AzureOpenAI({
 				baseURL: this.options.openAiBaseUrl,
 				apiKey: this.options.openAiApiKey,
@@ -97,6 +87,10 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 					}
 				}
 			}
+
+			for (const parsedChunk of thinkingParser.flush()) {
+				yield parsedChunk
+			}
 
 		} else {
 			// o1 for instance doesnt support streaming, non-1 temp, or system prompt
 			const systemMessage: OpenAI.Chat.ChatCompletionUserMessageParam = {
@@ -113,7 +107,7 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 
 			const response = await this.client.chat.completions.create(requestOptions)
 
-			for (const parsedChunk of thinkingParser.parseChunk(response.choices[0]?.message.content || "")) {
+			for (const parsedChunk of thinkingParser.parseChunk(response.choices[0]?.message.content || "", true)) {
 				yield parsedChunk
 			}
 			yield {
 				type: "usage",
@@ -153,12 +147,16 @@ class PassThroughTokenSeparator {
 	public parseChunk(chunk: string): ApiStreamChunk[] {
 		return [{ type: "text", text: chunk }]
 	}
+
+	public flush(): ApiStreamChunk[] {
+		return []
+	}
 }
 class ThinkingTokenSeparator {
 	private insideThinking = false
 	private buffer = ""
 
-	public parseChunk(chunk: string): ApiStreamChunk[] {
+	public parseChunk(chunk: string, flush: boolean = false): ApiStreamChunk[] {
 		let parsed: ApiStreamChunk[] = []
 		chunk = this.buffer + chunk
 		this.buffer = ""
@@ -184,6 +182,11 @@ class ThinkingTokenSeparator {
 			parseTag("</think>", false)
 		}
 
+		if (flush) {
+			chunk = this.buffer + chunk
+			this.buffer = ""
+		}
+
 		if (chunk.length > 0) {
 			parsed.push({ type: this.insideThinking ? "reasoning" : "text", text: chunk })
 		}
@@ -200,4 +203,8 @@ class ThinkingTokenSeparator {
 		}
 		return false
 	}
+
+	public flush(): ApiStreamChunk[] {
+		return this.parseChunk("", true)
+	}
 }