From f3192d78f6b956ab05da91244c1ead84f42ca002 Mon Sep 17 00:00:00 2001
From: Piotr Rogowski
Date: Fri, 31 Jan 2025 23:58:47 +0100
Subject: [PATCH 1/4] Parse text in openai response stream to extract reasoning
from <think> tags
---
src/api/providers/__tests__/openai.test.ts | 116 +++++++++++++++++++++
src/api/providers/openai.ts | 62 +++++++++--
2 files changed, 171 insertions(+), 7 deletions(-)
diff --git a/src/api/providers/__tests__/openai.test.ts b/src/api/providers/__tests__/openai.test.ts
index 52d0c5c2bb2..11256102bee 100644
--- a/src/api/providers/__tests__/openai.test.ts
+++ b/src/api/providers/__tests__/openai.test.ts
@@ -143,6 +143,122 @@ describe("OpenAiHandler", () => {
expect(textChunks).toHaveLength(1)
expect(textChunks[0].text).toBe("Test response")
})
+
+ it("should handle thinking tags in a stream", async () => {
+ const openaiOptions = {
+ ...mockOptions,
+ openAiCustomModelInfo: {
+ thinkTokensInResponse: true,
+ contextWindow: 128_000,
+ supportsImages: false,
+ supportsPromptCache: false,
+ },
+ }
+ const handler = new OpenAiHandler(openaiOptions)
+ mockCreate.mockImplementationOnce(async (options) => {
+ return {
+ [Symbol.asyncIterator]: async function* () {
+ yield {
+ choices: [
+ {
+ delta: { content: "thoughts<" },
+ index: 1,
+ },
+ ],
+ usage: null,
+ }
+ yield {
+ choices: [
+ {
+ delta: { content: "/think>" },
+ index: 2,
+ },
+ ],
+ usage: null,
+ }
+ yield {
+ choices: [
+ {
+ delta: { content: "result" },
+ index: 2,
+ },
+ ],
+ usage: null,
+ }
+ },
+ }
+ })
+
+ const stream = handler.createMessage(systemPrompt, messages)
+ const chunks: any[] = []
+ for await (const chunk of stream) {
+ chunks.push(chunk)
+ }
+
+ expect(chunks.length).toBeGreaterThan(0)
+ const textChunks = chunks.filter((chunk) => chunk.type === "text")
+ expect(textChunks).toHaveLength(1)
+ expect(textChunks[0].text).toBe("result")
+
+ const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
+ expect(reasoningChunks).toHaveLength(1)
+ expect(reasoningChunks[0].text).toBe("thoughts")
+ })
+
+ it("should handle thinking tags when not streaming", async () => {
+ const openaiOptions = {
+ ...mockOptions,
+ openAiCustomModelInfo: {
+ thinkTokensInResponse: true,
+ contextWindow: 128_000,
+ supportsImages: false,
+ supportsPromptCache: false,
+ },
+ openAiStreamingEnabled: false,
+ }
+ const handler = new OpenAiHandler(openaiOptions)
+ mockCreate.mockImplementationOnce(async (options) => {
+ return {
+ id: "custom-test-completion",
+ choices: [
+ {
+ message: { role: "assistant", content: "thoughtsresult" },
+ finish_reason: "stop",
+ index: 0,
+ },
+ ],
+ usage: {
+ prompt_tokens: 5,
+ completion_tokens: 7,
+ total_tokens: 12,
+ },
+ }
+ })
+
+ const stream = handler.createMessage(systemPrompt, messages)
+ const chunks: any[] = []
+ for await (const chunk of stream) {
+ chunks.push(chunk)
+ }
+
+ expect(chunks.length).toBeGreaterThan(0)
+ const textChunks = chunks.filter((chunk) => chunk.type === "text")
+ expect(textChunks).toHaveLength(1)
+ expect(textChunks[0].text).toBe("result")
+
+ const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
+ expect(reasoningChunks).toHaveLength(1)
+ expect(reasoningChunks[0].text).toBe("thoughts")
+ })
})
describe("error handling", () => {
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index 8431ffa4167..c9cb2edc2c4 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -10,7 +10,7 @@ import {
import { ApiHandler, SingleCompletionHandler } from "../index"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { convertToR1Format } from "../transform/r1-format"
-import { ApiStream } from "../transform/stream"
+import { ApiStream, ApiStreamChunk } from "../transform/stream"
export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
protected options: ApiHandlerOptions
@@ -59,15 +59,15 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
requestOptions.max_tokens = modelInfo.maxTokens
}
+ const thinkingParser = new ThinkingTokenSeparator()
const stream = await this.client.chat.completions.create(requestOptions)
for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta ?? {}
if (delta.content) {
- yield {
- type: "text",
- text: delta.content,
+ for (const parsedChunk of thinkingParser.parseChunk(delta.content)) {
+ yield parsedChunk
}
}
@@ -101,9 +101,9 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
const response = await this.client.chat.completions.create(requestOptions)
- yield {
- type: "text",
- text: response.choices[0]?.message.content || "",
+ const thinkingParser = new ThinkingTokenSeparator()
+ for (const parsedChunk of thinkingParser.parseChunk(response.choices[0]?.message.content || "")) {
+ yield parsedChunk
}
yield {
type: "usage",
@@ -137,3 +137,51 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
}
}
}
+
+class ThinkingTokenSeparator {
+ private insideThinking = false
+ private buffer = ""
+
+ public parseChunk(chunk: string): ApiStreamChunk[] {
+ let parsed: ApiStreamChunk[] = []
+ chunk = this.buffer + chunk
+ this.buffer = ""
+
+ const parseTag = (tag: string, thinking: boolean) => {
+ if (chunk.indexOf(tag) !== -1) {
+ const [before, after] = chunk.split(tag)
+ if (before.length > 0) {
+ parsed.push({ type: thinking ? "text" : "reasoning", text: before })
+ }
+ chunk = after
+ this.insideThinking = thinking
+ } else if (this.endsWithIncompleteString(chunk, tag)) {
+ this.buffer = chunk
+ chunk = ""
+ }
+ }
+
+ if (!this.insideThinking) {
+ parseTag("", true)
+ }
+ if (this.insideThinking) {
+ parseTag("", false)
+ }
+
+ if (chunk.length > 0) {
+ parsed.push({ type: this.insideThinking ? "reasoning" : "text", text: chunk })
+ }
+
+ return parsed
+ }
+
+ private endsWithIncompleteString(chunk: string, str: string): boolean {
+ // check whether the chunk ends with a proper prefix of str, i.e. a partially received tag
+ for (let i = str.length - 1; i >= 1; i--) {
+ if (chunk.endsWith(str.slice(0, i))) {
+ return true
+ }
+ }
+ return false
+ }
+}
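A minimal usage sketch of the ThinkingTokenSeparator added above, driven by the same split-tag sequence as the streaming test; only names introduced in this patch are assumed:

const parser = new ThinkingTokenSeparator()

// The closing </think> tag arrives split across deltas; the trailing "<" of
// the first chunk is buffered until the next chunk completes or rules out a tag.
for (const chunk of ["<think>thoughts<", "/think>", "result"]) {
	for (const parsed of parser.parseChunk(chunk)) {
		console.log(parsed.type, JSON.stringify(parsed.text))
	}
}
// Logs: reasoning "thoughts", then text "result"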
From e3163a4d14a6500982d343e74f464c24ccca884b Mon Sep 17 00:00:00 2001
From: Hank Beasley
Date: Fri, 31 Jan 2025 10:52:41 -0600
Subject: [PATCH 2/4] Implement token parsing for <think> tags in responses and
add UI toggle for model settings
---
src/api/providers/openai.ts | 13 +++--
src/shared/api.ts | 1 +
.../src/components/settings/ApiOptions.tsx | 49 +++++++++++++++++++
3 files changed, 60 insertions(+), 3 deletions(-)
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index c9cb2edc2c4..b77909f339f 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -40,6 +40,9 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
const modelId = this.options.openAiModelId ?? ""
const deepseekReasoner = modelId.includes("deepseek-reasoner")
+ const thinkingParser = modelInfo.thinkTokensInResponse
+ ? new ThinkingTokenSeparator()
+ : new PassThroughTokenSeparator()
if (this.options.openAiStreamingEnabled ?? true) {
const systemMessage: OpenAI.Chat.ChatCompletionSystemMessageParam = {
@@ -59,9 +62,8 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
requestOptions.max_tokens = modelInfo.maxTokens
}
- const thinkingParser = new ThinkingTokenSeparator()
const stream = await this.client.chat.completions.create(requestOptions)
-
+
for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta ?? {}
@@ -84,6 +86,7 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
outputTokens: chunk.usage.completion_tokens || 0,
}
}
+
}
} else {
// o1 for instance doesnt support streaming, non-1 temp, or system prompt
@@ -101,7 +104,6 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
const response = await this.client.chat.completions.create(requestOptions)
- const thinkingParser = new ThinkingTokenSeparator()
for (const parsedChunk of thinkingParser.parseChunk(response.choices[0]?.message.content || "")) {
yield parsedChunk
}
@@ -138,6 +140,11 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
}
}
+class PassThroughTokenSeparator {
+ public parseChunk(chunk: string): ApiStreamChunk[] {
+ return [{ type: "text", text: chunk }]
+ }
+}
class ThinkingTokenSeparator {
private insideThinking = false
private buffer = ""
diff --git a/src/shared/api.ts b/src/shared/api.ts
index 7da67fd3eb6..d1f7d929812 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -80,6 +80,7 @@ export interface ModelInfo {
cacheWritesPrice?: number
cacheReadsPrice?: number
description?: string
+ thinkTokensInResponse?: boolean
}
// Anthropic
diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx
index 4bdff0b061e..8d9ec47b2e7 100644
--- a/webview-ui/src/components/settings/ApiOptions.tsx
+++ b/webview-ui/src/components/settings/ApiOptions.tsx
@@ -893,6 +893,55 @@ const ApiOptions = ({ apiErrorMessage, modelIdErrorMessage }: ApiOptionsProps) =
This model feature is for computer use like sonnet 3.5 support
+
+ <Checkbox
+ checked={apiConfiguration?.openAiCustomModelInfo?.thinkTokensInResponse ?? false}
+ onChange={(checked: boolean) => {
+ handleInputChange("openAiCustomModelInfo")({
+ target: {
+ value: {
+ ...(apiConfiguration?.openAiCustomModelInfo ||
+ openAiModelInfoSaneDefaults),
+ thinkTokensInResponse: checked,
+ },
+ },
+ })
+ }}>
+ Model Uses &lt;think&gt; Tags
+ </Checkbox>
+
+ <p>
+ Enable if the model outputs &lt;Think&gt; &lt;/Think&gt;tags in a
+ chat response. Some Deek Seek R1 providers output tokens with these
+ tags.
+ </p>
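A condensed sketch of what this patch wires together: the thinkTokensInResponse flag (declared on ModelInfo in src/shared/api.ts above) selects between the two separators, and because both expose the same parseChunk(chunk): ApiStreamChunk[] shape, the streaming loop in createMessage is unchanged either way. The helper name makeParser is illustrative, not from the patch:

function makeParser(modelInfo: ModelInfo) {
	return modelInfo.thinkTokensInResponse
		? new ThinkingTokenSeparator() // scan for <think>…</think>, emit "reasoning" chunks
		: new PassThroughTokenSeparator() // emit everything unchanged as "text"
}

Keeping the scan opt-in means models whose output legitimately contains angle brackets are unaffected unless the checkbox above is enabled.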
From b784715117a0369bea4146a2337ef1a5e3a86084 Mon Sep 17 00:00:00 2001
From: Hank Beasley
Date: Fri, 31 Jan 2025 11:29:39 -0600
Subject: [PATCH 3/4] Fix typo in tooltip text for DeepSeek R1 providers in
ApiOptions component
---
webview-ui/src/components/settings/ApiOptions.tsx | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx
index 8d9ec47b2e7..aadf307ebb5 100644
--- a/webview-ui/src/components/settings/ApiOptions.tsx
+++ b/webview-ui/src/components/settings/ApiOptions.tsx
@@ -921,7 +921,7 @@ const ApiOptions = ({ apiErrorMessage, modelIdErrorMessage }: ApiOptionsProps) =
Enable if the model outputs &lt;Think&gt; &lt;/Think&gt;tags in a
- chat response. Some Deek Seek R1 providers output tokens with these
+ chat response. Some DeepSeek R1 providers output tokens with these
tags.
From 74308e6f07e9f9473f5830b666cee55c925836ae Mon Sep 17 00:00:00 2001
From: Hank Beasley
Date: Thu, 6 Feb 2025 10:24:02 -0600
Subject: [PATCH 4/4] merge flush from Szpadel
---
src/api/providers/__tests__/openai.test.ts | 10 +++---
src/api/providers/openai.ts | 37 +++++++++++++---------
2 files changed, 27 insertions(+), 20 deletions(-)
diff --git a/src/api/providers/__tests__/openai.test.ts b/src/api/providers/__tests__/openai.test.ts
index 11256102bee..12a3c6003f8 100644
--- a/src/api/providers/__tests__/openai.test.ts
+++ b/src/api/providers/__tests__/openai.test.ts
@@ -188,8 +188,8 @@ describe("OpenAiHandler", () => {
yield {
choices: [
{
- delta: { content: "result" },
- index: 2,
+ delta: { content: "result<thi" },
+ index: 3,
},
],
usage: null,
@@ -210,7 +210,7 @@ describe("OpenAiHandler", () => {
expect(chunks.length).toBeGreaterThan(0)
const textChunks = chunks.filter((chunk) => chunk.type === "text")
expect(textChunks).toHaveLength(1)
- expect(textChunks[0].text).toBe("result")
+ expect(textChunks[0].text).toBe("result<thi")

const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
expect(reasoningChunks).toHaveLength(1)
@@ -231,7 +231,7 @@ describe("OpenAiHandler", () => {
id: "custom-test-completion",
choices: [
{
- message: { role: "assistant", content: "<think>thoughts</think>result" },
+ message: { role: "assistant", content: "<think>thoughts</think>result<thi" },
finish_reason: "stop",
index: 0,
@@ -254,7 +254,7 @@ describe("OpenAiHandler", () => {
expect(chunks.length).toBeGreaterThan(0)
const textChunks = chunks.filter((chunk) => chunk.type === "text")
expect(textChunks).toHaveLength(1)
- expect(textChunks[0].text).toBe("result")
+ expect(textChunks[0].text).toBe("result<thi")

const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
expect(reasoningChunks).toHaveLength(1)
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index 97a80881b9c..0bd372ce13f 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -18,20 +18,10 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
constructor(options: ApiHandlerOptions) {
this.options = options
-
- let urlHost: string
-
- try {
- urlHost = new URL(this.options.openAiBaseUrl ?? "").host
- } catch (error) {
- // Likely an invalid `openAiBaseUrl`; we're still working on
- // proper settings validation.
- urlHost = ""
- }
-
+ // Azure API shape slightly differs from the core API shape:
+ // https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai
+ const urlHost = new URL(this.options.openAiBaseUrl ?? "").host
if (urlHost === "azure.com" || urlHost.endsWith(".azure.com") || options.openAiUseAzure) {
- // Azure API shape slightly differs from the core API shape:
- // https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai
this.client = new AzureOpenAI({
baseURL: this.options.openAiBaseUrl,
apiKey: this.options.openAiApiKey,
@@ -97,6 +87,10 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
}
}
}
+
+ for (const parsedChunk of thinkingParser.flush()) {
+ yield parsedChunk
+ }
} else {
// o1 for instance doesnt support streaming, non-1 temp, or system prompt
const systemMessage: OpenAI.Chat.ChatCompletionUserMessageParam = {
@@ -113,7 +107,7 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
const response = await this.client.chat.completions.create(requestOptions)
- for (const parsedChunk of thinkingParser.parseChunk(response.choices[0]?.message.content || "")) {
+ for (const parsedChunk of thinkingParser.parseChunk(response.choices[0]?.message.content || "", true)) {
yield parsedChunk
}
yield {
@@ -153,12 +147,16 @@ class PassThroughTokenSeparator {
public parseChunk(chunk: string): ApiStreamChunk[] {
return [{ type: "text", text: chunk }]
}
+
+ public flush(): ApiStreamChunk[] {
+ return []
+ }
}
class ThinkingTokenSeparator {
private insideThinking = false
private buffer = ""
- public parseChunk(chunk: string): ApiStreamChunk[] {
+ public parseChunk(chunk: string, flush: boolean = false): ApiStreamChunk[] {
let parsed: ApiStreamChunk[] = []
chunk = this.buffer + chunk
this.buffer = ""
@@ -184,6 +182,11 @@ class ThinkingTokenSeparator {
parseTag("", false)
}
+ if (flush) {
+ chunk = this.buffer + chunk
+ this.buffer = ""
+ }
+
if (chunk.length > 0) {
parsed.push({ type: this.insideThinking ? "reasoning" : "text", text: chunk })
}
@@ -200,4 +203,8 @@ class ThinkingTokenSeparator {
}
return false
}
+
+ public flush(): ApiStreamChunk[] {
+ return this.parseChunk("", true)
+ }
}
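A short sketch of the flush semantics merged here, mirroring the updated test expectations: a chunk ending in a possible tag prefix is withheld, and flush() releases it once the stream ends so the buffered text is not silently dropped:

const parser = new ThinkingTokenSeparator()

parser.parseChunk("<think>thoughts</think>") // [{ type: "reasoning", text: "thoughts" }]
parser.parseChunk("result<thi") // [] - the whole chunk is buffered, since "<thi" may begin "<think>"
parser.flush() // [{ type: "text", text: "result<thi" }]

Calling parseChunk(content, true) in the non-streaming path has the same effect as a trailing flush(), which is why the single-shot completion path passes the flag instead of making a second call.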