From f3192d78f6b956ab05da91244c1ead84f42ca002 Mon Sep 17 00:00:00 2001
From: Piotr Rogowski
Date: Fri, 31 Jan 2025 23:58:47 +0100
Subject: [PATCH 1/4] Parse text in openai response stream to extract
 reasoning from <think> tags

---
 src/api/providers/__tests__/openai.test.ts | 116 +++++++++++++++++++++
 src/api/providers/openai.ts                |  62 +++++++++--
 2 files changed, 171 insertions(+), 7 deletions(-)

diff --git a/src/api/providers/__tests__/openai.test.ts b/src/api/providers/__tests__/openai.test.ts
index 52d0c5c2bb2..11256102bee 100644
--- a/src/api/providers/__tests__/openai.test.ts
+++ b/src/api/providers/__tests__/openai.test.ts
@@ -143,6 +143,122 @@ describe("OpenAiHandler", () => {
 		expect(textChunks).toHaveLength(1)
 		expect(textChunks[0].text).toBe("Test response")
 	})
+
+	it("should handle thinking tags in a stream", async () => {
+		const openaiOptions = {
+			...mockOptions,
+			openAiCustomModelInfo: {
+				thinkTokensInResponse: true,
+				contextWindow: 128_000,
+				supportsImages: false,
+				supportsPromptCache: false,
+			},
+		}
+		const handler = new OpenAiHandler(openaiOptions)
+		mockCreate.mockImplementationOnce(async (options) => {
+			return {
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [
+							{
+								delta: { content: "<think>thoughts<" },
+								index: 1,
+							},
+						],
+						usage: null,
+					}
+					yield {
+						choices: [
+							{
+								delta: { content: "/think>" },
+								index: 2,
+							},
+						],
+						usage: null,
+					}
+					yield {
+						choices: [
+							{
+								delta: { content: "result" },
+								index: 2,
+							},
+						],
+						usage: null,
+					}
+				},
+			}
+		})
+
+		const stream = handler.createMessage(systemPrompt, messages)
+		const chunks: any[] = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		expect(chunks.length).toBeGreaterThan(0)
+		const textChunks = chunks.filter((chunk) => chunk.type === "text")
+		expect(textChunks).toHaveLength(1)
+		expect(textChunks[0].text).toBe("result")
+
+		const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
+		expect(reasoningChunks).toHaveLength(1)
+		expect(reasoningChunks[0].text).toBe("thoughts")
+	})
+
+	it("should handle thinking tags when not streaming", async () => {
+		const openaiOptions = {
+			...mockOptions,
+			openAiCustomModelInfo: {
+				thinkTokensInResponse: true,
+				contextWindow: 128_000,
+				supportsImages: false,
+				supportsPromptCache: false,
+			},
+			openAiStreamingEnabled: false,
+		}
+		const handler = new OpenAiHandler(openaiOptions)
+		mockCreate.mockImplementationOnce(async (options) => {
+			return {
+				id: "custom-test-completion",
+				choices: [
+					{
+						message: { role: "assistant", content: "<think>thoughts</think>result" },
+						finish_reason: "stop",
+						index: 0,
+					},
+				],
+				usage: {
+					prompt_tokens: 5,
+					completion_tokens: 7,
+					total_tokens: 12,
+				},
+			}
+		})
+
+		const stream = handler.createMessage(systemPrompt, messages)
+		const chunks: any[] = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		expect(chunks.length).toBeGreaterThan(0)
+		const textChunks = chunks.filter((chunk) => chunk.type === "text")
+		expect(textChunks).toHaveLength(1)
+		expect(textChunks[0].text).toBe("result")
+
+		const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
+		expect(reasoningChunks).toHaveLength(1)
+		expect(reasoningChunks[0].text).toBe("thoughts")
+	})
 })
 
 describe("error handling", () => {
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index 8431ffa4167..c9cb2edc2c4 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -10,7 +10,7 @@ import {
 import { ApiHandler, SingleCompletionHandler } from "../index"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { convertToR1Format } from "../transform/r1-format"
-import { ApiStream } from "../transform/stream"
+import { ApiStream, ApiStreamChunk } from "../transform/stream"
 
 export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 	protected options: ApiHandlerOptions
@@ -59,15 +59,15 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 				requestOptions.max_tokens = modelInfo.maxTokens
 			}
 
+			const thinkingParser = new ThinkingTokenSeparator()
 			const stream = await this.client.chat.completions.create(requestOptions)
 
 			for await (const chunk of stream) {
 				const delta = chunk.choices[0]?.delta ?? {}
 
 				if (delta.content) {
-					yield {
-						type: "text",
-						text: delta.content,
+					for (const parsedChunk of thinkingParser.parseChunk(delta.content)) {
+						yield parsedChunk
 					}
 				}
 
@@ -101,9 +101,9 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 
 			const response = await this.client.chat.completions.create(requestOptions)
 
-			yield {
-				type: "text",
-				text: response.choices[0]?.message.content || "",
+			const thinkingParser = new ThinkingTokenSeparator()
+			for (const parsedChunk of thinkingParser.parseChunk(response.choices[0]?.message.content || "")) {
+				yield parsedChunk
 			}
 			yield {
 				type: "usage",
@@ -137,3 +137,51 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 }
+
+class ThinkingTokenSeparator {
+	private insideThinking = false
+	private buffer = ""
+
+	public parseChunk(chunk: string): ApiStreamChunk[] {
+		let parsed: ApiStreamChunk[] = []
+		chunk = this.buffer + chunk
+		this.buffer = ""
+
+		const parseTag = (tag: string, thinking: boolean) => {
+			if (chunk.indexOf(tag) !== -1) {
+				const [before, after] = chunk.split(tag)
+				if (before.length > 0) {
+					parsed.push({ type: thinking ? "text" : "reasoning", text: before })
+				}
+				chunk = after
+				this.insideThinking = thinking
+			} else if (this.endsWithIncompleteString(chunk, tag)) {
+				this.buffer = chunk
+				chunk = ""
+			}
+		}
+
+		if (!this.insideThinking) {
+			parseTag("<think>", true)
+		}
+		if (this.insideThinking) {
+			parseTag("</think>", false)
+		}
+
+		if (chunk.length > 0) {
+			parsed.push({ type: this.insideThinking ? "reasoning" : "text", text: chunk })
+		}
+
+		return parsed
+	}
+
+	private endsWithIncompleteString(chunk: string, str: string): boolean {
+		// iterate from end of the str and check if we start matching from any point
+		for (let i = str.length - 1; i >= 1; i--) {
+			if (chunk.endsWith(str.slice(0, i))) {
+				return true
+			}
+		}
+		return false
+	}
+}

From e3163a4d14a6500982d343e74f464c24ccca884b Mon Sep 17 00:00:00 2001
From: Hank Beasley
Date: Fri, 31 Jan 2025 10:52:41 -0600
Subject: [PATCH 2/4] Implement token parsing for <think> tags in responses
 and add UI toggle for model settings

---
 src/api/providers/openai.ts                 | 13 +++--
 src/shared/api.ts                           |  1 +
 .../src/components/settings/ApiOptions.tsx  | 49 +++++++++++++++++++
 3 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index c9cb2edc2c4..b77909f339f 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -40,6 +40,9 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 		const modelId = this.options.openAiModelId ?? ""
 		const deepseekReasoner = modelId.includes("deepseek-reasoner")
+		const thinkingParser = modelInfo.thinkTokensInResponse
+			? new ThinkingTokenSeparator()
+			: new PassThroughTokenSeparator()
 
 		if (this.options.openAiStreamingEnabled ?? true) {
 			const systemMessage: OpenAI.Chat.ChatCompletionSystemMessageParam = {
@@ -59,9 +62,8 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 				requestOptions.max_tokens = modelInfo.maxTokens
 			}
 
-			const thinkingParser = new ThinkingTokenSeparator()
 			const stream = await this.client.chat.completions.create(requestOptions)
-
+
 			for await (const chunk of stream) {
 				const delta = chunk.choices[0]?.delta ?? {}
@@ -84,6 +86,7 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 						outputTokens: chunk.usage.completion_tokens || 0,
 					}
 				}
+
 			}
 		} else {
 			// o1 for instance doesnt support streaming, non-1 temp, or system prompt
@@ -101,7 +104,6 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 
 			const response = await this.client.chat.completions.create(requestOptions)
 
-			const thinkingParser = new ThinkingTokenSeparator()
 			for (const parsedChunk of thinkingParser.parseChunk(response.choices[0]?.message.content || "")) {
 				yield parsedChunk
 			}
@@ -138,6 +140,11 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 }
+class PassThroughTokenSeparator {
+	public parseChunk(chunk: string): ApiStreamChunk[] {
+		return [{ type: "text", text: chunk }]
+	}
+}
 class ThinkingTokenSeparator {
 	private insideThinking = false
 	private buffer = ""
diff --git a/src/shared/api.ts b/src/shared/api.ts
index 7da67fd3eb6..d1f7d929812 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -80,6 +80,7 @@ export interface ModelInfo {
 	cacheWritesPrice?: number
 	cacheReadsPrice?: number
 	description?: string
+	thinkTokensInResponse?: boolean
 }
 
 // Anthropic
diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx
index 4bdff0b061e..8d9ec47b2e7 100644
--- a/webview-ui/src/components/settings/ApiOptions.tsx
+++ b/webview-ui/src/components/settings/ApiOptions.tsx
@@ -893,6 +893,55 @@ const ApiOptions = ({ apiErrorMessage, modelIdErrorMessage }: ApiOptionsProps) =
 							<span style={{ fontSize: "12px", color: "var(--vscode-descriptionForeground)" }}>
 								This model feature is for computer use like sonnet 3.5 support
 							</span>
 						</div>
+
+						<div style={{ display: "flex", flexDirection: "column", marginTop: 10 }}>
+							<Checkbox
+								checked={apiConfiguration?.openAiCustomModelInfo?.thinkTokensInResponse ?? false}
+								onChange={(checked: boolean) => {
+									handleInputChange("openAiCustomModelInfo")({
+										target: {
+											value: {
+												...(apiConfiguration?.openAiCustomModelInfo ||
+													openAiModelInfoSaneDefaults),
+												thinkTokensInResponse: checked,
+											},
+										},
+									})
+								}}>
+								Model Uses <Think> Tags
+							</Checkbox>
+							<span style={{ fontSize: "12px", color: "var(--vscode-descriptionForeground)" }}>
+								Enable if the model outputs <Think> </Think>tags in a
+								chat response. Some Deek Seek R1 providers output tokens with these
+								tags.
+							</span>
+						</div>
 					</div>
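
Note: a minimal sketch (not part of the patch) of how the ThinkingTokenSeparator added above behaves when a tag is split across stream chunks. The chunk boundaries below are illustrative; any split works because endsWithIncompleteString keeps a possible tag prefix in the buffer until the next chunk arrives:

    const parser = new ThinkingTokenSeparator()
    parser.parseChunk("<thi")        // [] - "<thi" is held in the buffer as a possible "<think>" prefix
    parser.parseChunk("nk>thoughts") // [{ type: "reasoning", text: "thoughts" }]
    parser.parseChunk("</think>ok")  // [{ type: "text", text: "ok" }]

PassThroughTokenSeparator keeps the old single-text-chunk behavior, so the thinkTokensInResponse flag only selects which separator gets constructed.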
From b784715117a0369bea4146a2337ef1a5e3a86084 Mon Sep 17 00:00:00 2001
From: Hank Beasley
Date: Fri, 31 Jan 2025 11:29:39 -0600
Subject: [PATCH 3/4] Fix typo in tooltip text for DeepSeek R1 providers in
 ApiOptions component

---
 webview-ui/src/components/settings/ApiOptions.tsx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx
index 8d9ec47b2e7..aadf307ebb5 100644
--- a/webview-ui/src/components/settings/ApiOptions.tsx
+++ b/webview-ui/src/components/settings/ApiOptions.tsx
@@ -921,7 +921,7 @@ const ApiOptions = ({ apiErrorMessage, modelIdErrorMessage }: ApiOptionsProps) =
 							</Checkbox>
 							<span style={{ fontSize: "12px", color: "var(--vscode-descriptionForeground)" }}>
 								Enable if the model outputs <Think> </Think>tags in a
-								chat response. Some Deek Seek R1 providers output tokens with these
+								chat response. Some DeepSeek R1 providers output tokens with these
 								tags.
 							</span>
 						</div>

From 74308e6f07e9f9473f5830b666cee55c925836ae Mon Sep 17 00:00:00 2001
From: Hank Beasley
Date: Thu, 6 Feb 2025 10:24:02 -0600
Subject: [PATCH 4/4] merge flush from Szpadel

---
 src/api/providers/__tests__/openai.test.ts | 10 +++---
 src/api/providers/openai.ts                | 37 +++++++++++++---------
 2 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/src/api/providers/__tests__/openai.test.ts b/src/api/providers/__tests__/openai.test.ts
index 11256102bee..12a3c6003f8 100644
--- a/src/api/providers/__tests__/openai.test.ts
+++ b/src/api/providers/__tests__/openai.test.ts
@@ -188,8 +188,8 @@ describe("OpenAiHandler", () => {
 				yield {
 					choices: [
 						{
-							delta: { content: "result" },
-							index: 2,
+							delta: { content: "result<" },
+							index: 2,
 						},
 					],
 					usage: null,
@@ -205,7 +205,7 @@ describe("OpenAiHandler", () => {
 		expect(chunks.length).toBeGreaterThan(0)
 		const textChunks = chunks.filter((chunk) => chunk.type === "text")
 		expect(textChunks).toHaveLength(1)
-		expect(textChunks[0].text).toBe("result")
+		expect(textChunks[0].text).toBe("result<")
 
 		const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
 		expect(reasoningChunks).toHaveLength(1)
@@ -231,7 +231,7 @@ describe("OpenAiHandler", () => {
 					id: "custom-test-completion",
 					choices: [
 						{
-							message: { role: "assistant", content: "<think>thoughts</think>result" },
+							message: { role: "assistant", content: "<think>thoughts</think>result<" },
 							finish_reason: "stop",
 							index: 0,
 						},
@@ -248,7 +248,7 @@ describe("OpenAiHandler", () => {
 		expect(chunks.length).toBeGreaterThan(0)
 		const textChunks = chunks.filter((chunk) => chunk.type === "text")
 		expect(textChunks).toHaveLength(1)
-		expect(textChunks[0].text).toBe("result")
+		expect(textChunks[0].text).toBe("result<")
 
 		const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
 		expect(reasoningChunks).toHaveLength(1)
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index 97a80881b9c..0bd372ce13f 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -18,20 +18,10 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 
 	constructor(options: ApiHandlerOptions) {
 		this.options = options
-
-		let urlHost: string
-
-		try {
-			urlHost = new URL(this.options.openAiBaseUrl ?? "").host
-		} catch (error) {
-			// Likely an invalid `openAiBaseUrl`; we're still working on
-			// proper settings validation.
-			urlHost = ""
-		}
-
+		// Azure API shape slightly differs from the core API shape:
+		// https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai
+		const urlHost = new URL(this.options.openAiBaseUrl ?? "").host
 		if (urlHost === "azure.com" || urlHost.endsWith(".azure.com") || options.openAiUseAzure) {
-			// Azure API shape slightly differs from the core API shape:
-			// https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai
 			this.client = new AzureOpenAI({
 				baseURL: this.options.openAiBaseUrl,
 				apiKey: this.options.openAiApiKey,
@@ -97,6 +87,10 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 					}
 				}
 			}
+
+			for (const parsedChunk of thinkingParser.flush()) {
+				yield parsedChunk
+			}
 
 		} else {
 			// o1 for instance doesnt support streaming, non-1 temp, or system prompt
 			const systemMessage: OpenAI.Chat.ChatCompletionUserMessageParam = {
@@ -113,7 +107,7 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 
 			const response = await this.client.chat.completions.create(requestOptions)
 
-			for (const parsedChunk of thinkingParser.parseChunk(response.choices[0]?.message.content || "")) {
+			for (const parsedChunk of thinkingParser.parseChunk(response.choices[0]?.message.content || "", true)) {
 				yield parsedChunk
 			}
 			yield {
 				type: "usage",
@@ -153,12 +147,16 @@ class PassThroughTokenSeparator {
 	public parseChunk(chunk: string): ApiStreamChunk[] {
 		return [{ type: "text", text: chunk }]
 	}
+
+	public flush(): ApiStreamChunk[] {
+		return []
+	}
 }
 class ThinkingTokenSeparator {
 	private insideThinking = false
 	private buffer = ""
 
-	public parseChunk(chunk: string): ApiStreamChunk[] {
+	public parseChunk(chunk: string, flush: boolean = false): ApiStreamChunk[] {
 		let parsed: ApiStreamChunk[] = []
 		chunk = this.buffer + chunk
 		this.buffer = ""
@@ -184,6 +182,11 @@ class ThinkingTokenSeparator {
 			parseTag("</think>", false)
 		}
 
+		if (flush) {
+			chunk = this.buffer + chunk
+			this.buffer = ""
+		}
+
 		if (chunk.length > 0) {
 			parsed.push({ type: this.insideThinking ? "reasoning" : "text", text: chunk })
 		}
@@ -200,4 +203,8 @@ class ThinkingTokenSeparator {
 		}
 		return false
 	}
+
+	public flush(): ApiStreamChunk[] {
+		return this.parseChunk("", true)
+	}
 }