diff --git a/README.md b/README.md
index 876ccdd1452..373c0bb4ffc 100644
--- a/README.md
+++ b/README.md
@@ -5,20 +5,20 @@
 Join Discord
 Join Reddit
-GitHub Stars
-VS Marketplace Installs
+GitHub Stars
+VS Marketplace Installs
+
-
-
 Roo Code (prev. Roo Cline)
 
 Download on VS Marketplace
 Feature Requests
 Rate & Review
+
 
 **Roo Code** is an AI-powered **autonomous coding agent** that lives in your editor. It can:
diff --git a/src/api/providers/__tests__/openai.test.ts b/src/api/providers/__tests__/openai.test.ts
index 52d0c5c2bb2..12a3c6003f8 100644
--- a/src/api/providers/__tests__/openai.test.ts
+++ b/src/api/providers/__tests__/openai.test.ts
@@ -143,6 +143,122 @@ describe("OpenAiHandler", () => {
 			expect(textChunks).toHaveLength(1)
 			expect(textChunks[0].text).toBe("Test response")
 		})
+
+		it("should handle thinking tags in a stream", async () => {
+			const openaiOptions = {
+				...mockOptions,
+				openAiCustomModelInfo: {
+					thinkTokensInResponse: true,
+					contextWindow: 128_000,
+					supportsImages: false,
+					supportsPromptCache: false,
+				},
+			}
+			const handler = new OpenAiHandler(openaiOptions)
+			mockCreate.mockImplementationOnce(async (options) => {
+				return {
+					[Symbol.asyncIterator]: async function* () {
+						yield {
+							choices: [
+								{
+									delta: { content: "<think>thoughts<" },
+									index: 1,
+								},
+							],
+							usage: null,
+						}
+						yield {
+							choices: [
+								{
+									delta: { content: "/think>" },
+									index: 2,
+								},
+							],
+							usage: null,
+						}
+						yield {
+							choices: [
+								{
+									delta: { content: "result" },
+									index: 3,
+								},
+							],
+							usage: null,
+						}
+					},
+				}
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const textChunks = chunks.filter((chunk) => chunk.type === "text")
+			expect(textChunks).toHaveLength(1)
+			expect(textChunks[0].text).toBe("result")
+
+			const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
+			expect(reasoningChunks).toHaveLength(1)
+			expect(reasoningChunks[0].text).toBe("thoughts")
+		})
+
+		it("should handle thinking tags when not streaming", async () => {
+			const openaiOptions = {
+				...mockOptions,
+				openAiCustomModelInfo: {
+					thinkTokensInResponse: true,
+					contextWindow: 128_000,
+					supportsImages: false,
+					supportsPromptCache: false,
+				},
+				openAiStreamingEnabled: false,
+			}
+			const handler = new OpenAiHandler(openaiOptions)
+			mockCreate.mockImplementationOnce(async (options) => {
+				return {
+					id: "custom-test-completion",
+					choices: [
+						{
+							message: { role: "assistant", content: "<think>thoughts</think>result" },
+						},
+					],
+				}
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const textChunks = chunks.filter((chunk) => chunk.type === "text")
+			expect(textChunks).toHaveLength(1)
+			expect(textChunks[0].text).toBe("result")
+
+			const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
+			expect(reasoningChunks).toHaveLength(1)
+			expect(reasoningChunks[0].text).toBe("thoughts")
+		})
 	})
 
 	describe("error handling", () => {
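For orientation, the assertions above treat the `ApiStream` as a sequence of chunks whose `type` is `"text"`, `"reasoning"`, or `"usage"`. A minimal consumer that routes the two content types separately might look like the sketch below; it is illustrative only, and the handler construction and message shapes are assumed from the test fixtures above.

```ts
import { OpenAiHandler } from "./api/providers/openai" // adjust the relative path to the caller's location

// Sketch: collect reasoning and answer text from a handler stream.
// Assumes `handler` was built with model info that sets thinkTokensInResponse: true.
async function collectResponse(handler: OpenAiHandler, systemPrompt: string, messages: any[]) {
	let reasoning = ""
	let text = ""
	for await (const chunk of handler.createMessage(systemPrompt, messages)) {
		if (chunk.type === "reasoning") {
			reasoning += chunk.text // e.g. "thoughts" from inside <think>...</think>
		} else if (chunk.type === "text") {
			text += chunk.text // e.g. "result"
		}
	}
	return { reasoning, text }
}
```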
diff --git a/src/api/providers/ollama.ts b/src/api/providers/ollama.ts
index 4175b78fa54..03ce6ca0b55 100644
--- a/src/api/providers/ollama.ts
+++ b/src/api/providers/ollama.ts
@@ -20,7 +20,7 @@ export class OllamaHandler implements ApiHandler, SingleCompletionHandler {
 
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const modelId = this.getModel().id
-		const useR1Format = modelId.toLowerCase().includes('deepseek-r1')
+		const useR1Format = modelId.toLowerCase().includes("deepseek-r1")
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
 			...(useR1Format ? convertToR1Format(messages) : convertToOpenAiMessages(messages)),
@@ -53,10 +53,12 @@ export class OllamaHandler implements ApiHandler, SingleCompletionHandler {
 	async completePrompt(prompt: string): Promise<string> {
 		try {
 			const modelId = this.getModel().id
-			const useR1Format = modelId.toLowerCase().includes('deepseek-r1')
+			const useR1Format = modelId.toLowerCase().includes("deepseek-r1")
 			const response = await this.client.chat.completions.create({
 				model: this.getModel().id,
-				messages: useR1Format ? convertToR1Format([{ role: "user", content: prompt }]) : [{ role: "user", content: prompt }],
+				messages: useR1Format
+					? convertToR1Format([{ role: "user", content: prompt }])
+					: [{ role: "user", content: prompt }],
 				temperature: 0,
 				stream: false,
 			})
diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts
index f1b5bcebd32..af5e77390f0 100644
--- a/src/api/providers/openai-native.ts
+++ b/src/api/providers/openai-native.ts
@@ -41,7 +41,7 @@ export class OpenAiNativeHandler implements ApiHandler, SingleCompletionHandler
 	private async *handleO1FamilyMessage(
 		modelId: string,
 		systemPrompt: string,
-		messages: Anthropic.Messages.MessageParam[]
+		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
 		// o1 supports developer prompt with formatting
 		// o1-preview and o1-mini only support user messages
@@ -63,7 +63,7 @@ export class OpenAiNativeHandler implements ApiHandler, SingleCompletionHandler
 	private async *handleO3FamilyMessage(
 		modelId: string,
 		systemPrompt: string,
-		messages: Anthropic.Messages.MessageParam[]
+		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
 		const stream = await this.client.chat.completions.create({
 			model: "o3-mini",
@@ -85,7 +85,7 @@ export class OpenAiNativeHandler implements ApiHandler, SingleCompletionHandler
 	private async *handleDefaultModelMessage(
 		modelId: string,
 		systemPrompt: string,
-		messages: Anthropic.Messages.MessageParam[]
+		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
 		const stream = await this.client.chat.completions.create({
 			model: modelId,
@@ -98,9 +98,7 @@ export class OpenAiNativeHandler implements ApiHandler, SingleCompletionHandler
 		yield* this.handleStreamResponse(stream)
 	}
 
-	private async *yieldResponseData(
-		response: OpenAI.Chat.Completions.ChatCompletion
-	): ApiStream {
+	private async *yieldResponseData(response: OpenAI.Chat.Completions.ChatCompletion): ApiStream {
 		yield {
 			type: "text",
 			text: response.choices[0]?.message.content || "",
@@ -112,9 +110,7 @@ export class OpenAiNativeHandler implements ApiHandler, SingleCompletionHandler
 		}
 	}
 
-	private async *handleStreamResponse(
-		stream: AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>
-	): ApiStream {
+	private async *handleStreamResponse(stream: AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>): ApiStream {
 		for await (const chunk of stream) {
 			const delta = chunk.choices[0]?.delta
 			if (delta?.content) {
@@ -168,7 +164,7 @@ export class OpenAiNativeHandler implements ApiHandler, SingleCompletionHandler
 	private getO1CompletionOptions(
 		modelId: string,
-		prompt: string
+		prompt: string,
 	): OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming {
 		return {
 			model: modelId,
@@ -178,7 +174,7 @@ export class OpenAiNativeHandler implements ApiHandler, SingleCompletionHandler
 	private getO3CompletionOptions(
 		modelId: string,
-		prompt: string
+		prompt: string,
 	): OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming {
 		return {
 			model: "o3-mini",
@@ -189,7 +185,7 @@ export class OpenAiNativeHandler implements ApiHandler, SingleCompletionHandler
 	private getDefaultCompletionOptions(
 		modelId: string,
-		prompt: string
+		prompt: string,
 	): OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming {
 		return {
 			model: modelId,
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index acfbe43d797..0bd372ce13f 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -10,7 +10,7 @@ import {
 import { ApiHandler, SingleCompletionHandler } from "../index"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { convertToR1Format } from "../transform/r1-format"
-import { ApiStream } from "../transform/stream"
+import { ApiStream, ApiStreamChunk } from "../transform/stream"
"../transform/stream" export class OpenAiHandler implements ApiHandler, SingleCompletionHandler { protected options: ApiHandlerOptions @@ -18,20 +18,10 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler { constructor(options: ApiHandlerOptions) { this.options = options - - let urlHost: string - - try { - urlHost = new URL(this.options.openAiBaseUrl ?? "").host - } catch (error) { - // Likely an invalid `openAiBaseUrl`; we're still working on - // proper settings validation. - urlHost = "" - } - + // Azure API shape slightly differs from the core API shape: + // https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai + const urlHost = new URL(this.options.openAiBaseUrl ?? "").host if (urlHost === "azure.com" || urlHost.endsWith(".azure.com") || options.openAiUseAzure) { - // Azure API shape slightly differs from the core API shape: - // https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai this.client = new AzureOpenAI({ baseURL: this.options.openAiBaseUrl, apiKey: this.options.openAiApiKey, @@ -50,6 +40,9 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler { const modelId = this.options.openAiModelId ?? "" const deepseekReasoner = modelId.includes("deepseek-reasoner") + const thinkingParser = modelInfo.thinkTokensInResponse + ? new ThinkingTokenSeparator() + : new PassThroughTokenSeparator() if (this.options.openAiStreamingEnabled ?? true) { const systemMessage: OpenAI.Chat.ChatCompletionSystemMessageParam = { @@ -75,9 +68,8 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler { const delta = chunk.choices[0]?.delta ?? {} if (delta.content) { - yield { - type: "text", - text: delta.content, + for (const parsedChunk of thinkingParser.parseChunk(delta.content)) { + yield parsedChunk } } @@ -95,6 +87,10 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler { } } } + + for (const parsedChunk of thinkingParser.flush()) { + yield parsedChunk + } } else { // o1 for instance doesnt support streaming, non-1 temp, or system prompt const systemMessage: OpenAI.Chat.ChatCompletionUserMessageParam = { @@ -111,9 +107,8 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler { const response = await this.client.chat.completions.create(requestOptions) - yield { - type: "text", - text: response.choices[0]?.message.content || "", + for (const parsedChunk of thinkingParser.parseChunk(response.choices[0]?.message.content || "", true)) { + yield parsedChunk } yield { type: "usage", @@ -147,3 +142,69 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler { } } } + +class PassThroughTokenSeparator { + public parseChunk(chunk: string): ApiStreamChunk[] { + return [{ type: "text", text: chunk }] + } + + public flush(): ApiStreamChunk[] { + return [] + } +} +class ThinkingTokenSeparator { + private insideThinking = false + private buffer = "" + + public parseChunk(chunk: string, flush: boolean = false): ApiStreamChunk[] { + let parsed: ApiStreamChunk[] = [] + chunk = this.buffer + chunk + this.buffer = "" + + const parseTag = (tag: string, thinking: boolean) => { + if (chunk.indexOf(tag) !== -1) { + const [before, after] = chunk.split(tag) + if (before.length > 0) { + parsed.push({ type: thinking ? 
"text" : "reasoning", text: before }) + } + chunk = after + this.insideThinking = thinking + } else if (this.endsWithIncompleteString(chunk, tag)) { + this.buffer = chunk + chunk = "" + } + } + + if (!this.insideThinking) { + parseTag("", true) + } + if (this.insideThinking) { + parseTag("", false) + } + + if (flush) { + chunk = this.buffer + chunk + this.buffer = "" + } + + if (chunk.length > 0) { + parsed.push({ type: this.insideThinking ? "reasoning" : "text", text: chunk }) + } + + return parsed + } + + private endsWithIncompleteString(chunk: string, str: string): boolean { + // iterate from end of the str and check if we start matching from any point + for (let i = str.length - 1; i >= 1; i--) { + if (chunk.endsWith(str.slice(0, i))) { + return true + } + } + return false + } + + public flush(): ApiStreamChunk[] { + return this.parseChunk("", true) + } +} diff --git a/src/shared/api.ts b/src/shared/api.ts index 39bc2a69cab..7ea3031e4cf 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -80,6 +80,7 @@ export interface ModelInfo { cacheWritesPrice?: number cacheReadsPrice?: number description?: string + thinkTokensInResponse?: boolean reasoningEffort?: "low" | "medium" | "high" } diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx index 277a532c486..de03893b8de 100644 --- a/webview-ui/src/components/settings/ApiOptions.tsx +++ b/webview-ui/src/components/settings/ApiOptions.tsx @@ -896,6 +896,55 @@ const ApiOptions = ({ apiErrorMessage, modelIdErrorMessage }: ApiOptionsProps) = This model feature is for computer use like sonnet 3.5 support

 								</div>
+
+								<div>
+									<Checkbox
+										checked={apiConfiguration?.openAiCustomModelInfo?.thinkTokensInResponse ?? false}
+										onChange={(checked: boolean) => {
+											handleInputChange("openAiCustomModelInfo")({
+												target: {
+													value: {
+														...(apiConfiguration?.openAiCustomModelInfo ||
+															openAiModelInfoSaneDefaults),
+														thinkTokensInResponse: checked,
+													},
+												},
+											})
+										}}>
+										Model Uses &lt;think&gt; Tags
+									</Checkbox>
+
+									<div>
+										Enable if the model outputs &lt;Think&gt; &lt;/Think&gt; tags in a
+										chat response. Some DeepSeek R1 providers output tokens with these
+										tags.
+									</div>
+								</div>
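Outside the settings UI, the same flag can be set on a custom model definition, which is what the checkbox above ultimately writes. A rough sketch follows, mirroring the option shape used in the test fixtures; the endpoint, key, and model id are placeholders.

```ts
import { OpenAiHandler } from "./api/providers/openai" // adjust the relative path to the caller's location

// Sketch: a custom OpenAI-compatible profile whose responses embed <think>...</think> blocks.
// Field names follow the test fixtures above; values are placeholders.
const options = {
	openAiBaseUrl: "https://example.invalid/v1",
	openAiApiKey: "YOUR_API_KEY",
	openAiModelId: "deepseek-r1",
	openAiCustomModelInfo: {
		thinkTokensInResponse: true, // <think>...</think> content becomes "reasoning" chunks
		contextWindow: 128_000,
		supportsImages: false,
		supportsPromptCache: false,
	},
}

const handler = new OpenAiHandler(options)
```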