From 7c05cce8b947ddeb0e4fabc9e638136f5eecb8ad Mon Sep 17 00:00:00 2001
From: xyOz
Date: Sat, 17 May 2025 21:16:36 +0100
Subject: [PATCH 1/2] integration

---
 src/api/providers/lmstudio.ts | 57 +++++++++++++++++++++++++++++++++++
 src/api/providers/ollama.ts   | 20 ++++++++++--
 2 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/src/api/providers/lmstudio.ts b/src/api/providers/lmstudio.ts
index 9a3ab187bf2..0696e876523 100644
--- a/src/api/providers/lmstudio.ts
+++ b/src/api/providers/lmstudio.ts
@@ -29,6 +29,47 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 			...convertToOpenAiMessages(messages),
 		]
 
+		// -------------------------
+		// Track token usage
+		// -------------------------
+		// Build content blocks so we can estimate prompt token count using the shared utility.
+		const toContentBlocks = (
+			blocks: Anthropic.Messages.MessageParam[] | string,
+		): Anthropic.Messages.ContentBlockParam[] => {
+			if (typeof blocks === "string") {
+				return [{ type: "text", text: blocks }]
+			}
+
+			const result: Anthropic.Messages.ContentBlockParam[] = []
+			for (const msg of blocks) {
+				if (typeof msg.content === "string") {
+					result.push({ type: "text", text: msg.content })
+				} else if (Array.isArray(msg.content)) {
+					// Filter out text blocks only for counting purposes
+					for (const part of msg.content) {
+						if (part.type === "text") {
+							result.push({ type: "text", text: part.text })
+						}
+					}
+				}
+			}
+			return result
+		}
+
+		// Count prompt/input tokens (system prompt + user/assistant history)
+		let inputTokens = 0
+		try {
+			inputTokens = await this.countTokens([
+				{ type: "text", text: systemPrompt },
+				...toContentBlocks(messages),
+			])
+		} catch (err) {
+			console.error("[LmStudio] Failed to count input tokens:", err)
+			inputTokens = 0
+		}
+
+		let assistantText = ""
+
 		try {
 			// Create params object with optional draft model
 			const params: any = {
@@ -50,12 +91,28 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 			for await (const chunk of results) {
 				const delta = chunk.choices[0]?.delta
 				if (delta?.content) {
+					assistantText += delta.content
 					yield {
 						type: "text",
 						text: delta.content,
 					}
 				}
 			}
+
+			// After streaming completes, estimate output tokens and yield usage metrics
+			let outputTokens = 0
+			try {
+				outputTokens = await this.countTokens([{ type: "text", text: assistantText }])
+			} catch (err) {
+				console.error("[LmStudio] Failed to count output tokens:", err)
+				outputTokens = 0
+			}
+
+			yield {
+				type: "usage",
+				inputTokens,
+				outputTokens,
+			} as const
 		} catch (error) {
 			// LM Studio doesn't return an error code/body for now
 			throw new Error(
diff --git a/src/api/providers/ollama.ts b/src/api/providers/ollama.ts
index 26374d5d583..1b721a59093 100644
--- a/src/api/providers/ollama.ts
+++ b/src/api/providers/ollama.ts
@@ -11,6 +11,9 @@ import { DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants"
 import { XmlMatcher } from "../../utils/xml-matcher"
 import { BaseProvider } from "./base-provider"
 
+// Alias for the usage object returned in streaming chunks
+type CompletionUsage = OpenAI.Chat.Completions.ChatCompletionChunk["usage"]
+
 export class OllamaHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: OpenAI
@@ -37,6 +40,7 @@ export class OllamaHandler extends BaseProvider implements SingleCompletionHandl
 			messages: openAiMessages,
 			temperature: this.options.modelTemperature ?? 0,
 			stream: true,
+			stream_options: { include_usage: true },
 		})
 		const matcher = new XmlMatcher(
 			"think",
@@ -46,18 +50,30 @@ export class OllamaHandler extends BaseProvider implements SingleCompletionHandl
 					text: chunk.data,
 				}) as const,
 		)
+		let lastUsage: CompletionUsage | undefined
 		for await (const chunk of stream) {
 			const delta = chunk.choices[0]?.delta
 
 			if (delta?.content) {
-				for (const chunk of matcher.update(delta.content)) {
-					yield chunk
+				for (const matcherChunk of matcher.update(delta.content)) {
+					yield matcherChunk
 				}
 			}
+			if (chunk.usage) {
+				lastUsage = chunk.usage
+			}
 		}
 		for (const chunk of matcher.final()) {
 			yield chunk
 		}
+
+		if (lastUsage) {
+			yield {
+				type: "usage",
+				inputTokens: lastUsage?.prompt_tokens || 0,
+				outputTokens: lastUsage?.completion_tokens || 0,
+			}
+		}
 	}
 
 	override getModel(): { id: string; info: ModelInfo } {

From d45ad787ddb5c5f9ee4b193d8b5d861fa280dbad Mon Sep 17 00:00:00 2001
From: xyOz
Date: Tue, 20 May 2025 20:07:02 +0100
Subject: [PATCH 2/2] Fix

---
 src/api/providers/lmstudio.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/api/providers/lmstudio.ts b/src/api/providers/lmstudio.ts
index 8a191e108fe..0901cb27680 100644
--- a/src/api/providers/lmstudio.ts
+++ b/src/api/providers/lmstudio.ts
@@ -69,7 +69,7 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 		let assistantText = ""
 
 		try {
-			const params: any = {
+			const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming & { draft_model?: string } = {
 				model: this.getModel().id,
 				messages: openAiMessages,
 				temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
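
For reference, a minimal sketch of how a caller could consume the usage chunks these patches add. It is illustrative only and not part of the patch series: the import path, the streaming method name `createMessage(systemPrompt, messages)`, and the option fields passed to the constructor are assumptions, while the `{ type: "text", text }` and `{ type: "usage", inputTokens, outputTokens }` chunk shapes follow the yields added above.

// Illustrative consumer, not part of the patches above.
// The options object and model id are hypothetical; field names may differ in the real ApiHandlerOptions.
import { Anthropic } from "@anthropic-ai/sdk"

import { OllamaHandler } from "./src/api/providers/ollama"

async function completeWithUsage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]) {
	const handler = new OllamaHandler({
		ollamaModelId: "llama3.1",
		ollamaBaseUrl: "http://localhost:11434",
	} as any)

	let text = ""
	let inputTokens = 0
	let outputTokens = 0

	for await (const chunk of handler.createMessage(systemPrompt, messages)) {
		if (chunk.type === "text") {
			// Streamed completion text, yielded incrementally.
			text += chunk.text
		} else if (chunk.type === "usage") {
			// Emitted once after the stream finishes (see the hunks above).
			inputTokens = chunk.inputTokens
			outputTokens = chunk.outputTokens
		}
	}

	return { text, inputTokens, outputTokens }
}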