From 7c05cce8b947ddeb0e4fabc9e638136f5eecb8ad Mon Sep 17 00:00:00 2001
From: xyOz
Date: Sat, 17 May 2025 21:16:36 +0100
Subject: [PATCH 1/2] integration

---
 src/api/providers/lmstudio.ts | 57 +++++++++++++++++++++++++++++++++++
 src/api/providers/ollama.ts   | 20 ++++++++++--
 2 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/src/api/providers/lmstudio.ts b/src/api/providers/lmstudio.ts
index 9a3ab187bf2..0696e876523 100644
--- a/src/api/providers/lmstudio.ts
+++ b/src/api/providers/lmstudio.ts
@@ -29,6 +29,47 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 			...convertToOpenAiMessages(messages),
 		]
 
+		// -------------------------
+		// Track token usage
+		// -------------------------
+		// Build content blocks so we can estimate prompt token count using the shared utility.
+		const toContentBlocks = (
+			blocks: Anthropic.Messages.MessageParam[] | string,
+		): Anthropic.Messages.ContentBlockParam[] => {
+			if (typeof blocks === "string") {
+				return [{ type: "text", text: blocks }]
+			}
+
+			const result: Anthropic.Messages.ContentBlockParam[] = []
+			for (const msg of blocks) {
+				if (typeof msg.content === "string") {
+					result.push({ type: "text", text: msg.content })
+				} else if (Array.isArray(msg.content)) {
+					// Filter out text blocks only for counting purposes
+					for (const part of msg.content) {
+						if (part.type === "text") {
+							result.push({ type: "text", text: part.text })
+						}
+					}
+				}
+			}
+			return result
+		}
+
+		// Count prompt/input tokens (system prompt + user/assistant history)
+		let inputTokens = 0
+		try {
+			inputTokens = await this.countTokens([
+				{ type: "text", text: systemPrompt },
+				...toContentBlocks(messages),
+			])
+		} catch (err) {
+			console.error("[LmStudio] Failed to count input tokens:", err)
+			inputTokens = 0
+		}
+
+		let assistantText = ""
+
 		try {
 			// Create params object with optional draft model
 			const params: any = {
@@ -50,12 +91,28 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 			for await (const chunk of results) {
 				const delta = chunk.choices[0]?.delta
 				if (delta?.content) {
+					assistantText += delta.content
 					yield {
 						type: "text",
 						text: delta.content,
 					}
 				}
 			}
+
+			// After streaming completes, estimate output tokens and yield usage metrics
+			let outputTokens = 0
+			try {
+				outputTokens = await this.countTokens([{ type: "text", text: assistantText }])
+			} catch (err) {
+				console.error("[LmStudio] Failed to count output tokens:", err)
+				outputTokens = 0
+			}
+
+			yield {
+				type: "usage",
+				inputTokens,
+				outputTokens,
+			} as const
 		} catch (error) {
 			// LM Studio doesn't return an error code/body for now
 			throw new Error(
diff --git a/src/api/providers/ollama.ts b/src/api/providers/ollama.ts
index 26374d5d583..1b721a59093 100644
--- a/src/api/providers/ollama.ts
+++ b/src/api/providers/ollama.ts
@@ -11,6 +11,9 @@ import { DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants"
 import { XmlMatcher } from "../../utils/xml-matcher"
 import { BaseProvider } from "./base-provider"
 
+// Alias for the usage object returned in streaming chunks
+type CompletionUsage = OpenAI.Chat.Completions.ChatCompletionChunk["usage"]
+
 export class OllamaHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: OpenAI
@@ -37,6 +40,7 @@ export class OllamaHandler extends BaseProvider implements SingleCompletionHandl
 			messages: openAiMessages,
 			temperature: this.options.modelTemperature ?? 0,
 			stream: true,
+			stream_options: { include_usage: true },
 		})
 		const matcher = new XmlMatcher(
 			"think",
@@ -46,18 +50,30 @@ export class OllamaHandler extends BaseProvider implements SingleCompletionHandl
 					text: chunk.data,
 				}) as const,
 		)
+		let lastUsage: CompletionUsage | undefined
 		for await (const chunk of stream) {
 			const delta = chunk.choices[0]?.delta
 
 			if (delta?.content) {
-				for (const chunk of matcher.update(delta.content)) {
-					yield chunk
+				for (const matcherChunk of matcher.update(delta.content)) {
+					yield matcherChunk
 				}
 			}
+			if (chunk.usage) {
+				lastUsage = chunk.usage
+			}
 		}
 		for (const chunk of matcher.final()) {
 			yield chunk
 		}
+
+		if (lastUsage) {
+			yield {
+				type: "usage",
+				inputTokens: lastUsage?.prompt_tokens || 0,
+				outputTokens: lastUsage?.completion_tokens || 0,
+			}
+		}
 	}
 
 	override getModel(): { id: string; info: ModelInfo } {

From d45ad787ddb5c5f9ee4b193d8b5d861fa280dbad Mon Sep 17 00:00:00 2001
From: xyOz
Date: Tue, 20 May 2025 20:07:02 +0100
Subject: [PATCH 2/2] Fix

---
 src/api/providers/lmstudio.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/api/providers/lmstudio.ts b/src/api/providers/lmstudio.ts
index 8a191e108fe..0901cb27680 100644
--- a/src/api/providers/lmstudio.ts
+++ b/src/api/providers/lmstudio.ts
@@ -69,7 +69,7 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 		let assistantText = ""
 
 		try {
-			const params: any = {
+			const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming & { draft_model?: string } = {
 				model: this.getModel().id,
 				messages: openAiMessages,
 				temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
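
For reference, a minimal sketch of how a caller could consume the usage chunks these patches add. It is illustrative only and not part of the patch series: the import path, the streaming method name `createMessage(systemPrompt, messages)`, and the option fields passed to the constructor are assumptions, while the `{ type: "text", text }` and `{ type: "usage", inputTokens, outputTokens }` chunk shapes follow the yields added above.

// Illustrative consumer, not part of the patches above.
// The options object and model id are hypothetical; field names may differ in the real ApiHandlerOptions.
import { Anthropic } from "@anthropic-ai/sdk"

import { OllamaHandler } from "./src/api/providers/ollama"

async function completeWithUsage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]) {
	const handler = new OllamaHandler({
		ollamaModelId: "llama3.1",
		ollamaBaseUrl: "http://localhost:11434",
	} as any)

	let text = ""
	let inputTokens = 0
	let outputTokens = 0

	for await (const chunk of handler.createMessage(systemPrompt, messages)) {
		if (chunk.type === "text") {
			// Streamed completion text, yielded incrementally.
			text += chunk.text
		} else if (chunk.type === "usage") {
			// Emitted once after the stream finishes (see the hunks above).
			inputTokens = chunk.inputTokens
			outputTokens = chunk.outputTokens
		}
	}

	return { text, inputTokens, outputTokens }
}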