From af0a7e80b37d7f0dec5aae9adb986699f1297387 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Mon, 2 Feb 2026 15:42:19 +0000 Subject: [PATCH 1/7] feat: migrate HuggingFace provider to use AI SDK @ai-sdk/huggingface package - Add @ai-sdk/huggingface ^1.0.28 dependency - Rewrite HuggingFaceHandler to extend BaseProvider with AI SDK - Use streamText/generateText from ai package for streaming and completions - Use shared utilities: convertToAiSdkMessages, convertToolsForAiSdk, processAiSdkStreamPart, mapToolChoice, handleAiSdkError - Add processUsageMetrics for cache token handling from providerMetadata - Add comprehensive test suite following Fireworks test patterns - Default temperature: 0.5 - Base URL: https://router.huggingface.co/v1 (HuggingFace Responses API) This follows the exact pattern from PR #11118 (Fireworks migration). --- .../providers/__tests__/huggingface.spec.ts | 589 ++++++++++++++++++ src/api/providers/huggingface.ts | 227 ++++--- src/package.json | 1 + 3 files changed, 738 insertions(+), 79 deletions(-) create mode 100644 src/api/providers/__tests__/huggingface.spec.ts diff --git a/src/api/providers/__tests__/huggingface.spec.ts b/src/api/providers/__tests__/huggingface.spec.ts new file mode 100644 index 00000000000..6529c72c0d3 --- /dev/null +++ b/src/api/providers/__tests__/huggingface.spec.ts @@ -0,0 +1,589 @@ +// npx vitest run src/api/providers/__tests__/huggingface.spec.ts + +// Use vi.hoisted to define mock functions that can be referenced in hoisted vi.mock() calls +const { mockStreamText, mockGenerateText } = vi.hoisted(() => ({ + mockStreamText: vi.fn(), + mockGenerateText: vi.fn(), +})) + +vi.mock("ai", async (importOriginal) => { + const actual = await importOriginal() + return { + ...actual, + streamText: mockStreamText, + generateText: mockGenerateText, + } +}) + +vi.mock("@ai-sdk/huggingface", () => ({ + createHuggingFace: vi.fn(() => { + // Return a function that returns a mock language model + return vi.fn(() => ({ + modelId: "meta-llama/Llama-3.3-70B-Instruct", + provider: "huggingface", + })) + }), +})) + +// Mock the fetchers +vi.mock("../fetchers/huggingface", () => ({ + getHuggingFaceModels: vi.fn(() => Promise.resolve({})), + getCachedHuggingFaceModels: vi.fn(() => ({})), +})) + +import type { Anthropic } from "@anthropic-ai/sdk" + +import type { ApiHandlerOptions } from "../../../shared/api" + +import { HuggingFaceHandler } from "../huggingface" + +describe("HuggingFaceHandler", () => { + let handler: HuggingFaceHandler + let mockOptions: ApiHandlerOptions + + beforeEach(() => { + mockOptions = { + huggingFaceApiKey: "test-huggingface-api-key", + huggingFaceModelId: "meta-llama/Llama-3.3-70B-Instruct", + } + handler = new HuggingFaceHandler(mockOptions) + vi.clearAllMocks() + }) + + describe("constructor", () => { + it("should initialize with provided options", () => { + expect(handler).toBeInstanceOf(HuggingFaceHandler) + expect(handler.getModel().id).toBe(mockOptions.huggingFaceModelId) + }) + + it("should use default model ID if not provided", () => { + const handlerWithoutModel = new HuggingFaceHandler({ + ...mockOptions, + huggingFaceModelId: undefined, + }) + expect(handlerWithoutModel.getModel().id).toBe("meta-llama/Llama-3.3-70B-Instruct") + }) + + it("should throw error if API key is not provided", () => { + expect(() => { + new HuggingFaceHandler({ + ...mockOptions, + huggingFaceApiKey: undefined, + }) + }).toThrow("Hugging Face API key is required") + }) + }) + + describe("getModel", () => { + it("should return default model when no model 
is specified", () => { + const handlerWithoutModel = new HuggingFaceHandler({ + huggingFaceApiKey: "test-huggingface-api-key", + }) + const model = handlerWithoutModel.getModel() + expect(model.id).toBe("meta-llama/Llama-3.3-70B-Instruct") + expect(model.info).toBeDefined() + }) + + it("should return specified model when valid model is provided", () => { + const testModelId = "mistralai/Mistral-7B-Instruct-v0.3" + const handlerWithModel = new HuggingFaceHandler({ + huggingFaceModelId: testModelId, + huggingFaceApiKey: "test-huggingface-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + }) + + it("should include model parameters from getModelParams", () => { + const model = handler.getModel() + expect(model).toHaveProperty("temperature") + expect(model).toHaveProperty("maxTokens") + }) + + it("should return fallback info when model not in cache", () => { + const model = handler.getModel() + expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + }), + ) + }) + }) + + describe("createMessage", () => { + const systemPrompt = "You are a helpful assistant." + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "text" as const, + text: "Hello!", + }, + ], + }, + ] + + it("should handle streaming responses", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Test response from HuggingFace" } + } + + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 5, + }) + + const mockProviderMetadata = Promise.resolve({}) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + providerMetadata: mockProviderMetadata, + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + expect(chunks.length).toBeGreaterThan(0) + const textChunks = chunks.filter((chunk) => chunk.type === "text") + expect(textChunks).toHaveLength(1) + expect(textChunks[0].text).toBe("Test response from HuggingFace") + }) + + it("should include usage information", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Test response" } + } + + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 20, + }) + + const mockProviderMetadata = Promise.resolve({}) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + providerMetadata: mockProviderMetadata, + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const usageChunks = chunks.filter((chunk) => chunk.type === "usage") + expect(usageChunks.length).toBeGreaterThan(0) + expect(usageChunks[0].inputTokens).toBe(10) + expect(usageChunks[0].outputTokens).toBe(20) + }) + + it("should handle cached tokens in usage data from providerMetadata", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Test response" } + } + + const mockUsage = Promise.resolve({ + inputTokens: 100, + outputTokens: 50, + }) + + // HuggingFace provides cache metrics via providerMetadata for supported models + const mockProviderMetadata = Promise.resolve({ + huggingface: { + promptCacheHitTokens: 30, + promptCacheMissTokens: 70, + }, + }) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: 
mockUsage, + providerMetadata: mockProviderMetadata, + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const usageChunks = chunks.filter((chunk) => chunk.type === "usage") + expect(usageChunks.length).toBeGreaterThan(0) + expect(usageChunks[0].inputTokens).toBe(100) + expect(usageChunks[0].outputTokens).toBe(50) + expect(usageChunks[0].cacheReadTokens).toBe(30) + expect(usageChunks[0].cacheWriteTokens).toBe(70) + }) + + it("should handle usage with details.cachedInputTokens when providerMetadata is not available", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Test response" } + } + + const mockUsage = Promise.resolve({ + inputTokens: 100, + outputTokens: 50, + details: { + cachedInputTokens: 25, + }, + }) + + const mockProviderMetadata = Promise.resolve({}) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + providerMetadata: mockProviderMetadata, + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const usageChunks = chunks.filter((chunk) => chunk.type === "usage") + expect(usageChunks.length).toBeGreaterThan(0) + expect(usageChunks[0].cacheReadTokens).toBe(25) + expect(usageChunks[0].cacheWriteTokens).toBeUndefined() + }) + + it("should pass correct temperature (0.5 default) to streamText", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Test" } + } + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: Promise.resolve({ inputTokens: 0, outputTokens: 0 }), + providerMetadata: Promise.resolve({}), + }) + + const handlerWithDefaultTemp = new HuggingFaceHandler({ + huggingFaceApiKey: "test-key", + huggingFaceModelId: "meta-llama/Llama-3.3-70B-Instruct", + }) + + const stream = handlerWithDefaultTemp.createMessage(systemPrompt, messages) + for await (const _ of stream) { + // consume stream + } + + expect(mockStreamText).toHaveBeenCalledWith( + expect.objectContaining({ + temperature: 0.5, + }), + ) + }) + + it("should use user-specified temperature over provider defaults", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Test" } + } + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: Promise.resolve({ inputTokens: 0, outputTokens: 0 }), + providerMetadata: Promise.resolve({}), + }) + + const handlerWithCustomTemp = new HuggingFaceHandler({ + huggingFaceApiKey: "test-key", + huggingFaceModelId: "meta-llama/Llama-3.3-70B-Instruct", + modelTemperature: 0.7, + }) + + const stream = handlerWithCustomTemp.createMessage(systemPrompt, messages) + for await (const _ of stream) { + // consume stream + } + + // User-specified temperature should take precedence over everything + expect(mockStreamText).toHaveBeenCalledWith( + expect.objectContaining({ + temperature: 0.7, + }), + ) + }) + + it("should handle stream with multiple chunks", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Hello" } + yield { type: "text-delta", text: " world" } + } + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: Promise.resolve({ inputTokens: 5, outputTokens: 10 }), + providerMetadata: Promise.resolve({}), + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of 
stream) { + chunks.push(chunk) + } + + const textChunks = chunks.filter((c) => c.type === "text") + expect(textChunks[0]).toEqual({ type: "text", text: "Hello" }) + expect(textChunks[1]).toEqual({ type: "text", text: " world" }) + + const usageChunks = chunks.filter((c) => c.type === "usage") + expect(usageChunks[0]).toMatchObject({ type: "usage", inputTokens: 5, outputTokens: 10 }) + }) + + it("should handle errors with handleAiSdkError", async () => { + async function* mockFullStream(): AsyncGenerator { + yield { type: "text-delta", text: "" } // Yield something before error to satisfy lint + throw new Error("API Error") + } + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: Promise.resolve({ inputTokens: 0, outputTokens: 0 }), + providerMetadata: Promise.resolve({}), + }) + + const stream = handler.createMessage(systemPrompt, messages) + + await expect(async () => { + for await (const _ of stream) { + // consume stream + } + }).rejects.toThrow("HuggingFace: API Error") + }) + }) + + describe("completePrompt", () => { + it("should complete a prompt using generateText", async () => { + mockGenerateText.mockResolvedValue({ + text: "Test completion from HuggingFace", + }) + + const result = await handler.completePrompt("Test prompt") + + expect(result).toBe("Test completion from HuggingFace") + expect(mockGenerateText).toHaveBeenCalledWith( + expect.objectContaining({ + prompt: "Test prompt", + }), + ) + }) + + it("should use default temperature in completePrompt", async () => { + mockGenerateText.mockResolvedValue({ + text: "Test completion", + }) + + await handler.completePrompt("Test prompt") + + expect(mockGenerateText).toHaveBeenCalledWith( + expect.objectContaining({ + temperature: 0.5, + }), + ) + }) + }) + + describe("processUsageMetrics", () => { + it("should correctly process usage metrics including cache information from providerMetadata", () => { + class TestHuggingFaceHandler extends HuggingFaceHandler { + public testProcessUsageMetrics(usage: any, providerMetadata?: any) { + return this.processUsageMetrics(usage, providerMetadata) + } + } + + const testHandler = new TestHuggingFaceHandler(mockOptions) + + const usage = { + inputTokens: 100, + outputTokens: 50, + } + + const providerMetadata = { + huggingface: { + promptCacheHitTokens: 20, + promptCacheMissTokens: 80, + }, + } + + const result = testHandler.testProcessUsageMetrics(usage, providerMetadata) + + expect(result.type).toBe("usage") + expect(result.inputTokens).toBe(100) + expect(result.outputTokens).toBe(50) + expect(result.cacheWriteTokens).toBe(80) + expect(result.cacheReadTokens).toBe(20) + }) + + it("should handle missing cache metrics gracefully", () => { + class TestHuggingFaceHandler extends HuggingFaceHandler { + public testProcessUsageMetrics(usage: any, providerMetadata?: any) { + return this.processUsageMetrics(usage, providerMetadata) + } + } + + const testHandler = new TestHuggingFaceHandler(mockOptions) + + const usage = { + inputTokens: 100, + outputTokens: 50, + } + + const result = testHandler.testProcessUsageMetrics(usage) + + expect(result.type).toBe("usage") + expect(result.inputTokens).toBe(100) + expect(result.outputTokens).toBe(50) + expect(result.cacheWriteTokens).toBeUndefined() + expect(result.cacheReadTokens).toBeUndefined() + }) + + it("should include reasoning tokens when provided", () => { + class TestHuggingFaceHandler extends HuggingFaceHandler { + public testProcessUsageMetrics(usage: any, providerMetadata?: any) { + return this.processUsageMetrics(usage, 
providerMetadata) + } + } + + const testHandler = new TestHuggingFaceHandler(mockOptions) + + const usage = { + inputTokens: 100, + outputTokens: 50, + details: { + reasoningTokens: 30, + }, + } + + const result = testHandler.testProcessUsageMetrics(usage) + + expect(result.reasoningTokens).toBe(30) + }) + }) + + describe("tool handling", () => { + const systemPrompt = "You are a helpful assistant." + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello!" }], + }, + ] + + it("should handle tool calls in streaming", async () => { + async function* mockFullStream() { + yield { + type: "tool-input-start", + id: "tool-call-1", + toolName: "read_file", + } + yield { + type: "tool-input-delta", + id: "tool-call-1", + delta: '{"path":"test.ts"}', + } + yield { + type: "tool-input-end", + id: "tool-call-1", + } + } + + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 5, + }) + + const mockProviderMetadata = Promise.resolve({}) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + providerMetadata: mockProviderMetadata, + }) + + const stream = handler.createMessage(systemPrompt, messages, { + taskId: "test-task", + tools: [ + { + type: "function", + function: { + name: "read_file", + description: "Read a file", + parameters: { + type: "object", + properties: { path: { type: "string" } }, + required: ["path"], + }, + }, + }, + ], + }) + + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const toolCallStartChunks = chunks.filter((c) => c.type === "tool_call_start") + const toolCallDeltaChunks = chunks.filter((c) => c.type === "tool_call_delta") + const toolCallEndChunks = chunks.filter((c) => c.type === "tool_call_end") + + expect(toolCallStartChunks.length).toBe(1) + expect(toolCallStartChunks[0].id).toBe("tool-call-1") + expect(toolCallStartChunks[0].name).toBe("read_file") + + expect(toolCallDeltaChunks.length).toBe(1) + expect(toolCallDeltaChunks[0].delta).toBe('{"path":"test.ts"}') + + expect(toolCallEndChunks.length).toBe(1) + expect(toolCallEndChunks[0].id).toBe("tool-call-1") + }) + + it("should ignore tool-call events to prevent duplicate tools in UI", async () => { + async function* mockFullStream() { + yield { + type: "tool-call", + toolCallId: "tool-call-1", + toolName: "read_file", + input: { path: "test.ts" }, + } + } + + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 5, + }) + + const mockProviderMetadata = Promise.resolve({}) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + providerMetadata: mockProviderMetadata, + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // tool-call events should be ignored (only tool-input-start/delta/end are processed) + const toolCallChunks = chunks.filter( + (c) => c.type === "tool_call_start" || c.type === "tool_call_delta" || c.type === "tool_call_end", + ) + expect(toolCallChunks.length).toBe(0) + }) + }) +}) diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts index 21e429aaabf..bd20990954d 100644 --- a/src/api/providers/huggingface.ts +++ b/src/api/providers/huggingface.ts @@ -1,22 +1,36 @@ -import OpenAI from "openai" import { Anthropic } from "@anthropic-ai/sdk" +import { createHuggingFace } from "@ai-sdk/huggingface" +import { streamText, generateText, ToolSet } from "ai" 
-import type { ModelRecord } from "@roo-code/types" +import type { ModelRecord, ModelInfo } from "@roo-code/types" import type { ApiHandlerOptions } from "../../shared/api" -import { ApiStream } from "../transform/stream" -import { convertToOpenAiMessages } from "../transform/openai-format" -import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" + +import { + convertToAiSdkMessages, + convertToolsForAiSdk, + processAiSdkStreamPart, + mapToolChoice, + handleAiSdkError, +} from "../transform/ai-sdk" +import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" +import { getModelParams } from "../transform/model-params" + import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" import { getHuggingFaceModels, getCachedHuggingFaceModels } from "./fetchers/huggingface" -import { handleOpenAIError } from "./utils/openai-error-handler" +import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" + +const HUGGINGFACE_DEFAULT_TEMPERATURE = 0.5 +/** + * HuggingFace provider using the dedicated @ai-sdk/huggingface package. + * Provides native support for various models on HuggingFace Hub via the Responses API. + */ export class HuggingFaceHandler extends BaseProvider implements SingleCompletionHandler { - private client: OpenAI - private options: ApiHandlerOptions + protected options: ApiHandlerOptions + protected provider: ReturnType private modelCache: ModelRecord | null = null - private readonly providerName = "HuggingFace" constructor(options: ApiHandlerOptions) { super() @@ -26,10 +40,11 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion throw new Error("Hugging Face API key is required") } - this.client = new OpenAI({ + // Create the HuggingFace provider using AI SDK + this.provider = createHuggingFace({ baseURL: "https://router.huggingface.co/v1", apiKey: this.options.huggingFaceApiKey, - defaultHeaders: DEFAULT_HEADERS, + headers: DEFAULT_HEADERS, }) // Try to get cached models first @@ -47,91 +62,145 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion } } - override async *createMessage( - systemPrompt: string, - messages: Anthropic.Messages.MessageParam[], - metadata?: ApiHandlerCreateMessageMetadata, - ): ApiStream { - const modelId = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct" - const temperature = this.options.modelTemperature ?? 
0.7 - - const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { - model: modelId, - temperature, - messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)], - stream: true, - stream_options: { include_usage: true }, - } + override getModel(): { id: string; info: ModelInfo; maxTokens?: number; temperature?: number } { + const id = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct" - // Add max_tokens if specified - if (this.options.includeMaxTokens && this.options.modelMaxTokens) { - params.max_tokens = this.options.modelMaxTokens - } + // Try to get model info from cache + const cachedInfo = this.modelCache?.[id] - let stream - try { - stream = await this.client.chat.completions.create(params) - } catch (error) { - throw handleOpenAIError(error, this.providerName) + const info: ModelInfo = cachedInfo || { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, } - for await (const chunk of stream) { - const delta = chunk.choices[0]?.delta + const params = getModelParams({ + format: "openai", + modelId: id, + model: info, + settings: this.options, + defaultTemperature: HUGGINGFACE_DEFAULT_TEMPERATURE, + }) - if (delta?.content) { - yield { - type: "text", - text: delta.content, - } - } + return { id, info, ...params } + } - if (chunk.usage) { - yield { - type: "usage", - inputTokens: chunk.usage.prompt_tokens || 0, - outputTokens: chunk.usage.completion_tokens || 0, - } + /** + * Get the language model for the configured model ID. + */ + protected getLanguageModel() { + const { id } = this.getModel() + return this.provider(id) + } + + /** + * Process usage metrics from the AI SDK response. + */ + protected processUsageMetrics( + usage: { + inputTokens?: number + outputTokens?: number + details?: { + cachedInputTokens?: number + reasoningTokens?: number + } + }, + providerMetadata?: { + huggingface?: { + promptCacheHitTokens?: number + promptCacheMissTokens?: number } + }, + ): ApiStreamUsageChunk { + // Extract cache metrics from HuggingFace's providerMetadata if available + const cacheReadTokens = providerMetadata?.huggingface?.promptCacheHitTokens ?? usage.details?.cachedInputTokens + const cacheWriteTokens = providerMetadata?.huggingface?.promptCacheMissTokens + + return { + type: "usage", + inputTokens: usage.inputTokens || 0, + outputTokens: usage.outputTokens || 0, + cacheReadTokens, + cacheWriteTokens, + reasoningTokens: usage.details?.reasoningTokens, } } - async completePrompt(prompt: string): Promise { - const modelId = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct" - - try { - const response = await this.client.chat.completions.create({ - model: modelId, - messages: [{ role: "user", content: prompt }], - }) + /** + * Get the max tokens parameter to include in the request. + */ + protected getMaxOutputTokens(): number | undefined { + const { info } = this.getModel() + return this.options.modelMaxTokens || info.maxTokens || undefined + } - return response.choices[0]?.message.content || "" - } catch (error) { - throw handleOpenAIError(error, this.providerName) + /** + * Create a message stream using the AI SDK. 
+ */ + override async *createMessage( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, + ): ApiStream { + const { temperature } = this.getModel() + const languageModel = this.getLanguageModel() + + // Convert messages to AI SDK format + const aiSdkMessages = convertToAiSdkMessages(messages) + + // Convert tools to OpenAI format first, then to AI SDK format + const openAiTools = this.convertToolsForOpenAI(metadata?.tools) + const aiSdkTools = convertToolsForAiSdk(openAiTools) as ToolSet | undefined + + // Build the request options + const requestOptions: Parameters[0] = { + model: languageModel, + system: systemPrompt, + messages: aiSdkMessages, + temperature: this.options.modelTemperature ?? temperature ?? HUGGINGFACE_DEFAULT_TEMPERATURE, + maxOutputTokens: this.getMaxOutputTokens(), + tools: aiSdkTools, + toolChoice: mapToolChoice(metadata?.tool_choice), } - } - override getModel() { - const modelId = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct" + // Use streamText for streaming responses + const result = streamText(requestOptions) - // Try to get model info from cache - const modelInfo = this.modelCache?.[modelId] + try { + // Process the full stream to get all events including reasoning + for await (const part of result.fullStream) { + for (const chunk of processAiSdkStreamPart(part)) { + yield chunk + } + } - if (modelInfo) { - return { - id: modelId, - info: modelInfo, + // Yield usage metrics at the end, including cache metrics from providerMetadata + const usage = await result.usage + const providerMetadata = await result.providerMetadata + if (usage) { + yield this.processUsageMetrics(usage, providerMetadata as any) } + } catch (error) { + // Handle AI SDK errors (AI_RetryError, AI_APICallError, etc.) + throw handleAiSdkError(error, "HuggingFace") } + } - // Fallback to default values if model not found in cache - return { - id: modelId, - info: { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - }, - } + /** + * Complete a prompt using the AI SDK generateText. + */ + async completePrompt(prompt: string): Promise { + const { temperature } = this.getModel() + const languageModel = this.getLanguageModel() + + const { text } = await generateText({ + model: languageModel, + prompt, + maxOutputTokens: this.getMaxOutputTokens(), + temperature: this.options.modelTemperature ?? temperature ?? 
HUGGINGFACE_DEFAULT_TEMPERATURE, + }) + + return text } } diff --git a/src/package.json b/src/package.json index 04402de28af..10971319359 100644 --- a/src/package.json +++ b/src/package.json @@ -454,6 +454,7 @@ "@ai-sdk/deepseek": "^2.0.14", "@ai-sdk/fireworks": "^2.0.26", "@ai-sdk/groq": "^3.0.19", + "@ai-sdk/huggingface": "^1.0.28", "@ai-sdk/mistral": "^3.0.0", "@ai-sdk/xai": "^3.0.46", "sambanova-ai-provider": "^1.2.2", From 5e4c11f2577aa36232a667c12943912d330cbffb Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Mon, 2 Feb 2026 10:50:06 -0500 Subject: [PATCH 2/7] fix: set default temperature to 0.7 for HuggingFace provider --- src/api/providers/__tests__/huggingface.spec.ts | 6 +++--- src/api/providers/huggingface.ts | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/api/providers/__tests__/huggingface.spec.ts b/src/api/providers/__tests__/huggingface.spec.ts index 6529c72c0d3..cf9a35b8232 100644 --- a/src/api/providers/__tests__/huggingface.spec.ts +++ b/src/api/providers/__tests__/huggingface.spec.ts @@ -258,7 +258,7 @@ describe("HuggingFaceHandler", () => { expect(usageChunks[0].cacheWriteTokens).toBeUndefined() }) - it("should pass correct temperature (0.5 default) to streamText", async () => { + it("should pass correct temperature (0.7 default) to streamText", async () => { async function* mockFullStream() { yield { type: "text-delta", text: "Test" } } @@ -281,7 +281,7 @@ describe("HuggingFaceHandler", () => { expect(mockStreamText).toHaveBeenCalledWith( expect.objectContaining({ - temperature: 0.5, + temperature: 0.7, }), ) }) @@ -389,7 +389,7 @@ describe("HuggingFaceHandler", () => { expect(mockGenerateText).toHaveBeenCalledWith( expect.objectContaining({ - temperature: 0.5, + temperature: 0.7, }), ) }) diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts index bd20990954d..bf3faab47c4 100644 --- a/src/api/providers/huggingface.ts +++ b/src/api/providers/huggingface.ts @@ -21,7 +21,7 @@ import { BaseProvider } from "./base-provider" import { getHuggingFaceModels, getCachedHuggingFaceModels } from "./fetchers/huggingface" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" -const HUGGINGFACE_DEFAULT_TEMPERATURE = 0.5 +const HUGGINGFACE_DEFAULT_TEMPERATURE = 0.7 /** * HuggingFace provider using the dedicated @ai-sdk/huggingface package. 
From 1591a3cd4fbc188fb5138b4e4ee321647df4b24e Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Mon, 2 Feb 2026 16:21:04 -0500 Subject: [PATCH 3/7] fix(huggingface): handle non-streaming tool calls without UI duplication - Add createAiSdkToolStreamProcessor() to handle tool call deduplication - Tracks tool IDs seen via tool-input-start streaming events - Emits tool-call events only for tools that weren't streamed - Converts tool-call to start/delta/end for UI consistency - Update HuggingFace provider to use the new processor - Add comprehensive tests for the new functionality --- .../providers/__tests__/huggingface.spec.ts | 68 ++++++++- src/api/providers/huggingface.ts | 9 +- src/api/transform/__tests__/ai-sdk.spec.ts | 143 ++++++++++++++++++ src/api/transform/ai-sdk.ts | 89 +++++++++++ 4 files changed, 300 insertions(+), 9 deletions(-) diff --git a/src/api/providers/__tests__/huggingface.spec.ts b/src/api/providers/__tests__/huggingface.spec.ts index cf9a35b8232..5c897d202ea 100644 --- a/src/api/providers/__tests__/huggingface.spec.ts +++ b/src/api/providers/__tests__/huggingface.spec.ts @@ -550,7 +550,9 @@ describe("HuggingFaceHandler", () => { expect(toolCallEndChunks[0].id).toBe("tool-call-1") }) - it("should ignore tool-call events to prevent duplicate tools in UI", async () => { + it("should process tool-call events for non-streaming providers", async () => { + // HuggingFace doesn't stream tool inputs, it only emits tool-call events + // The processor should convert tool-call to start/delta/end events async function* mockFullStream() { yield { type: "tool-call", @@ -579,11 +581,65 @@ describe("HuggingFaceHandler", () => { chunks.push(chunk) } - // tool-call events should be ignored (only tool-input-start/delta/end are processed) - const toolCallChunks = chunks.filter( - (c) => c.type === "tool_call_start" || c.type === "tool_call_delta" || c.type === "tool_call_end", - ) - expect(toolCallChunks.length).toBe(0) + // tool-call events should be converted to start/delta/end for consistency + const toolCallStartChunks = chunks.filter((c) => c.type === "tool_call_start") + const toolCallDeltaChunks = chunks.filter((c) => c.type === "tool_call_delta") + const toolCallEndChunks = chunks.filter((c) => c.type === "tool_call_end") + + expect(toolCallStartChunks.length).toBe(1) + expect(toolCallStartChunks[0].id).toBe("tool-call-1") + expect(toolCallStartChunks[0].name).toBe("read_file") + + expect(toolCallDeltaChunks.length).toBe(1) + expect(toolCallDeltaChunks[0].delta).toBe('{"path":"test.ts"}') + + expect(toolCallEndChunks.length).toBe(1) + expect(toolCallEndChunks[0].id).toBe("tool-call-1") + }) + + it("should ignore tool-call events when tool was already streamed", async () => { + // When a provider streams tool inputs AND sends tool-call, we should not duplicate + async function* mockFullStream() { + // First, streaming events + yield { type: "tool-input-start", id: "tool-call-1", toolName: "read_file" } + yield { type: "tool-input-delta", id: "tool-call-1", delta: '{"path":"test.ts"}' } + yield { type: "tool-input-end", id: "tool-call-1" } + // Then the tool-call event (should be ignored) + yield { + type: "tool-call", + toolCallId: "tool-call-1", + toolName: "read_file", + input: { path: "test.ts" }, + } + } + + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 5, + }) + + const mockProviderMetadata = Promise.resolve({}) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + providerMetadata: mockProviderMetadata, + }) + + const 
stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Should have exactly 1 of each (not duplicated) + const toolCallStartChunks = chunks.filter((c) => c.type === "tool_call_start") + const toolCallDeltaChunks = chunks.filter((c) => c.type === "tool_call_delta") + const toolCallEndChunks = chunks.filter((c) => c.type === "tool_call_end") + + expect(toolCallStartChunks.length).toBe(1) + expect(toolCallDeltaChunks.length).toBe(1) + expect(toolCallEndChunks.length).toBe(1) }) }) }) diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts index bf3faab47c4..85dcec99353 100644 --- a/src/api/providers/huggingface.ts +++ b/src/api/providers/huggingface.ts @@ -9,7 +9,7 @@ import type { ApiHandlerOptions } from "../../shared/api" import { convertToAiSdkMessages, convertToolsForAiSdk, - processAiSdkStreamPart, + createAiSdkToolStreamProcessor, mapToolChoice, handleAiSdkError, } from "../transform/ai-sdk" @@ -168,9 +168,12 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion const result = streamText(requestOptions) try { - // Process the full stream to get all events including reasoning + // Use the stateful processor to handle tool call deduplication + // HuggingFace doesn't emit streaming tool events (tool-input-start/delta/end), + // only the final tool-call event, so we need the processor to handle this + const processStreamPart = createAiSdkToolStreamProcessor() for await (const part of result.fullStream) { - for (const chunk of processAiSdkStreamPart(part)) { + for (const chunk of processStreamPart(part)) { yield chunk } } diff --git a/src/api/transform/__tests__/ai-sdk.spec.ts b/src/api/transform/__tests__/ai-sdk.spec.ts index fb4e3b9e2f2..9cfc2aa32f5 100644 --- a/src/api/transform/__tests__/ai-sdk.spec.ts +++ b/src/api/transform/__tests__/ai-sdk.spec.ts @@ -4,6 +4,7 @@ import { convertToAiSdkMessages, convertToolsForAiSdk, processAiSdkStreamPart, + createAiSdkToolStreamProcessor, mapToolChoice, extractAiSdkErrorMessage, handleAiSdkError, @@ -495,6 +496,148 @@ describe("AI SDK conversion utilities", () => { }) }) + describe("createAiSdkToolStreamProcessor", () => { + it("processes text-delta chunks like processAiSdkStreamPart", () => { + const processor = createAiSdkToolStreamProcessor() + const part = { type: "text-delta" as const, id: "1", text: "Hello" } + const chunks = [...processor(part)] + + expect(chunks).toHaveLength(1) + expect(chunks[0]).toEqual({ type: "text", text: "Hello" }) + }) + + it("processes tool-input-start/delta/end events (streaming tools)", () => { + const processor = createAiSdkToolStreamProcessor() + + // Simulate streaming tool events + const startChunks = [ + ...processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" }), + ] + const deltaChunks = [...processor({ type: "tool-input-delta" as const, id: "call_1", delta: '{"path":' })] + const delta2Chunks = [ + ...processor({ type: "tool-input-delta" as const, id: "call_1", delta: '"test.ts"}' }), + ] + const endChunks = [...processor({ type: "tool-input-end" as const, id: "call_1" })] + + expect(startChunks).toHaveLength(1) + expect(startChunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" }) + + expect(deltaChunks).toHaveLength(1) + expect(deltaChunks[0]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '{"path":' }) + + expect(delta2Chunks).toHaveLength(1) + expect(delta2Chunks[0]).toEqual({ type: 
"tool_call_delta", id: "call_1", delta: '"test.ts"}' }) + + expect(endChunks).toHaveLength(1) + expect(endChunks[0]).toEqual({ type: "tool_call_end", id: "call_1" }) + }) + + it("ignores tool-call events when tool was already streamed", () => { + const processor = createAiSdkToolStreamProcessor() + + // Process streaming events first (consume the generator to update state) + Array.from(processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" })) + Array.from(processor({ type: "tool-input-delta" as const, id: "call_1", delta: '{"path":"test.ts"}' })) + Array.from(processor({ type: "tool-input-end" as const, id: "call_1" })) + + // Now the tool-call event for the same tool should be ignored + const toolCallChunks = [ + ...processor({ + type: "tool-call" as const, + toolCallId: "call_1", + toolName: "read_file", + input: { path: "test.ts" }, + } as any), + ] + + expect(toolCallChunks).toHaveLength(0) + }) + + it("processes tool-call events for non-streaming providers", () => { + const processor = createAiSdkToolStreamProcessor() + + // Directly process a tool-call event (no streaming events first) + const chunks = [ + ...processor({ + type: "tool-call" as const, + toolCallId: "call_1", + toolName: "read_file", + input: { path: "test.ts" }, + } as any), + ] + + // Should emit start/delta/end events + expect(chunks).toHaveLength(3) + expect(chunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" }) + expect(chunks[1]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '{"path":"test.ts"}' }) + expect(chunks[2]).toEqual({ type: "tool_call_end", id: "call_1" }) + }) + + it("handles multiple tool calls correctly", () => { + const processor = createAiSdkToolStreamProcessor() + + // First tool is streamed + Array.from(processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" })) + Array.from(processor({ type: "tool-input-end" as const, id: "call_1" })) + + // Second tool is not streamed (non-streaming provider behavior) + const chunks = [ + ...processor({ + type: "tool-call" as const, + toolCallId: "call_2", + toolName: "write_to_file", + input: { path: "output.ts", content: "test" }, + } as any), + ] + + // Second tool should be emitted + expect(chunks).toHaveLength(3) + expect(chunks[0]).toEqual({ type: "tool_call_start", id: "call_2", name: "write_to_file" }) + + // First tool's tool-call should be ignored + const ignoredChunks = [ + ...processor({ + type: "tool-call" as const, + toolCallId: "call_1", + toolName: "read_file", + input: {}, + } as any), + ] + expect(ignoredChunks).toHaveLength(0) + }) + + it("maintains separate state per processor instance", () => { + const processor1 = createAiSdkToolStreamProcessor() + const processor2 = createAiSdkToolStreamProcessor() + + // Stream a tool with processor1 + Array.from(processor1({ type: "tool-input-start" as const, id: "call_1", toolName: "test" })) + Array.from(processor1({ type: "tool-input-end" as const, id: "call_1" })) + + // processor1 should ignore tool-call for call_1 + const p1Chunks = [ + ...processor1({ + type: "tool-call" as const, + toolCallId: "call_1", + toolName: "test", + input: {}, + } as any), + ] + expect(p1Chunks).toHaveLength(0) + + // processor2 should emit tool-call for call_1 (it has its own state) + const p2Chunks = [ + ...processor2({ + type: "tool-call" as const, + toolCallId: "call_1", + toolName: "test", + input: {}, + } as any), + ] + expect(p2Chunks).toHaveLength(3) + }) + }) + describe("mapToolChoice", () => { it("should return 
undefined for null or undefined", () => { expect(mapToolChoice(null)).toBeUndefined() diff --git a/src/api/transform/ai-sdk.ts b/src/api/transform/ai-sdk.ts index c6f37be694d..66945acf0f2 100644 --- a/src/api/transform/ai-sdk.ts +++ b/src/api/transform/ai-sdk.ts @@ -364,6 +364,95 @@ export function* processAiSdkStreamPart(part: ExtendedStreamPart): Generator Generator { + // Track tool IDs that have been processed via streaming events + const streamedToolIds = new Set() + + return function* processStreamPart(part: ExtendedStreamPart): Generator { + switch (part.type) { + case "tool-input-start": + // Track that this tool has streaming events + streamedToolIds.add(part.id) + yield { + type: "tool_call_start", + id: part.id, + name: part.toolName, + } + break + + case "tool-input-delta": + yield { + type: "tool_call_delta", + id: part.id, + delta: part.delta, + } + break + + case "tool-input-end": + yield { + type: "tool_call_end", + id: part.id, + } + break + + case "tool-call": { + // Only emit tool-call if this tool wasn't already processed via streaming + const toolCallPart = part as { + type: "tool-call" + toolCallId: string + toolName: string + input: unknown + } + if (!streamedToolIds.has(toolCallPart.toolCallId)) { + // Emit as start/delta/end for consistency with streaming providers + const args = JSON.stringify(toolCallPart.input) + yield { + type: "tool_call_start", + id: toolCallPart.toolCallId, + name: toolCallPart.toolName, + } + yield { + type: "tool_call_delta", + id: toolCallPart.toolCallId, + delta: args, + } + yield { + type: "tool_call_end", + id: toolCallPart.toolCallId, + } + } + break + } + + // Handle all other events with the stateless processor + default: + yield* processAiSdkStreamPart(part) + break + } + } +} + /** * Type for AI SDK tool choice format. */ From fd374723fe6b49b260da34031948d7437dba1ef3 Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Tue, 3 Feb 2026 09:25:17 -0500 Subject: [PATCH 4/7] fix(huggingface): properly handle tool arguments when streaming without deltas HuggingFace SDK emits tool-input-start/end but NOT tool-input-delta. The arguments come in the final tool-call event. The fix now: 1. Tracks tools that received actual deltas (toolsWithDeltas) 2. If tool-call arrives and tool was started but had no deltas, emit the arguments as a delta (HuggingFace-like behavior) 3. If tool-call arrives and tool had deltas, ignore it (fully streamed) 4. 
If tool-call arrives and tool wasn't started, emit full sequence --- src/api/transform/__tests__/ai-sdk.spec.ts | 49 +++++++++++++++++--- src/api/transform/ai-sdk.ts | 52 +++++++++++++++------- 2 files changed, 79 insertions(+), 22 deletions(-) diff --git a/src/api/transform/__tests__/ai-sdk.spec.ts b/src/api/transform/__tests__/ai-sdk.spec.ts index 9cfc2aa32f5..ecb7613cf6c 100644 --- a/src/api/transform/__tests__/ai-sdk.spec.ts +++ b/src/api/transform/__tests__/ai-sdk.spec.ts @@ -573,11 +573,47 @@ describe("AI SDK conversion utilities", () => { expect(chunks[2]).toEqual({ type: "tool_call_end", id: "call_1" }) }) + it("handles HuggingFace-like providers (start/end but no deltas, args in tool-call)", () => { + const processor = createAiSdkToolStreamProcessor() + + // HuggingFace emits tool-input-start and tool-input-end but NOT tool-input-delta + // The arguments come in the tool-call event + const startChunks = [ + ...processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" }), + ] + const endChunks = [...processor({ type: "tool-input-end" as const, id: "call_1" })] + + // tool-call should emit just the delta (arguments) since start/end were already emitted + const toolCallChunks = [ + ...processor({ + type: "tool-call" as const, + toolCallId: "call_1", + toolName: "read_file", + input: { path: "test.ts" }, + } as any), + ] + + expect(startChunks).toHaveLength(1) + expect(startChunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" }) + + expect(endChunks).toHaveLength(1) + expect(endChunks[0]).toEqual({ type: "tool_call_end", id: "call_1" }) + + // The tool-call should emit just the delta with arguments + expect(toolCallChunks).toHaveLength(1) + expect(toolCallChunks[0]).toEqual({ + type: "tool_call_delta", + id: "call_1", + delta: '{"path":"test.ts"}', + }) + }) + it("handles multiple tool calls correctly", () => { const processor = createAiSdkToolStreamProcessor() - // First tool is streamed + // First tool is fully streamed with deltas Array.from(processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" })) + Array.from(processor({ type: "tool-input-delta" as const, id: "call_1", delta: '{"path":"a.ts"}' })) Array.from(processor({ type: "tool-input-end" as const, id: "call_1" })) // Second tool is not streamed (non-streaming provider behavior) @@ -590,17 +626,17 @@ describe("AI SDK conversion utilities", () => { } as any), ] - // Second tool should be emitted + // Second tool should be emitted with full start/delta/end expect(chunks).toHaveLength(3) expect(chunks[0]).toEqual({ type: "tool_call_start", id: "call_2", name: "write_to_file" }) - // First tool's tool-call should be ignored + // First tool's tool-call should be ignored (it had deltas) const ignoredChunks = [ ...processor({ type: "tool-call" as const, toolCallId: "call_1", toolName: "read_file", - input: {}, + input: { path: "a.ts" }, } as any), ] expect(ignoredChunks).toHaveLength(0) @@ -610,11 +646,12 @@ describe("AI SDK conversion utilities", () => { const processor1 = createAiSdkToolStreamProcessor() const processor2 = createAiSdkToolStreamProcessor() - // Stream a tool with processor1 + // Stream a tool fully with processor1 (with delta) Array.from(processor1({ type: "tool-input-start" as const, id: "call_1", toolName: "test" })) + Array.from(processor1({ type: "tool-input-delta" as const, id: "call_1", delta: "{}" })) Array.from(processor1({ type: "tool-input-end" as const, id: "call_1" })) - // processor1 should ignore tool-call for call_1 + // 
processor1 should ignore tool-call for call_1 (it had deltas) const p1Chunks = [ ...processor1({ type: "tool-call" as const, diff --git a/src/api/transform/ai-sdk.ts b/src/api/transform/ai-sdk.ts index 66945acf0f2..3f91382d255 100644 --- a/src/api/transform/ai-sdk.ts +++ b/src/api/transform/ai-sdk.ts @@ -386,14 +386,16 @@ export function* processAiSdkStreamPart(part: ExtendedStreamPart): Generator Generator { - // Track tool IDs that have been processed via streaming events - const streamedToolIds = new Set() + // Track tool IDs that have been started via streaming events + const startedToolIds = new Set() + // Track tool IDs that have received actual argument deltas + const toolsWithDeltas = new Set() return function* processStreamPart(part: ExtendedStreamPart): Generator { switch (part.type) { case "tool-input-start": - // Track that this tool has streaming events - streamedToolIds.add(part.id) + // Track that this tool has started streaming + startedToolIds.add(part.id) yield { type: "tool_call_start", id: part.id, @@ -402,6 +404,8 @@ export function createAiSdkToolStreamProcessor(): ( break case "tool-input-delta": + // Track that we received actual argument content for this tool + toolsWithDeltas.add(part.id) yield { type: "tool_call_delta", id: part.id, @@ -417,30 +421,46 @@ export function createAiSdkToolStreamProcessor(): ( break case "tool-call": { - // Only emit tool-call if this tool wasn't already processed via streaming + // Handle tool-call events - the logic depends on whether we got streaming deltas const toolCallPart = part as { type: "tool-call" toolCallId: string toolName: string input: unknown } - if (!streamedToolIds.has(toolCallPart.toolCallId)) { - // Emit as start/delta/end for consistency with streaming providers + + // If we received deltas, the arguments were already streamed - ignore tool-call + if (toolsWithDeltas.has(toolCallPart.toolCallId)) { + break + } + + // If tool was started but no deltas received (like HuggingFace), + // emit the arguments from tool-call as a delta + if (startedToolIds.has(toolCallPart.toolCallId)) { const args = JSON.stringify(toolCallPart.input) - yield { - type: "tool_call_start", - id: toolCallPart.toolCallId, - name: toolCallPart.toolName, - } yield { type: "tool_call_delta", id: toolCallPart.toolCallId, delta: args, } - yield { - type: "tool_call_end", - id: toolCallPart.toolCallId, - } + break + } + + // Tool wasn't started via streaming - emit full start/delta/end sequence + const args = JSON.stringify(toolCallPart.input) + yield { + type: "tool_call_start", + id: toolCallPart.toolCallId, + name: toolCallPart.toolName, + } + yield { + type: "tool_call_delta", + id: toolCallPart.toolCallId, + delta: args, + } + yield { + type: "tool_call_end", + id: toolCallPart.toolCallId, } break } From 33fd97279eb98ed27ce9e2bf6e724a74414db960 Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Tue, 3 Feb 2026 09:30:48 -0500 Subject: [PATCH 5/7] fix(huggingface): defer tool_call_end until arguments received MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sequence must be start → delta → end. For HuggingFace: - tool-input-start emits tool_call_start - tool-input-end is DEFERRED (no deltas yet, args not received) - tool-call emits tool_call_delta + deferred tool_call_end This ensures arguments appear before the end event. 
--- src/api/transform/__tests__/ai-sdk.spec.ts | 15 ++++++++----- src/api/transform/ai-sdk.ts | 25 ++++++++++++++++++---- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/api/transform/__tests__/ai-sdk.spec.ts b/src/api/transform/__tests__/ai-sdk.spec.ts index ecb7613cf6c..17e03e6695e 100644 --- a/src/api/transform/__tests__/ai-sdk.spec.ts +++ b/src/api/transform/__tests__/ai-sdk.spec.ts @@ -581,9 +581,10 @@ describe("AI SDK conversion utilities", () => { const startChunks = [ ...processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" }), ] + // End is deferred since we haven't received deltas yet const endChunks = [...processor({ type: "tool-input-end" as const, id: "call_1" })] - // tool-call should emit just the delta (arguments) since start/end were already emitted + // tool-call should emit the delta (arguments) AND the deferred end const toolCallChunks = [ ...processor({ type: "tool-call" as const, @@ -596,16 +597,20 @@ describe("AI SDK conversion utilities", () => { expect(startChunks).toHaveLength(1) expect(startChunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" }) - expect(endChunks).toHaveLength(1) - expect(endChunks[0]).toEqual({ type: "tool_call_end", id: "call_1" }) + // End is deferred when no deltas received + expect(endChunks).toHaveLength(0) - // The tool-call should emit just the delta with arguments - expect(toolCallChunks).toHaveLength(1) + // tool-call emits delta followed by the deferred end + expect(toolCallChunks).toHaveLength(2) expect(toolCallChunks[0]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '{"path":"test.ts"}', }) + expect(toolCallChunks[1]).toEqual({ + type: "tool_call_end", + id: "call_1", + }) }) it("handles multiple tool calls correctly", () => { diff --git a/src/api/transform/ai-sdk.ts b/src/api/transform/ai-sdk.ts index 3f91382d255..8c15350077c 100644 --- a/src/api/transform/ai-sdk.ts +++ b/src/api/transform/ai-sdk.ts @@ -390,6 +390,8 @@ export function createAiSdkToolStreamProcessor(): ( const startedToolIds = new Set() // Track tool IDs that have received actual argument deltas const toolsWithDeltas = new Set() + // Track tool IDs that have ended but are waiting for arguments from tool-call + const pendingEndToolIds = new Set() return function* processStreamPart(part: ExtendedStreamPart): Generator { switch (part.type) { @@ -414,9 +416,16 @@ export function createAiSdkToolStreamProcessor(): ( break case "tool-input-end": - yield { - type: "tool_call_end", - id: part.id, + // If we already have deltas, we can emit the end now + // Otherwise, defer the end until we get arguments from tool-call + if (toolsWithDeltas.has(part.id)) { + yield { + type: "tool_call_end", + id: part.id, + } + } else { + // HuggingFace case: started but no deltas, arguments will come in tool-call + pendingEndToolIds.add(part.id) } break @@ -435,7 +444,7 @@ export function createAiSdkToolStreamProcessor(): ( } // If tool was started but no deltas received (like HuggingFace), - // emit the arguments from tool-call as a delta + // emit the arguments from tool-call as a delta, then the pending end if (startedToolIds.has(toolCallPart.toolCallId)) { const args = JSON.stringify(toolCallPart.input) yield { @@ -443,6 +452,14 @@ export function createAiSdkToolStreamProcessor(): ( id: toolCallPart.toolCallId, delta: args, } + // Now emit the deferred end + if (pendingEndToolIds.has(toolCallPart.toolCallId)) { + pendingEndToolIds.delete(toolCallPart.toolCallId) + yield { + type: "tool_call_end", + id: 
toolCallPart.toolCallId, + } + } break } From 0546990fcb6da0d719a909f9fac2ffc556049ad0 Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Tue, 3 Feb 2026 09:53:04 -0500 Subject: [PATCH 6/7] fix: switch HuggingFace to OpenAI-compatible SDK for tool support - Replace @ai-sdk/huggingface with @ai-sdk/openai-compatible This fixes 'tool messages not supported' error (vercel/ai#10766) - Add createAiSdkToolStreamProcessor() for tool call deduplication Handles providers that emit tool-input-start/end but provide args only in the final tool-call event - Remove unused @ai-sdk/huggingface dependency - Update tests for new SDK --- src/api/providers/__tests__/huggingface.spec.ts | 4 ++-- src/api/providers/huggingface.ts | 16 ++++++++++------ src/package.json | 1 - 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/api/providers/__tests__/huggingface.spec.ts b/src/api/providers/__tests__/huggingface.spec.ts index 5c897d202ea..1627c25f80e 100644 --- a/src/api/providers/__tests__/huggingface.spec.ts +++ b/src/api/providers/__tests__/huggingface.spec.ts @@ -15,8 +15,8 @@ vi.mock("ai", async (importOriginal) => { } }) -vi.mock("@ai-sdk/huggingface", () => ({ - createHuggingFace: vi.fn(() => { +vi.mock("@ai-sdk/openai-compatible", () => ({ + createOpenAICompatible: vi.fn(() => { // Return a function that returns a mock language model return vi.fn(() => ({ modelId: "meta-llama/Llama-3.3-70B-Instruct", diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts index 85dcec99353..713d4d3e2b4 100644 --- a/src/api/providers/huggingface.ts +++ b/src/api/providers/huggingface.ts @@ -1,5 +1,5 @@ import { Anthropic } from "@anthropic-ai/sdk" -import { createHuggingFace } from "@ai-sdk/huggingface" +import { createOpenAICompatible } from "@ai-sdk/openai-compatible" import { streamText, generateText, ToolSet } from "ai" import type { ModelRecord, ModelInfo } from "@roo-code/types" @@ -24,12 +24,13 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". const HUGGINGFACE_DEFAULT_TEMPERATURE = 0.7 /** - * HuggingFace provider using the dedicated @ai-sdk/huggingface package. - * Provides native support for various models on HuggingFace Hub via the Responses API. + * HuggingFace provider using @ai-sdk/openai-compatible for OpenAI-compatible API. + * Uses HuggingFace's OpenAI-compatible endpoint to enable tool message support. 
+ * @see https://github.com/vercel/ai/issues/10766 - Workaround for tool messages not supported in @ai-sdk/huggingface */ export class HuggingFaceHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions - protected provider: ReturnType + protected provider: ReturnType private modelCache: ModelRecord | null = null constructor(options: ApiHandlerOptions) { @@ -40,8 +41,11 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion throw new Error("Hugging Face API key is required") } - // Create the HuggingFace provider using AI SDK - this.provider = createHuggingFace({ + // Create an OpenAI-compatible provider pointing to HuggingFace's /v1 endpoint + // This fixes "tool messages not supported" error - the HuggingFace SDK doesn't + // properly handle function_call_output format, but OpenAI SDK does + this.provider = createOpenAICompatible({ + name: "huggingface", baseURL: "https://router.huggingface.co/v1", apiKey: this.options.huggingFaceApiKey, headers: DEFAULT_HEADERS, diff --git a/src/package.json b/src/package.json index 10971319359..04402de28af 100644 --- a/src/package.json +++ b/src/package.json @@ -454,7 +454,6 @@ "@ai-sdk/deepseek": "^2.0.14", "@ai-sdk/fireworks": "^2.0.26", "@ai-sdk/groq": "^3.0.19", - "@ai-sdk/huggingface": "^1.0.28", "@ai-sdk/mistral": "^3.0.0", "@ai-sdk/xai": "^3.0.46", "sambanova-ai-provider": "^1.2.2", From d60a437e65fc9bcd734e82ec6edf60e5bdc9105b Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Tue, 3 Feb 2026 11:16:17 -0500 Subject: [PATCH 7/7] refactor: simplify HuggingFace to use standard processAiSdkStreamPart With @ai-sdk/openai-compatible, the OpenAI-compatible SDK properly streams tool events via tool-input-start/delta/end, so we don't need the special createAiSdkToolStreamProcessor anymore. 
- Remove createAiSdkToolStreamProcessor from ai-sdk.ts
- Use standard processAiSdkStreamPart in HuggingFace handler
- Remove tests for createAiSdkToolStreamProcessor
- Remove tests for tool-call event processing (not needed)
---
 .../providers/__tests__/huggingface.spec.ts |  92 ---------
 src/api/providers/huggingface.ts            |  10 +-
 src/api/transform/__tests__/ai-sdk.spec.ts  | 185 ------------------
 src/api/transform/ai-sdk.ts                 | 126 ------------
 4 files changed, 4 insertions(+), 409 deletions(-)

diff --git a/src/api/providers/__tests__/huggingface.spec.ts b/src/api/providers/__tests__/huggingface.spec.ts
index 1627c25f80e..e7682474c1a 100644
--- a/src/api/providers/__tests__/huggingface.spec.ts
+++ b/src/api/providers/__tests__/huggingface.spec.ts
@@ -549,97 +549,5 @@ describe("HuggingFaceHandler", () => {
 			expect(toolCallEndChunks.length).toBe(1)
 			expect(toolCallEndChunks[0].id).toBe("tool-call-1")
 		})
-
-		it("should process tool-call events for non-streaming providers", async () => {
-			// HuggingFace doesn't stream tool inputs, it only emits tool-call events
-			// The processor should convert tool-call to start/delta/end events
-			async function* mockFullStream() {
-				yield {
-					type: "tool-call",
-					toolCallId: "tool-call-1",
-					toolName: "read_file",
-					input: { path: "test.ts" },
-				}
-			}
-
-			const mockUsage = Promise.resolve({
-				inputTokens: 10,
-				outputTokens: 5,
-			})
-
-			const mockProviderMetadata = Promise.resolve({})
-
-			mockStreamText.mockReturnValue({
-				fullStream: mockFullStream(),
-				usage: mockUsage,
-				providerMetadata: mockProviderMetadata,
-			})
-
-			const stream = handler.createMessage(systemPrompt, messages)
-			const chunks: any[] = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// tool-call events should be converted to start/delta/end for consistency
-			const toolCallStartChunks = chunks.filter((c) => c.type === "tool_call_start")
-			const toolCallDeltaChunks = chunks.filter((c) => c.type === "tool_call_delta")
-			const toolCallEndChunks = chunks.filter((c) => c.type === "tool_call_end")
-
-			expect(toolCallStartChunks.length).toBe(1)
-			expect(toolCallStartChunks[0].id).toBe("tool-call-1")
-			expect(toolCallStartChunks[0].name).toBe("read_file")
-
-			expect(toolCallDeltaChunks.length).toBe(1)
-			expect(toolCallDeltaChunks[0].delta).toBe('{"path":"test.ts"}')
-
-			expect(toolCallEndChunks.length).toBe(1)
-			expect(toolCallEndChunks[0].id).toBe("tool-call-1")
-		})
-
-		it("should ignore tool-call events when tool was already streamed", async () => {
-			// When a provider streams tool inputs AND sends tool-call, we should not duplicate
-			async function* mockFullStream() {
-				// First, streaming events
-				yield { type: "tool-input-start", id: "tool-call-1", toolName: "read_file" }
-				yield { type: "tool-input-delta", id: "tool-call-1", delta: '{"path":"test.ts"}' }
-				yield { type: "tool-input-end", id: "tool-call-1" }
-				// Then the tool-call event (should be ignored)
-				yield {
-					type: "tool-call",
-					toolCallId: "tool-call-1",
-					toolName: "read_file",
-					input: { path: "test.ts" },
-				}
-			}
-
-			const mockUsage = Promise.resolve({
-				inputTokens: 10,
-				outputTokens: 5,
-			})
-
-			const mockProviderMetadata = Promise.resolve({})
-
-			mockStreamText.mockReturnValue({
-				fullStream: mockFullStream(),
-				usage: mockUsage,
-				providerMetadata: mockProviderMetadata,
-			})
-
-			const stream = handler.createMessage(systemPrompt, messages)
-			const chunks: any[] = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Should have exactly 1 of each (not duplicated)
-			const toolCallStartChunks = chunks.filter((c) => c.type === "tool_call_start")
-			const toolCallDeltaChunks = chunks.filter((c) => c.type === "tool_call_delta")
-			const toolCallEndChunks = chunks.filter((c) => c.type === "tool_call_end")
-
-			expect(toolCallStartChunks.length).toBe(1)
-			expect(toolCallDeltaChunks.length).toBe(1)
-			expect(toolCallEndChunks.length).toBe(1)
-		})
 	})
 })
diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts
index 713d4d3e2b4..25d0608a331 100644
--- a/src/api/providers/huggingface.ts
+++ b/src/api/providers/huggingface.ts
@@ -9,7 +9,7 @@ import type { ApiHandlerOptions } from "../../shared/api"
 import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
-	createAiSdkToolStreamProcessor,
+	processAiSdkStreamPart,
 	mapToolChoice,
 	handleAiSdkError,
 } from "../transform/ai-sdk"
@@ -172,12 +172,10 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion
 		const result = streamText(requestOptions)
 
 		try {
-			// Use the stateful processor to handle tool call deduplication
-			// HuggingFace doesn't emit streaming tool events (tool-input-start/delta/end),
-			// only the final tool-call event, so we need the processor to handle this
-			const processStreamPart = createAiSdkToolStreamProcessor()
+			// Process the full stream to get all events
 			for await (const part of result.fullStream) {
-				for (const chunk of processStreamPart(part)) {
+				// Use the processAiSdkStreamPart utility to convert stream parts
+				for (const chunk of processAiSdkStreamPart(part)) {
 					yield chunk
 				}
 			}
diff --git a/src/api/transform/__tests__/ai-sdk.spec.ts b/src/api/transform/__tests__/ai-sdk.spec.ts
index 17e03e6695e..fb4e3b9e2f2 100644
--- a/src/api/transform/__tests__/ai-sdk.spec.ts
+++ b/src/api/transform/__tests__/ai-sdk.spec.ts
@@ -4,7 +4,6 @@ import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
 	processAiSdkStreamPart,
-	createAiSdkToolStreamProcessor,
 	mapToolChoice,
 	extractAiSdkErrorMessage,
 	handleAiSdkError,
@@ -496,190 +495,6 @@ describe("AI SDK conversion utilities", () => {
 		})
 	})
 
-	describe("createAiSdkToolStreamProcessor", () => {
-		it("processes text-delta chunks like processAiSdkStreamPart", () => {
-			const processor = createAiSdkToolStreamProcessor()
-			const part = { type: "text-delta" as const, id: "1", text: "Hello" }
-			const chunks = [...processor(part)]
-
-			expect(chunks).toHaveLength(1)
-			expect(chunks[0]).toEqual({ type: "text", text: "Hello" })
-		})
-
-		it("processes tool-input-start/delta/end events (streaming tools)", () => {
-			const processor = createAiSdkToolStreamProcessor()
-
-			// Simulate streaming tool events
-			const startChunks = [
-				...processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" }),
-			]
-			const deltaChunks = [...processor({ type: "tool-input-delta" as const, id: "call_1", delta: '{"path":' })]
-			const delta2Chunks = [
-				...processor({ type: "tool-input-delta" as const, id: "call_1", delta: '"test.ts"}' }),
-			]
-			const endChunks = [...processor({ type: "tool-input-end" as const, id: "call_1" })]
-
-			expect(startChunks).toHaveLength(1)
-			expect(startChunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" })
-
-			expect(deltaChunks).toHaveLength(1)
-			expect(deltaChunks[0]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '{"path":' })
-
-			expect(delta2Chunks).toHaveLength(1)
-			expect(delta2Chunks[0]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '"test.ts"}' })
-
-			expect(endChunks).toHaveLength(1)
-			expect(endChunks[0]).toEqual({ type: "tool_call_end", id: "call_1" })
-		})
-
-		it("ignores tool-call events when tool was already streamed", () => {
-			const processor = createAiSdkToolStreamProcessor()
-
-			// Process streaming events first (consume the generator to update state)
-			Array.from(processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" }))
-			Array.from(processor({ type: "tool-input-delta" as const, id: "call_1", delta: '{"path":"test.ts"}' }))
-			Array.from(processor({ type: "tool-input-end" as const, id: "call_1" }))
-
-			// Now the tool-call event for the same tool should be ignored
-			const toolCallChunks = [
-				...processor({
-					type: "tool-call" as const,
-					toolCallId: "call_1",
-					toolName: "read_file",
-					input: { path: "test.ts" },
-				} as any),
-			]
-
-			expect(toolCallChunks).toHaveLength(0)
-		})
-
-		it("processes tool-call events for non-streaming providers", () => {
-			const processor = createAiSdkToolStreamProcessor()
-
-			// Directly process a tool-call event (no streaming events first)
-			const chunks = [
-				...processor({
-					type: "tool-call" as const,
-					toolCallId: "call_1",
-					toolName: "read_file",
-					input: { path: "test.ts" },
-				} as any),
-			]
-
-			// Should emit start/delta/end events
-			expect(chunks).toHaveLength(3)
-			expect(chunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" })
-			expect(chunks[1]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '{"path":"test.ts"}' })
-			expect(chunks[2]).toEqual({ type: "tool_call_end", id: "call_1" })
-		})
-
-		it("handles HuggingFace-like providers (start/end but no deltas, args in tool-call)", () => {
-			const processor = createAiSdkToolStreamProcessor()
-
-			// HuggingFace emits tool-input-start and tool-input-end but NOT tool-input-delta
-			// The arguments come in the tool-call event
-			const startChunks = [
-				...processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" }),
-			]
-			// End is deferred since we haven't received deltas yet
-			const endChunks = [...processor({ type: "tool-input-end" as const, id: "call_1" })]
-
-			// tool-call should emit the delta (arguments) AND the deferred end
-			const toolCallChunks = [
-				...processor({
-					type: "tool-call" as const,
-					toolCallId: "call_1",
-					toolName: "read_file",
-					input: { path: "test.ts" },
-				} as any),
-			]
-
-			expect(startChunks).toHaveLength(1)
-			expect(startChunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" })
-
-			// End is deferred when no deltas received
-			expect(endChunks).toHaveLength(0)
-
-			// tool-call emits delta followed by the deferred end
-			expect(toolCallChunks).toHaveLength(2)
-			expect(toolCallChunks[0]).toEqual({
-				type: "tool_call_delta",
-				id: "call_1",
-				delta: '{"path":"test.ts"}',
-			})
-			expect(toolCallChunks[1]).toEqual({
-				type: "tool_call_end",
-				id: "call_1",
-			})
-		})
-
-		it("handles multiple tool calls correctly", () => {
-			const processor = createAiSdkToolStreamProcessor()
-
-			// First tool is fully streamed with deltas
-			Array.from(processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" }))
-			Array.from(processor({ type: "tool-input-delta" as const, id: "call_1", delta: '{"path":"a.ts"}' }))
-			Array.from(processor({ type: "tool-input-end" as const, id: "call_1" }))
-
-			// Second tool is not streamed (non-streaming provider behavior)
-			const chunks = [
-				...processor({
-					type: "tool-call" as const,
-					toolCallId: "call_2",
-					toolName: "write_to_file",
-					input: { path: "output.ts", content: "test" },
-				} as any),
-			]
-
-			// Second tool should be emitted with full start/delta/end
-			expect(chunks).toHaveLength(3)
-			expect(chunks[0]).toEqual({ type: "tool_call_start", id: "call_2", name: "write_to_file" })
-
-			// First tool's tool-call should be ignored (it had deltas)
-			const ignoredChunks = [
-				...processor({
-					type: "tool-call" as const,
-					toolCallId: "call_1",
-					toolName: "read_file",
-					input: { path: "a.ts" },
-				} as any),
-			]
-			expect(ignoredChunks).toHaveLength(0)
-		})
-
-		it("maintains separate state per processor instance", () => {
-			const processor1 = createAiSdkToolStreamProcessor()
-			const processor2 = createAiSdkToolStreamProcessor()
-
-			// Stream a tool fully with processor1 (with delta)
-			Array.from(processor1({ type: "tool-input-start" as const, id: "call_1", toolName: "test" }))
-			Array.from(processor1({ type: "tool-input-delta" as const, id: "call_1", delta: "{}" }))
-			Array.from(processor1({ type: "tool-input-end" as const, id: "call_1" }))
-
-			// processor1 should ignore tool-call for call_1 (it had deltas)
-			const p1Chunks = [
-				...processor1({
-					type: "tool-call" as const,
-					toolCallId: "call_1",
-					toolName: "test",
-					input: {},
-				} as any),
-			]
-			expect(p1Chunks).toHaveLength(0)
-
-			// processor2 should emit tool-call for call_1 (it has its own state)
-			const p2Chunks = [
-				...processor2({
-					type: "tool-call" as const,
-					toolCallId: "call_1",
-					toolName: "test",
-					input: {},
-				} as any),
-			]
-			expect(p2Chunks).toHaveLength(3)
-		})
-	})
-
 	describe("mapToolChoice", () => {
 		it("should return undefined for null or undefined", () => {
 			expect(mapToolChoice(null)).toBeUndefined()
 			expect(mapToolChoice(undefined)).toBeUndefined()
 		})
diff --git a/src/api/transform/ai-sdk.ts b/src/api/transform/ai-sdk.ts
index 8c15350077c..c6f37be694d 100644
--- a/src/api/transform/ai-sdk.ts
+++ b/src/api/transform/ai-sdk.ts
@@ -364,132 +364,6 @@ export function* processAiSdkStreamPart(part: ExtendedStreamPart): Generator
-export function createAiSdkToolStreamProcessor(): (part: ExtendedStreamPart) => Generator {
-	// Track tool IDs that have been started via streaming events
-	const startedToolIds = new Set()
-	// Track tool IDs that have received actual argument deltas
-	const toolsWithDeltas = new Set()
-	// Track tool IDs that have ended but are waiting for arguments from tool-call
-	const pendingEndToolIds = new Set()
-
-	return function* processStreamPart(part: ExtendedStreamPart): Generator {
-		switch (part.type) {
-			case "tool-input-start":
-				// Track that this tool has started streaming
-				startedToolIds.add(part.id)
-				yield {
-					type: "tool_call_start",
-					id: part.id,
-					name: part.toolName,
-				}
-				break
-
-			case "tool-input-delta":
-				// Track that we received actual argument content for this tool
-				toolsWithDeltas.add(part.id)
-				yield {
-					type: "tool_call_delta",
-					id: part.id,
-					delta: part.delta,
-				}
-				break
-
-			case "tool-input-end":
-				// If we already have deltas, we can emit the end now
-				// Otherwise, defer the end until we get arguments from tool-call
-				if (toolsWithDeltas.has(part.id)) {
-					yield {
-						type: "tool_call_end",
-						id: part.id,
-					}
-				} else {
-					// HuggingFace case: started but no deltas, arguments will come in tool-call
-					pendingEndToolIds.add(part.id)
-				}
-				break
-
-			case "tool-call": {
-				// Handle tool-call events - the logic depends on whether we got streaming deltas
-				const toolCallPart = part as {
-					type: "tool-call"
-					toolCallId: string
-					toolName: string
-					input: unknown
-				}
-
-				// If we received deltas, the arguments were already streamed - ignore tool-call
-				if (toolsWithDeltas.has(toolCallPart.toolCallId)) {
-					break
-				}
-
-				// If tool was started but no deltas received (like HuggingFace),
-				// emit the arguments from tool-call as a delta, then the pending end
-				if (startedToolIds.has(toolCallPart.toolCallId)) {
-					const args = JSON.stringify(toolCallPart.input)
-					yield {
-						type: "tool_call_delta",
-						id: toolCallPart.toolCallId,
-						delta: args,
-					}
-					// Now emit the deferred end
-					if (pendingEndToolIds.has(toolCallPart.toolCallId)) {
-						pendingEndToolIds.delete(toolCallPart.toolCallId)
-						yield {
-							type: "tool_call_end",
-							id: toolCallPart.toolCallId,
-						}
-					}
-					break
-				}
-
-				// Tool wasn't started via streaming - emit full start/delta/end sequence
-				const args = JSON.stringify(toolCallPart.input)
-				yield {
-					type: "tool_call_start",
-					id: toolCallPart.toolCallId,
-					name: toolCallPart.toolName,
-				}
-				yield {
-					type: "tool_call_delta",
-					id: toolCallPart.toolCallId,
-					delta: args,
-				}
-				yield {
-					type: "tool_call_end",
-					id: toolCallPart.toolCallId,
-				}
-				break
-			}
-
-			// Handle all other events with the stateless processor
-			default:
-				yield* processAiSdkStreamPart(part)
-				break
-		}
-	}
-}
-
 /**
  * Type for AI SDK tool choice format.
  */