From af0a7e80b37d7f0dec5aae9adb986699f1297387 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Mon, 2 Feb 2026 15:42:19 +0000 Subject: [PATCH 1/7] feat: migrate HuggingFace provider to use AI SDK @ai-sdk/huggingface package - Add @ai-sdk/huggingface ^1.0.28 dependency - Rewrite HuggingFaceHandler to extend BaseProvider with AI SDK - Use streamText/generateText from ai package for streaming and completions - Use shared utilities: convertToAiSdkMessages, convertToolsForAiSdk, processAiSdkStreamPart, mapToolChoice, handleAiSdkError - Add processUsageMetrics for cache token handling from providerMetadata - Add comprehensive test suite following Fireworks test patterns - Default temperature: 0.5 - Base URL: https://router.huggingface.co/v1 (HuggingFace Responses API) This follows the exact pattern from PR #11118 (Fireworks migration). --- .../providers/__tests__/huggingface.spec.ts | 589 ++++++++++++++++++ src/api/providers/huggingface.ts | 227 ++++--- src/package.json | 1 + 3 files changed, 738 insertions(+), 79 deletions(-) create mode 100644 src/api/providers/__tests__/huggingface.spec.ts diff --git a/src/api/providers/__tests__/huggingface.spec.ts b/src/api/providers/__tests__/huggingface.spec.ts new file mode 100644 index 00000000000..6529c72c0d3 --- /dev/null +++ b/src/api/providers/__tests__/huggingface.spec.ts @@ -0,0 +1,589 @@ +// npx vitest run src/api/providers/__tests__/huggingface.spec.ts + +// Use vi.hoisted to define mock functions that can be referenced in hoisted vi.mock() calls +const { mockStreamText, mockGenerateText } = vi.hoisted(() => ({ + mockStreamText: vi.fn(), + mockGenerateText: vi.fn(), +})) + +vi.mock("ai", async (importOriginal) => { + const actual = await importOriginal() + return { + ...actual, + streamText: mockStreamText, + generateText: mockGenerateText, + } +}) + +vi.mock("@ai-sdk/huggingface", () => ({ + createHuggingFace: vi.fn(() => { + // Return a function that returns a mock language model + return vi.fn(() => ({ + modelId: "meta-llama/Llama-3.3-70B-Instruct", + provider: "huggingface", + })) + }), +})) + +// Mock the fetchers +vi.mock("../fetchers/huggingface", () => ({ + getHuggingFaceModels: vi.fn(() => Promise.resolve({})), + getCachedHuggingFaceModels: vi.fn(() => ({})), +})) + +import type { Anthropic } from "@anthropic-ai/sdk" + +import type { ApiHandlerOptions } from "../../../shared/api" + +import { HuggingFaceHandler } from "../huggingface" + +describe("HuggingFaceHandler", () => { + let handler: HuggingFaceHandler + let mockOptions: ApiHandlerOptions + + beforeEach(() => { + mockOptions = { + huggingFaceApiKey: "test-huggingface-api-key", + huggingFaceModelId: "meta-llama/Llama-3.3-70B-Instruct", + } + handler = new HuggingFaceHandler(mockOptions) + vi.clearAllMocks() + }) + + describe("constructor", () => { + it("should initialize with provided options", () => { + expect(handler).toBeInstanceOf(HuggingFaceHandler) + expect(handler.getModel().id).toBe(mockOptions.huggingFaceModelId) + }) + + it("should use default model ID if not provided", () => { + const handlerWithoutModel = new HuggingFaceHandler({ + ...mockOptions, + huggingFaceModelId: undefined, + }) + expect(handlerWithoutModel.getModel().id).toBe("meta-llama/Llama-3.3-70B-Instruct") + }) + + it("should throw error if API key is not provided", () => { + expect(() => { + new HuggingFaceHandler({ + ...mockOptions, + huggingFaceApiKey: undefined, + }) + }).toThrow("Hugging Face API key is required") + }) + }) + + describe("getModel", () => { + it("should return default model when no model 
is specified", () => { + const handlerWithoutModel = new HuggingFaceHandler({ + huggingFaceApiKey: "test-huggingface-api-key", + }) + const model = handlerWithoutModel.getModel() + expect(model.id).toBe("meta-llama/Llama-3.3-70B-Instruct") + expect(model.info).toBeDefined() + }) + + it("should return specified model when valid model is provided", () => { + const testModelId = "mistralai/Mistral-7B-Instruct-v0.3" + const handlerWithModel = new HuggingFaceHandler({ + huggingFaceModelId: testModelId, + huggingFaceApiKey: "test-huggingface-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + }) + + it("should include model parameters from getModelParams", () => { + const model = handler.getModel() + expect(model).toHaveProperty("temperature") + expect(model).toHaveProperty("maxTokens") + }) + + it("should return fallback info when model not in cache", () => { + const model = handler.getModel() + expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + }), + ) + }) + }) + + describe("createMessage", () => { + const systemPrompt = "You are a helpful assistant." + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "text" as const, + text: "Hello!", + }, + ], + }, + ] + + it("should handle streaming responses", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Test response from HuggingFace" } + } + + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 5, + }) + + const mockProviderMetadata = Promise.resolve({}) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + providerMetadata: mockProviderMetadata, + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + expect(chunks.length).toBeGreaterThan(0) + const textChunks = chunks.filter((chunk) => chunk.type === "text") + expect(textChunks).toHaveLength(1) + expect(textChunks[0].text).toBe("Test response from HuggingFace") + }) + + it("should include usage information", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Test response" } + } + + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 20, + }) + + const mockProviderMetadata = Promise.resolve({}) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + providerMetadata: mockProviderMetadata, + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const usageChunks = chunks.filter((chunk) => chunk.type === "usage") + expect(usageChunks.length).toBeGreaterThan(0) + expect(usageChunks[0].inputTokens).toBe(10) + expect(usageChunks[0].outputTokens).toBe(20) + }) + + it("should handle cached tokens in usage data from providerMetadata", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Test response" } + } + + const mockUsage = Promise.resolve({ + inputTokens: 100, + outputTokens: 50, + }) + + // HuggingFace provides cache metrics via providerMetadata for supported models + const mockProviderMetadata = Promise.resolve({ + huggingface: { + promptCacheHitTokens: 30, + promptCacheMissTokens: 70, + }, + }) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: 
mockUsage, + providerMetadata: mockProviderMetadata, + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const usageChunks = chunks.filter((chunk) => chunk.type === "usage") + expect(usageChunks.length).toBeGreaterThan(0) + expect(usageChunks[0].inputTokens).toBe(100) + expect(usageChunks[0].outputTokens).toBe(50) + expect(usageChunks[0].cacheReadTokens).toBe(30) + expect(usageChunks[0].cacheWriteTokens).toBe(70) + }) + + it("should handle usage with details.cachedInputTokens when providerMetadata is not available", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Test response" } + } + + const mockUsage = Promise.resolve({ + inputTokens: 100, + outputTokens: 50, + details: { + cachedInputTokens: 25, + }, + }) + + const mockProviderMetadata = Promise.resolve({}) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + providerMetadata: mockProviderMetadata, + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const usageChunks = chunks.filter((chunk) => chunk.type === "usage") + expect(usageChunks.length).toBeGreaterThan(0) + expect(usageChunks[0].cacheReadTokens).toBe(25) + expect(usageChunks[0].cacheWriteTokens).toBeUndefined() + }) + + it("should pass correct temperature (0.5 default) to streamText", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Test" } + } + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: Promise.resolve({ inputTokens: 0, outputTokens: 0 }), + providerMetadata: Promise.resolve({}), + }) + + const handlerWithDefaultTemp = new HuggingFaceHandler({ + huggingFaceApiKey: "test-key", + huggingFaceModelId: "meta-llama/Llama-3.3-70B-Instruct", + }) + + const stream = handlerWithDefaultTemp.createMessage(systemPrompt, messages) + for await (const _ of stream) { + // consume stream + } + + expect(mockStreamText).toHaveBeenCalledWith( + expect.objectContaining({ + temperature: 0.5, + }), + ) + }) + + it("should use user-specified temperature over provider defaults", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Test" } + } + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: Promise.resolve({ inputTokens: 0, outputTokens: 0 }), + providerMetadata: Promise.resolve({}), + }) + + const handlerWithCustomTemp = new HuggingFaceHandler({ + huggingFaceApiKey: "test-key", + huggingFaceModelId: "meta-llama/Llama-3.3-70B-Instruct", + modelTemperature: 0.7, + }) + + const stream = handlerWithCustomTemp.createMessage(systemPrompt, messages) + for await (const _ of stream) { + // consume stream + } + + // User-specified temperature should take precedence over everything + expect(mockStreamText).toHaveBeenCalledWith( + expect.objectContaining({ + temperature: 0.7, + }), + ) + }) + + it("should handle stream with multiple chunks", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Hello" } + yield { type: "text-delta", text: " world" } + } + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: Promise.resolve({ inputTokens: 5, outputTokens: 10 }), + providerMetadata: Promise.resolve({}), + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of 
stream) { + chunks.push(chunk) + } + + const textChunks = chunks.filter((c) => c.type === "text") + expect(textChunks[0]).toEqual({ type: "text", text: "Hello" }) + expect(textChunks[1]).toEqual({ type: "text", text: " world" }) + + const usageChunks = chunks.filter((c) => c.type === "usage") + expect(usageChunks[0]).toMatchObject({ type: "usage", inputTokens: 5, outputTokens: 10 }) + }) + + it("should handle errors with handleAiSdkError", async () => { + async function* mockFullStream(): AsyncGenerator { + yield { type: "text-delta", text: "" } // Yield something before error to satisfy lint + throw new Error("API Error") + } + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: Promise.resolve({ inputTokens: 0, outputTokens: 0 }), + providerMetadata: Promise.resolve({}), + }) + + const stream = handler.createMessage(systemPrompt, messages) + + await expect(async () => { + for await (const _ of stream) { + // consume stream + } + }).rejects.toThrow("HuggingFace: API Error") + }) + }) + + describe("completePrompt", () => { + it("should complete a prompt using generateText", async () => { + mockGenerateText.mockResolvedValue({ + text: "Test completion from HuggingFace", + }) + + const result = await handler.completePrompt("Test prompt") + + expect(result).toBe("Test completion from HuggingFace") + expect(mockGenerateText).toHaveBeenCalledWith( + expect.objectContaining({ + prompt: "Test prompt", + }), + ) + }) + + it("should use default temperature in completePrompt", async () => { + mockGenerateText.mockResolvedValue({ + text: "Test completion", + }) + + await handler.completePrompt("Test prompt") + + expect(mockGenerateText).toHaveBeenCalledWith( + expect.objectContaining({ + temperature: 0.5, + }), + ) + }) + }) + + describe("processUsageMetrics", () => { + it("should correctly process usage metrics including cache information from providerMetadata", () => { + class TestHuggingFaceHandler extends HuggingFaceHandler { + public testProcessUsageMetrics(usage: any, providerMetadata?: any) { + return this.processUsageMetrics(usage, providerMetadata) + } + } + + const testHandler = new TestHuggingFaceHandler(mockOptions) + + const usage = { + inputTokens: 100, + outputTokens: 50, + } + + const providerMetadata = { + huggingface: { + promptCacheHitTokens: 20, + promptCacheMissTokens: 80, + }, + } + + const result = testHandler.testProcessUsageMetrics(usage, providerMetadata) + + expect(result.type).toBe("usage") + expect(result.inputTokens).toBe(100) + expect(result.outputTokens).toBe(50) + expect(result.cacheWriteTokens).toBe(80) + expect(result.cacheReadTokens).toBe(20) + }) + + it("should handle missing cache metrics gracefully", () => { + class TestHuggingFaceHandler extends HuggingFaceHandler { + public testProcessUsageMetrics(usage: any, providerMetadata?: any) { + return this.processUsageMetrics(usage, providerMetadata) + } + } + + const testHandler = new TestHuggingFaceHandler(mockOptions) + + const usage = { + inputTokens: 100, + outputTokens: 50, + } + + const result = testHandler.testProcessUsageMetrics(usage) + + expect(result.type).toBe("usage") + expect(result.inputTokens).toBe(100) + expect(result.outputTokens).toBe(50) + expect(result.cacheWriteTokens).toBeUndefined() + expect(result.cacheReadTokens).toBeUndefined() + }) + + it("should include reasoning tokens when provided", () => { + class TestHuggingFaceHandler extends HuggingFaceHandler { + public testProcessUsageMetrics(usage: any, providerMetadata?: any) { + return this.processUsageMetrics(usage, 
providerMetadata) + } + } + + const testHandler = new TestHuggingFaceHandler(mockOptions) + + const usage = { + inputTokens: 100, + outputTokens: 50, + details: { + reasoningTokens: 30, + }, + } + + const result = testHandler.testProcessUsageMetrics(usage) + + expect(result.reasoningTokens).toBe(30) + }) + }) + + describe("tool handling", () => { + const systemPrompt = "You are a helpful assistant." + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello!" }], + }, + ] + + it("should handle tool calls in streaming", async () => { + async function* mockFullStream() { + yield { + type: "tool-input-start", + id: "tool-call-1", + toolName: "read_file", + } + yield { + type: "tool-input-delta", + id: "tool-call-1", + delta: '{"path":"test.ts"}', + } + yield { + type: "tool-input-end", + id: "tool-call-1", + } + } + + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 5, + }) + + const mockProviderMetadata = Promise.resolve({}) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + providerMetadata: mockProviderMetadata, + }) + + const stream = handler.createMessage(systemPrompt, messages, { + taskId: "test-task", + tools: [ + { + type: "function", + function: { + name: "read_file", + description: "Read a file", + parameters: { + type: "object", + properties: { path: { type: "string" } }, + required: ["path"], + }, + }, + }, + ], + }) + + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const toolCallStartChunks = chunks.filter((c) => c.type === "tool_call_start") + const toolCallDeltaChunks = chunks.filter((c) => c.type === "tool_call_delta") + const toolCallEndChunks = chunks.filter((c) => c.type === "tool_call_end") + + expect(toolCallStartChunks.length).toBe(1) + expect(toolCallStartChunks[0].id).toBe("tool-call-1") + expect(toolCallStartChunks[0].name).toBe("read_file") + + expect(toolCallDeltaChunks.length).toBe(1) + expect(toolCallDeltaChunks[0].delta).toBe('{"path":"test.ts"}') + + expect(toolCallEndChunks.length).toBe(1) + expect(toolCallEndChunks[0].id).toBe("tool-call-1") + }) + + it("should ignore tool-call events to prevent duplicate tools in UI", async () => { + async function* mockFullStream() { + yield { + type: "tool-call", + toolCallId: "tool-call-1", + toolName: "read_file", + input: { path: "test.ts" }, + } + } + + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 5, + }) + + const mockProviderMetadata = Promise.resolve({}) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + providerMetadata: mockProviderMetadata, + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // tool-call events should be ignored (only tool-input-start/delta/end are processed) + const toolCallChunks = chunks.filter( + (c) => c.type === "tool_call_start" || c.type === "tool_call_delta" || c.type === "tool_call_end", + ) + expect(toolCallChunks.length).toBe(0) + }) + }) +}) diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts index 21e429aaabf..bd20990954d 100644 --- a/src/api/providers/huggingface.ts +++ b/src/api/providers/huggingface.ts @@ -1,22 +1,36 @@ -import OpenAI from "openai" import { Anthropic } from "@anthropic-ai/sdk" +import { createHuggingFace } from "@ai-sdk/huggingface" +import { streamText, generateText, ToolSet } from "ai" 
-import type { ModelRecord } from "@roo-code/types" +import type { ModelRecord, ModelInfo } from "@roo-code/types" import type { ApiHandlerOptions } from "../../shared/api" -import { ApiStream } from "../transform/stream" -import { convertToOpenAiMessages } from "../transform/openai-format" -import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" + +import { + convertToAiSdkMessages, + convertToolsForAiSdk, + processAiSdkStreamPart, + mapToolChoice, + handleAiSdkError, +} from "../transform/ai-sdk" +import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" +import { getModelParams } from "../transform/model-params" + import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" import { getHuggingFaceModels, getCachedHuggingFaceModels } from "./fetchers/huggingface" -import { handleOpenAIError } from "./utils/openai-error-handler" +import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" + +const HUGGINGFACE_DEFAULT_TEMPERATURE = 0.5 +/** + * HuggingFace provider using the dedicated @ai-sdk/huggingface package. + * Provides native support for various models on HuggingFace Hub via the Responses API. + */ export class HuggingFaceHandler extends BaseProvider implements SingleCompletionHandler { - private client: OpenAI - private options: ApiHandlerOptions + protected options: ApiHandlerOptions + protected provider: ReturnType private modelCache: ModelRecord | null = null - private readonly providerName = "HuggingFace" constructor(options: ApiHandlerOptions) { super() @@ -26,10 +40,11 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion throw new Error("Hugging Face API key is required") } - this.client = new OpenAI({ + // Create the HuggingFace provider using AI SDK + this.provider = createHuggingFace({ baseURL: "https://router.huggingface.co/v1", apiKey: this.options.huggingFaceApiKey, - defaultHeaders: DEFAULT_HEADERS, + headers: DEFAULT_HEADERS, }) // Try to get cached models first @@ -47,91 +62,145 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion } } - override async *createMessage( - systemPrompt: string, - messages: Anthropic.Messages.MessageParam[], - metadata?: ApiHandlerCreateMessageMetadata, - ): ApiStream { - const modelId = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct" - const temperature = this.options.modelTemperature ?? 
0.7 - - const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { - model: modelId, - temperature, - messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)], - stream: true, - stream_options: { include_usage: true }, - } + override getModel(): { id: string; info: ModelInfo; maxTokens?: number; temperature?: number } { + const id = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct" - // Add max_tokens if specified - if (this.options.includeMaxTokens && this.options.modelMaxTokens) { - params.max_tokens = this.options.modelMaxTokens - } + // Try to get model info from cache + const cachedInfo = this.modelCache?.[id] - let stream - try { - stream = await this.client.chat.completions.create(params) - } catch (error) { - throw handleOpenAIError(error, this.providerName) + const info: ModelInfo = cachedInfo || { + maxTokens: 8192, + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, } - for await (const chunk of stream) { - const delta = chunk.choices[0]?.delta + const params = getModelParams({ + format: "openai", + modelId: id, + model: info, + settings: this.options, + defaultTemperature: HUGGINGFACE_DEFAULT_TEMPERATURE, + }) - if (delta?.content) { - yield { - type: "text", - text: delta.content, - } - } + return { id, info, ...params } + } - if (chunk.usage) { - yield { - type: "usage", - inputTokens: chunk.usage.prompt_tokens || 0, - outputTokens: chunk.usage.completion_tokens || 0, - } + /** + * Get the language model for the configured model ID. + */ + protected getLanguageModel() { + const { id } = this.getModel() + return this.provider(id) + } + + /** + * Process usage metrics from the AI SDK response. + */ + protected processUsageMetrics( + usage: { + inputTokens?: number + outputTokens?: number + details?: { + cachedInputTokens?: number + reasoningTokens?: number + } + }, + providerMetadata?: { + huggingface?: { + promptCacheHitTokens?: number + promptCacheMissTokens?: number } + }, + ): ApiStreamUsageChunk { + // Extract cache metrics from HuggingFace's providerMetadata if available + const cacheReadTokens = providerMetadata?.huggingface?.promptCacheHitTokens ?? usage.details?.cachedInputTokens + const cacheWriteTokens = providerMetadata?.huggingface?.promptCacheMissTokens + + return { + type: "usage", + inputTokens: usage.inputTokens || 0, + outputTokens: usage.outputTokens || 0, + cacheReadTokens, + cacheWriteTokens, + reasoningTokens: usage.details?.reasoningTokens, } } - async completePrompt(prompt: string): Promise { - const modelId = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct" - - try { - const response = await this.client.chat.completions.create({ - model: modelId, - messages: [{ role: "user", content: prompt }], - }) + /** + * Get the max tokens parameter to include in the request. + */ + protected getMaxOutputTokens(): number | undefined { + const { info } = this.getModel() + return this.options.modelMaxTokens || info.maxTokens || undefined + } - return response.choices[0]?.message.content || "" - } catch (error) { - throw handleOpenAIError(error, this.providerName) + /** + * Create a message stream using the AI SDK. 
+ */ + override async *createMessage( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, + ): ApiStream { + const { temperature } = this.getModel() + const languageModel = this.getLanguageModel() + + // Convert messages to AI SDK format + const aiSdkMessages = convertToAiSdkMessages(messages) + + // Convert tools to OpenAI format first, then to AI SDK format + const openAiTools = this.convertToolsForOpenAI(metadata?.tools) + const aiSdkTools = convertToolsForAiSdk(openAiTools) as ToolSet | undefined + + // Build the request options + const requestOptions: Parameters[0] = { + model: languageModel, + system: systemPrompt, + messages: aiSdkMessages, + temperature: this.options.modelTemperature ?? temperature ?? HUGGINGFACE_DEFAULT_TEMPERATURE, + maxOutputTokens: this.getMaxOutputTokens(), + tools: aiSdkTools, + toolChoice: mapToolChoice(metadata?.tool_choice), } - } - override getModel() { - const modelId = this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct" + // Use streamText for streaming responses + const result = streamText(requestOptions) - // Try to get model info from cache - const modelInfo = this.modelCache?.[modelId] + try { + // Process the full stream to get all events including reasoning + for await (const part of result.fullStream) { + for (const chunk of processAiSdkStreamPart(part)) { + yield chunk + } + } - if (modelInfo) { - return { - id: modelId, - info: modelInfo, + // Yield usage metrics at the end, including cache metrics from providerMetadata + const usage = await result.usage + const providerMetadata = await result.providerMetadata + if (usage) { + yield this.processUsageMetrics(usage, providerMetadata as any) } + } catch (error) { + // Handle AI SDK errors (AI_RetryError, AI_APICallError, etc.) + throw handleAiSdkError(error, "HuggingFace") } + } - // Fallback to default values if model not found in cache - return { - id: modelId, - info: { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - }, - } + /** + * Complete a prompt using the AI SDK generateText. + */ + async completePrompt(prompt: string): Promise { + const { temperature } = this.getModel() + const languageModel = this.getLanguageModel() + + const { text } = await generateText({ + model: languageModel, + prompt, + maxOutputTokens: this.getMaxOutputTokens(), + temperature: this.options.modelTemperature ?? temperature ?? 
HUGGINGFACE_DEFAULT_TEMPERATURE, + }) + + return text } } diff --git a/src/package.json b/src/package.json index 04402de28af..10971319359 100644 --- a/src/package.json +++ b/src/package.json @@ -454,6 +454,7 @@ "@ai-sdk/deepseek": "^2.0.14", "@ai-sdk/fireworks": "^2.0.26", "@ai-sdk/groq": "^3.0.19", + "@ai-sdk/huggingface": "^1.0.28", "@ai-sdk/mistral": "^3.0.0", "@ai-sdk/xai": "^3.0.46", "sambanova-ai-provider": "^1.2.2", From 5e4c11f2577aa36232a667c12943912d330cbffb Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Mon, 2 Feb 2026 10:50:06 -0500 Subject: [PATCH 2/7] fix: set default temperature to 0.7 for HuggingFace provider --- src/api/providers/__tests__/huggingface.spec.ts | 6 +++--- src/api/providers/huggingface.ts | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/api/providers/__tests__/huggingface.spec.ts b/src/api/providers/__tests__/huggingface.spec.ts index 6529c72c0d3..cf9a35b8232 100644 --- a/src/api/providers/__tests__/huggingface.spec.ts +++ b/src/api/providers/__tests__/huggingface.spec.ts @@ -258,7 +258,7 @@ describe("HuggingFaceHandler", () => { expect(usageChunks[0].cacheWriteTokens).toBeUndefined() }) - it("should pass correct temperature (0.5 default) to streamText", async () => { + it("should pass correct temperature (0.7 default) to streamText", async () => { async function* mockFullStream() { yield { type: "text-delta", text: "Test" } } @@ -281,7 +281,7 @@ describe("HuggingFaceHandler", () => { expect(mockStreamText).toHaveBeenCalledWith( expect.objectContaining({ - temperature: 0.5, + temperature: 0.7, }), ) }) @@ -389,7 +389,7 @@ describe("HuggingFaceHandler", () => { expect(mockGenerateText).toHaveBeenCalledWith( expect.objectContaining({ - temperature: 0.5, + temperature: 0.7, }), ) }) diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts index bd20990954d..bf3faab47c4 100644 --- a/src/api/providers/huggingface.ts +++ b/src/api/providers/huggingface.ts @@ -21,7 +21,7 @@ import { BaseProvider } from "./base-provider" import { getHuggingFaceModels, getCachedHuggingFaceModels } from "./fetchers/huggingface" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" -const HUGGINGFACE_DEFAULT_TEMPERATURE = 0.5 +const HUGGINGFACE_DEFAULT_TEMPERATURE = 0.7 /** * HuggingFace provider using the dedicated @ai-sdk/huggingface package. 
From 1591a3cd4fbc188fb5138b4e4ee321647df4b24e Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Mon, 2 Feb 2026 16:21:04 -0500 Subject: [PATCH 3/7] fix(huggingface): handle non-streaming tool calls without UI duplication - Add createAiSdkToolStreamProcessor() to handle tool call deduplication - Tracks tool IDs seen via tool-input-start streaming events - Emits tool-call events only for tools that weren't streamed - Converts tool-call to start/delta/end for UI consistency - Update HuggingFace provider to use the new processor - Add comprehensive tests for the new functionality --- .../providers/__tests__/huggingface.spec.ts | 68 ++++++++- src/api/providers/huggingface.ts | 9 +- src/api/transform/__tests__/ai-sdk.spec.ts | 143 ++++++++++++++++++ src/api/transform/ai-sdk.ts | 89 +++++++++++ 4 files changed, 300 insertions(+), 9 deletions(-) diff --git a/src/api/providers/__tests__/huggingface.spec.ts b/src/api/providers/__tests__/huggingface.spec.ts index cf9a35b8232..5c897d202ea 100644 --- a/src/api/providers/__tests__/huggingface.spec.ts +++ b/src/api/providers/__tests__/huggingface.spec.ts @@ -550,7 +550,9 @@ describe("HuggingFaceHandler", () => { expect(toolCallEndChunks[0].id).toBe("tool-call-1") }) - it("should ignore tool-call events to prevent duplicate tools in UI", async () => { + it("should process tool-call events for non-streaming providers", async () => { + // HuggingFace doesn't stream tool inputs, it only emits tool-call events + // The processor should convert tool-call to start/delta/end events async function* mockFullStream() { yield { type: "tool-call", @@ -579,11 +581,65 @@ describe("HuggingFaceHandler", () => { chunks.push(chunk) } - // tool-call events should be ignored (only tool-input-start/delta/end are processed) - const toolCallChunks = chunks.filter( - (c) => c.type === "tool_call_start" || c.type === "tool_call_delta" || c.type === "tool_call_end", - ) - expect(toolCallChunks.length).toBe(0) + // tool-call events should be converted to start/delta/end for consistency + const toolCallStartChunks = chunks.filter((c) => c.type === "tool_call_start") + const toolCallDeltaChunks = chunks.filter((c) => c.type === "tool_call_delta") + const toolCallEndChunks = chunks.filter((c) => c.type === "tool_call_end") + + expect(toolCallStartChunks.length).toBe(1) + expect(toolCallStartChunks[0].id).toBe("tool-call-1") + expect(toolCallStartChunks[0].name).toBe("read_file") + + expect(toolCallDeltaChunks.length).toBe(1) + expect(toolCallDeltaChunks[0].delta).toBe('{"path":"test.ts"}') + + expect(toolCallEndChunks.length).toBe(1) + expect(toolCallEndChunks[0].id).toBe("tool-call-1") + }) + + it("should ignore tool-call events when tool was already streamed", async () => { + // When a provider streams tool inputs AND sends tool-call, we should not duplicate + async function* mockFullStream() { + // First, streaming events + yield { type: "tool-input-start", id: "tool-call-1", toolName: "read_file" } + yield { type: "tool-input-delta", id: "tool-call-1", delta: '{"path":"test.ts"}' } + yield { type: "tool-input-end", id: "tool-call-1" } + // Then the tool-call event (should be ignored) + yield { + type: "tool-call", + toolCallId: "tool-call-1", + toolName: "read_file", + input: { path: "test.ts" }, + } + } + + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 5, + }) + + const mockProviderMetadata = Promise.resolve({}) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + providerMetadata: mockProviderMetadata, + }) + + const 
stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Should have exactly 1 of each (not duplicated) + const toolCallStartChunks = chunks.filter((c) => c.type === "tool_call_start") + const toolCallDeltaChunks = chunks.filter((c) => c.type === "tool_call_delta") + const toolCallEndChunks = chunks.filter((c) => c.type === "tool_call_end") + + expect(toolCallStartChunks.length).toBe(1) + expect(toolCallDeltaChunks.length).toBe(1) + expect(toolCallEndChunks.length).toBe(1) }) }) }) diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts index bf3faab47c4..85dcec99353 100644 --- a/src/api/providers/huggingface.ts +++ b/src/api/providers/huggingface.ts @@ -9,7 +9,7 @@ import type { ApiHandlerOptions } from "../../shared/api" import { convertToAiSdkMessages, convertToolsForAiSdk, - processAiSdkStreamPart, + createAiSdkToolStreamProcessor, mapToolChoice, handleAiSdkError, } from "../transform/ai-sdk" @@ -168,9 +168,12 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion const result = streamText(requestOptions) try { - // Process the full stream to get all events including reasoning + // Use the stateful processor to handle tool call deduplication + // HuggingFace doesn't emit streaming tool events (tool-input-start/delta/end), + // only the final tool-call event, so we need the processor to handle this + const processStreamPart = createAiSdkToolStreamProcessor() for await (const part of result.fullStream) { - for (const chunk of processAiSdkStreamPart(part)) { + for (const chunk of processStreamPart(part)) { yield chunk } } diff --git a/src/api/transform/__tests__/ai-sdk.spec.ts b/src/api/transform/__tests__/ai-sdk.spec.ts index fb4e3b9e2f2..9cfc2aa32f5 100644 --- a/src/api/transform/__tests__/ai-sdk.spec.ts +++ b/src/api/transform/__tests__/ai-sdk.spec.ts @@ -4,6 +4,7 @@ import { convertToAiSdkMessages, convertToolsForAiSdk, processAiSdkStreamPart, + createAiSdkToolStreamProcessor, mapToolChoice, extractAiSdkErrorMessage, handleAiSdkError, @@ -495,6 +496,148 @@ describe("AI SDK conversion utilities", () => { }) }) + describe("createAiSdkToolStreamProcessor", () => { + it("processes text-delta chunks like processAiSdkStreamPart", () => { + const processor = createAiSdkToolStreamProcessor() + const part = { type: "text-delta" as const, id: "1", text: "Hello" } + const chunks = [...processor(part)] + + expect(chunks).toHaveLength(1) + expect(chunks[0]).toEqual({ type: "text", text: "Hello" }) + }) + + it("processes tool-input-start/delta/end events (streaming tools)", () => { + const processor = createAiSdkToolStreamProcessor() + + // Simulate streaming tool events + const startChunks = [ + ...processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" }), + ] + const deltaChunks = [...processor({ type: "tool-input-delta" as const, id: "call_1", delta: '{"path":' })] + const delta2Chunks = [ + ...processor({ type: "tool-input-delta" as const, id: "call_1", delta: '"test.ts"}' }), + ] + const endChunks = [...processor({ type: "tool-input-end" as const, id: "call_1" })] + + expect(startChunks).toHaveLength(1) + expect(startChunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" }) + + expect(deltaChunks).toHaveLength(1) + expect(deltaChunks[0]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '{"path":' }) + + expect(delta2Chunks).toHaveLength(1) + expect(delta2Chunks[0]).toEqual({ type: 
"tool_call_delta", id: "call_1", delta: '"test.ts"}' }) + + expect(endChunks).toHaveLength(1) + expect(endChunks[0]).toEqual({ type: "tool_call_end", id: "call_1" }) + }) + + it("ignores tool-call events when tool was already streamed", () => { + const processor = createAiSdkToolStreamProcessor() + + // Process streaming events first (consume the generator to update state) + Array.from(processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" })) + Array.from(processor({ type: "tool-input-delta" as const, id: "call_1", delta: '{"path":"test.ts"}' })) + Array.from(processor({ type: "tool-input-end" as const, id: "call_1" })) + + // Now the tool-call event for the same tool should be ignored + const toolCallChunks = [ + ...processor({ + type: "tool-call" as const, + toolCallId: "call_1", + toolName: "read_file", + input: { path: "test.ts" }, + } as any), + ] + + expect(toolCallChunks).toHaveLength(0) + }) + + it("processes tool-call events for non-streaming providers", () => { + const processor = createAiSdkToolStreamProcessor() + + // Directly process a tool-call event (no streaming events first) + const chunks = [ + ...processor({ + type: "tool-call" as const, + toolCallId: "call_1", + toolName: "read_file", + input: { path: "test.ts" }, + } as any), + ] + + // Should emit start/delta/end events + expect(chunks).toHaveLength(3) + expect(chunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" }) + expect(chunks[1]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '{"path":"test.ts"}' }) + expect(chunks[2]).toEqual({ type: "tool_call_end", id: "call_1" }) + }) + + it("handles multiple tool calls correctly", () => { + const processor = createAiSdkToolStreamProcessor() + + // First tool is streamed + Array.from(processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" })) + Array.from(processor({ type: "tool-input-end" as const, id: "call_1" })) + + // Second tool is not streamed (non-streaming provider behavior) + const chunks = [ + ...processor({ + type: "tool-call" as const, + toolCallId: "call_2", + toolName: "write_to_file", + input: { path: "output.ts", content: "test" }, + } as any), + ] + + // Second tool should be emitted + expect(chunks).toHaveLength(3) + expect(chunks[0]).toEqual({ type: "tool_call_start", id: "call_2", name: "write_to_file" }) + + // First tool's tool-call should be ignored + const ignoredChunks = [ + ...processor({ + type: "tool-call" as const, + toolCallId: "call_1", + toolName: "read_file", + input: {}, + } as any), + ] + expect(ignoredChunks).toHaveLength(0) + }) + + it("maintains separate state per processor instance", () => { + const processor1 = createAiSdkToolStreamProcessor() + const processor2 = createAiSdkToolStreamProcessor() + + // Stream a tool with processor1 + Array.from(processor1({ type: "tool-input-start" as const, id: "call_1", toolName: "test" })) + Array.from(processor1({ type: "tool-input-end" as const, id: "call_1" })) + + // processor1 should ignore tool-call for call_1 + const p1Chunks = [ + ...processor1({ + type: "tool-call" as const, + toolCallId: "call_1", + toolName: "test", + input: {}, + } as any), + ] + expect(p1Chunks).toHaveLength(0) + + // processor2 should emit tool-call for call_1 (it has its own state) + const p2Chunks = [ + ...processor2({ + type: "tool-call" as const, + toolCallId: "call_1", + toolName: "test", + input: {}, + } as any), + ] + expect(p2Chunks).toHaveLength(3) + }) + }) + describe("mapToolChoice", () => { it("should return 
undefined for null or undefined", () => { expect(mapToolChoice(null)).toBeUndefined() diff --git a/src/api/transform/ai-sdk.ts b/src/api/transform/ai-sdk.ts index c6f37be694d..66945acf0f2 100644 --- a/src/api/transform/ai-sdk.ts +++ b/src/api/transform/ai-sdk.ts @@ -364,6 +364,95 @@ export function* processAiSdkStreamPart(part: ExtendedStreamPart): Generator Generator { + // Track tool IDs that have been processed via streaming events + const streamedToolIds = new Set() + + return function* processStreamPart(part: ExtendedStreamPart): Generator { + switch (part.type) { + case "tool-input-start": + // Track that this tool has streaming events + streamedToolIds.add(part.id) + yield { + type: "tool_call_start", + id: part.id, + name: part.toolName, + } + break + + case "tool-input-delta": + yield { + type: "tool_call_delta", + id: part.id, + delta: part.delta, + } + break + + case "tool-input-end": + yield { + type: "tool_call_end", + id: part.id, + } + break + + case "tool-call": { + // Only emit tool-call if this tool wasn't already processed via streaming + const toolCallPart = part as { + type: "tool-call" + toolCallId: string + toolName: string + input: unknown + } + if (!streamedToolIds.has(toolCallPart.toolCallId)) { + // Emit as start/delta/end for consistency with streaming providers + const args = JSON.stringify(toolCallPart.input) + yield { + type: "tool_call_start", + id: toolCallPart.toolCallId, + name: toolCallPart.toolName, + } + yield { + type: "tool_call_delta", + id: toolCallPart.toolCallId, + delta: args, + } + yield { + type: "tool_call_end", + id: toolCallPart.toolCallId, + } + } + break + } + + // Handle all other events with the stateless processor + default: + yield* processAiSdkStreamPart(part) + break + } + } +} + /** * Type for AI SDK tool choice format. */ From fd374723fe6b49b260da34031948d7437dba1ef3 Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Tue, 3 Feb 2026 09:25:17 -0500 Subject: [PATCH 4/7] fix(huggingface): properly handle tool arguments when streaming without deltas HuggingFace SDK emits tool-input-start/end but NOT tool-input-delta. The arguments come in the final tool-call event. The fix now: 1. Tracks tools that received actual deltas (toolsWithDeltas) 2. If tool-call arrives and tool was started but had no deltas, emit the arguments as a delta (HuggingFace-like behavior) 3. If tool-call arrives and tool had deltas, ignore it (fully streamed) 4. 
If tool-call arrives and tool wasn't started, emit full sequence --- src/api/transform/__tests__/ai-sdk.spec.ts | 49 +++++++++++++++++--- src/api/transform/ai-sdk.ts | 52 +++++++++++++++------- 2 files changed, 79 insertions(+), 22 deletions(-) diff --git a/src/api/transform/__tests__/ai-sdk.spec.ts b/src/api/transform/__tests__/ai-sdk.spec.ts index 9cfc2aa32f5..ecb7613cf6c 100644 --- a/src/api/transform/__tests__/ai-sdk.spec.ts +++ b/src/api/transform/__tests__/ai-sdk.spec.ts @@ -573,11 +573,47 @@ describe("AI SDK conversion utilities", () => { expect(chunks[2]).toEqual({ type: "tool_call_end", id: "call_1" }) }) + it("handles HuggingFace-like providers (start/end but no deltas, args in tool-call)", () => { + const processor = createAiSdkToolStreamProcessor() + + // HuggingFace emits tool-input-start and tool-input-end but NOT tool-input-delta + // The arguments come in the tool-call event + const startChunks = [ + ...processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" }), + ] + const endChunks = [...processor({ type: "tool-input-end" as const, id: "call_1" })] + + // tool-call should emit just the delta (arguments) since start/end were already emitted + const toolCallChunks = [ + ...processor({ + type: "tool-call" as const, + toolCallId: "call_1", + toolName: "read_file", + input: { path: "test.ts" }, + } as any), + ] + + expect(startChunks).toHaveLength(1) + expect(startChunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" }) + + expect(endChunks).toHaveLength(1) + expect(endChunks[0]).toEqual({ type: "tool_call_end", id: "call_1" }) + + // The tool-call should emit just the delta with arguments + expect(toolCallChunks).toHaveLength(1) + expect(toolCallChunks[0]).toEqual({ + type: "tool_call_delta", + id: "call_1", + delta: '{"path":"test.ts"}', + }) + }) + it("handles multiple tool calls correctly", () => { const processor = createAiSdkToolStreamProcessor() - // First tool is streamed + // First tool is fully streamed with deltas Array.from(processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" })) + Array.from(processor({ type: "tool-input-delta" as const, id: "call_1", delta: '{"path":"a.ts"}' })) Array.from(processor({ type: "tool-input-end" as const, id: "call_1" })) // Second tool is not streamed (non-streaming provider behavior) @@ -590,17 +626,17 @@ describe("AI SDK conversion utilities", () => { } as any), ] - // Second tool should be emitted + // Second tool should be emitted with full start/delta/end expect(chunks).toHaveLength(3) expect(chunks[0]).toEqual({ type: "tool_call_start", id: "call_2", name: "write_to_file" }) - // First tool's tool-call should be ignored + // First tool's tool-call should be ignored (it had deltas) const ignoredChunks = [ ...processor({ type: "tool-call" as const, toolCallId: "call_1", toolName: "read_file", - input: {}, + input: { path: "a.ts" }, } as any), ] expect(ignoredChunks).toHaveLength(0) @@ -610,11 +646,12 @@ describe("AI SDK conversion utilities", () => { const processor1 = createAiSdkToolStreamProcessor() const processor2 = createAiSdkToolStreamProcessor() - // Stream a tool with processor1 + // Stream a tool fully with processor1 (with delta) Array.from(processor1({ type: "tool-input-start" as const, id: "call_1", toolName: "test" })) + Array.from(processor1({ type: "tool-input-delta" as const, id: "call_1", delta: "{}" })) Array.from(processor1({ type: "tool-input-end" as const, id: "call_1" })) - // processor1 should ignore tool-call for call_1 + // 
processor1 should ignore tool-call for call_1 (it had deltas) const p1Chunks = [ ...processor1({ type: "tool-call" as const, diff --git a/src/api/transform/ai-sdk.ts b/src/api/transform/ai-sdk.ts index 66945acf0f2..3f91382d255 100644 --- a/src/api/transform/ai-sdk.ts +++ b/src/api/transform/ai-sdk.ts @@ -386,14 +386,16 @@ export function* processAiSdkStreamPart(part: ExtendedStreamPart): Generator Generator { - // Track tool IDs that have been processed via streaming events - const streamedToolIds = new Set() + // Track tool IDs that have been started via streaming events + const startedToolIds = new Set() + // Track tool IDs that have received actual argument deltas + const toolsWithDeltas = new Set() return function* processStreamPart(part: ExtendedStreamPart): Generator { switch (part.type) { case "tool-input-start": - // Track that this tool has streaming events - streamedToolIds.add(part.id) + // Track that this tool has started streaming + startedToolIds.add(part.id) yield { type: "tool_call_start", id: part.id, @@ -402,6 +404,8 @@ export function createAiSdkToolStreamProcessor(): ( break case "tool-input-delta": + // Track that we received actual argument content for this tool + toolsWithDeltas.add(part.id) yield { type: "tool_call_delta", id: part.id, @@ -417,30 +421,46 @@ export function createAiSdkToolStreamProcessor(): ( break case "tool-call": { - // Only emit tool-call if this tool wasn't already processed via streaming + // Handle tool-call events - the logic depends on whether we got streaming deltas const toolCallPart = part as { type: "tool-call" toolCallId: string toolName: string input: unknown } - if (!streamedToolIds.has(toolCallPart.toolCallId)) { - // Emit as start/delta/end for consistency with streaming providers + + // If we received deltas, the arguments were already streamed - ignore tool-call + if (toolsWithDeltas.has(toolCallPart.toolCallId)) { + break + } + + // If tool was started but no deltas received (like HuggingFace), + // emit the arguments from tool-call as a delta + if (startedToolIds.has(toolCallPart.toolCallId)) { const args = JSON.stringify(toolCallPart.input) - yield { - type: "tool_call_start", - id: toolCallPart.toolCallId, - name: toolCallPart.toolName, - } yield { type: "tool_call_delta", id: toolCallPart.toolCallId, delta: args, } - yield { - type: "tool_call_end", - id: toolCallPart.toolCallId, - } + break + } + + // Tool wasn't started via streaming - emit full start/delta/end sequence + const args = JSON.stringify(toolCallPart.input) + yield { + type: "tool_call_start", + id: toolCallPart.toolCallId, + name: toolCallPart.toolName, + } + yield { + type: "tool_call_delta", + id: toolCallPart.toolCallId, + delta: args, + } + yield { + type: "tool_call_end", + id: toolCallPart.toolCallId, } break } From 33fd97279eb98ed27ce9e2bf6e724a74414db960 Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Tue, 3 Feb 2026 09:30:48 -0500 Subject: [PATCH 5/7] fix(huggingface): defer tool_call_end until arguments received MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sequence must be start → delta → end. For HuggingFace: - tool-input-start emits tool_call_start - tool-input-end is DEFERRED (no deltas yet, args not received) - tool-call emits tool_call_delta + deferred tool_call_end This ensures arguments appear before the end event. 
--- src/api/transform/__tests__/ai-sdk.spec.ts | 15 ++++++++----- src/api/transform/ai-sdk.ts | 25 ++++++++++++++++++---- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/api/transform/__tests__/ai-sdk.spec.ts b/src/api/transform/__tests__/ai-sdk.spec.ts index ecb7613cf6c..17e03e6695e 100644 --- a/src/api/transform/__tests__/ai-sdk.spec.ts +++ b/src/api/transform/__tests__/ai-sdk.spec.ts @@ -581,9 +581,10 @@ describe("AI SDK conversion utilities", () => { const startChunks = [ ...processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" }), ] + // End is deferred since we haven't received deltas yet const endChunks = [...processor({ type: "tool-input-end" as const, id: "call_1" })] - // tool-call should emit just the delta (arguments) since start/end were already emitted + // tool-call should emit the delta (arguments) AND the deferred end const toolCallChunks = [ ...processor({ type: "tool-call" as const, @@ -596,16 +597,20 @@ describe("AI SDK conversion utilities", () => { expect(startChunks).toHaveLength(1) expect(startChunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" }) - expect(endChunks).toHaveLength(1) - expect(endChunks[0]).toEqual({ type: "tool_call_end", id: "call_1" }) + // End is deferred when no deltas received + expect(endChunks).toHaveLength(0) - // The tool-call should emit just the delta with arguments - expect(toolCallChunks).toHaveLength(1) + // tool-call emits delta followed by the deferred end + expect(toolCallChunks).toHaveLength(2) expect(toolCallChunks[0]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '{"path":"test.ts"}', }) + expect(toolCallChunks[1]).toEqual({ + type: "tool_call_end", + id: "call_1", + }) }) it("handles multiple tool calls correctly", () => { diff --git a/src/api/transform/ai-sdk.ts b/src/api/transform/ai-sdk.ts index 3f91382d255..8c15350077c 100644 --- a/src/api/transform/ai-sdk.ts +++ b/src/api/transform/ai-sdk.ts @@ -390,6 +390,8 @@ export function createAiSdkToolStreamProcessor(): ( const startedToolIds = new Set() // Track tool IDs that have received actual argument deltas const toolsWithDeltas = new Set() + // Track tool IDs that have ended but are waiting for arguments from tool-call + const pendingEndToolIds = new Set() return function* processStreamPart(part: ExtendedStreamPart): Generator { switch (part.type) { @@ -414,9 +416,16 @@ export function createAiSdkToolStreamProcessor(): ( break case "tool-input-end": - yield { - type: "tool_call_end", - id: part.id, + // If we already have deltas, we can emit the end now + // Otherwise, defer the end until we get arguments from tool-call + if (toolsWithDeltas.has(part.id)) { + yield { + type: "tool_call_end", + id: part.id, + } + } else { + // HuggingFace case: started but no deltas, arguments will come in tool-call + pendingEndToolIds.add(part.id) } break @@ -435,7 +444,7 @@ export function createAiSdkToolStreamProcessor(): ( } // If tool was started but no deltas received (like HuggingFace), - // emit the arguments from tool-call as a delta + // emit the arguments from tool-call as a delta, then the pending end if (startedToolIds.has(toolCallPart.toolCallId)) { const args = JSON.stringify(toolCallPart.input) yield { @@ -443,6 +452,14 @@ export function createAiSdkToolStreamProcessor(): ( id: toolCallPart.toolCallId, delta: args, } + // Now emit the deferred end + if (pendingEndToolIds.has(toolCallPart.toolCallId)) { + pendingEndToolIds.delete(toolCallPart.toolCallId) + yield { + type: "tool_call_end", + id: 
toolCallPart.toolCallId, + } + } break } From 0546990fcb6da0d719a909f9fac2ffc556049ad0 Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Tue, 3 Feb 2026 09:53:04 -0500 Subject: [PATCH 6/7] fix: switch HuggingFace to OpenAI-compatible SDK for tool support - Replace @ai-sdk/huggingface with @ai-sdk/openai-compatible This fixes 'tool messages not supported' error (vercel/ai#10766) - Add createAiSdkToolStreamProcessor() for tool call deduplication Handles providers that emit tool-input-start/end but provide args only in the final tool-call event - Remove unused @ai-sdk/huggingface dependency - Update tests for new SDK --- src/api/providers/__tests__/huggingface.spec.ts | 4 ++-- src/api/providers/huggingface.ts | 16 ++++++++++------ src/package.json | 1 - 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/api/providers/__tests__/huggingface.spec.ts b/src/api/providers/__tests__/huggingface.spec.ts index 5c897d202ea..1627c25f80e 100644 --- a/src/api/providers/__tests__/huggingface.spec.ts +++ b/src/api/providers/__tests__/huggingface.spec.ts @@ -15,8 +15,8 @@ vi.mock("ai", async (importOriginal) => { } }) -vi.mock("@ai-sdk/huggingface", () => ({ - createHuggingFace: vi.fn(() => { +vi.mock("@ai-sdk/openai-compatible", () => ({ + createOpenAICompatible: vi.fn(() => { // Return a function that returns a mock language model return vi.fn(() => ({ modelId: "meta-llama/Llama-3.3-70B-Instruct", diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts index 85dcec99353..713d4d3e2b4 100644 --- a/src/api/providers/huggingface.ts +++ b/src/api/providers/huggingface.ts @@ -1,5 +1,5 @@ import { Anthropic } from "@anthropic-ai/sdk" -import { createHuggingFace } from "@ai-sdk/huggingface" +import { createOpenAICompatible } from "@ai-sdk/openai-compatible" import { streamText, generateText, ToolSet } from "ai" import type { ModelRecord, ModelInfo } from "@roo-code/types" @@ -24,12 +24,13 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". const HUGGINGFACE_DEFAULT_TEMPERATURE = 0.7 /** - * HuggingFace provider using the dedicated @ai-sdk/huggingface package. - * Provides native support for various models on HuggingFace Hub via the Responses API. + * HuggingFace provider using @ai-sdk/openai-compatible for OpenAI-compatible API. + * Uses HuggingFace's OpenAI-compatible endpoint to enable tool message support. 
+ * @see https://github.com/vercel/ai/issues/10766 - Workaround for tool messages not supported in @ai-sdk/huggingface */ export class HuggingFaceHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions - protected provider: ReturnType + protected provider: ReturnType private modelCache: ModelRecord | null = null constructor(options: ApiHandlerOptions) { @@ -40,8 +41,11 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion throw new Error("Hugging Face API key is required") } - // Create the HuggingFace provider using AI SDK - this.provider = createHuggingFace({ + // Create an OpenAI-compatible provider pointing to HuggingFace's /v1 endpoint + // This fixes "tool messages not supported" error - the HuggingFace SDK doesn't + // properly handle function_call_output format, but OpenAI SDK does + this.provider = createOpenAICompatible({ + name: "huggingface", baseURL: "https://router.huggingface.co/v1", apiKey: this.options.huggingFaceApiKey, headers: DEFAULT_HEADERS, diff --git a/src/package.json b/src/package.json index 10971319359..04402de28af 100644 --- a/src/package.json +++ b/src/package.json @@ -454,7 +454,6 @@ "@ai-sdk/deepseek": "^2.0.14", "@ai-sdk/fireworks": "^2.0.26", "@ai-sdk/groq": "^3.0.19", - "@ai-sdk/huggingface": "^1.0.28", "@ai-sdk/mistral": "^3.0.0", "@ai-sdk/xai": "^3.0.46", "sambanova-ai-provider": "^1.2.2", From d60a437e65fc9bcd734e82ec6edf60e5bdc9105b Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Tue, 3 Feb 2026 11:16:17 -0500 Subject: [PATCH 7/7] refactor: simplify HuggingFace to use standard processAiSdkStreamPart With @ai-sdk/openai-compatible, the OpenAI-compatible SDK properly streams tool events via tool-input-start/delta/end, so we don't need the special createAiSdkToolStreamProcessor anymore. 
- Remove createAiSdkToolStreamProcessor from ai-sdk.ts
- Use standard processAiSdkStreamPart in HuggingFace handler
- Remove tests for createAiSdkToolStreamProcessor
- Remove tests for tool-call event processing (not needed)
---
 .../providers/__tests__/huggingface.spec.ts |  92 ---------
 src/api/providers/huggingface.ts            |  10 +-
 src/api/transform/__tests__/ai-sdk.spec.ts  | 185 ------------------
 src/api/transform/ai-sdk.ts                 | 126 ------------
 4 files changed, 4 insertions(+), 409 deletions(-)

diff --git a/src/api/providers/__tests__/huggingface.spec.ts b/src/api/providers/__tests__/huggingface.spec.ts
index 1627c25f80e..e7682474c1a 100644
--- a/src/api/providers/__tests__/huggingface.spec.ts
+++ b/src/api/providers/__tests__/huggingface.spec.ts
@@ -549,97 +549,5 @@ describe("HuggingFaceHandler", () => {
 			expect(toolCallEndChunks.length).toBe(1)
 			expect(toolCallEndChunks[0].id).toBe("tool-call-1")
 		})
-
-		it("should process tool-call events for non-streaming providers", async () => {
-			// HuggingFace doesn't stream tool inputs, it only emits tool-call events
-			// The processor should convert tool-call to start/delta/end events
-			async function* mockFullStream() {
-				yield {
-					type: "tool-call",
-					toolCallId: "tool-call-1",
-					toolName: "read_file",
-					input: { path: "test.ts" },
-				}
-			}
-
-			const mockUsage = Promise.resolve({
-				inputTokens: 10,
-				outputTokens: 5,
-			})
-
-			const mockProviderMetadata = Promise.resolve({})
-
-			mockStreamText.mockReturnValue({
-				fullStream: mockFullStream(),
-				usage: mockUsage,
-				providerMetadata: mockProviderMetadata,
-			})
-
-			const stream = handler.createMessage(systemPrompt, messages)
-			const chunks: any[] = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// tool-call events should be converted to start/delta/end for consistency
-			const toolCallStartChunks = chunks.filter((c) => c.type === "tool_call_start")
-			const toolCallDeltaChunks = chunks.filter((c) => c.type === "tool_call_delta")
-			const toolCallEndChunks = chunks.filter((c) => c.type === "tool_call_end")
-
-			expect(toolCallStartChunks.length).toBe(1)
-			expect(toolCallStartChunks[0].id).toBe("tool-call-1")
-			expect(toolCallStartChunks[0].name).toBe("read_file")
-
-			expect(toolCallDeltaChunks.length).toBe(1)
-			expect(toolCallDeltaChunks[0].delta).toBe('{"path":"test.ts"}')
-
-			expect(toolCallEndChunks.length).toBe(1)
-			expect(toolCallEndChunks[0].id).toBe("tool-call-1")
-		})
-
-		it("should ignore tool-call events when tool was already streamed", async () => {
-			// When a provider streams tool inputs AND sends tool-call, we should not duplicate
-			async function* mockFullStream() {
-				// First, streaming events
-				yield { type: "tool-input-start", id: "tool-call-1", toolName: "read_file" }
-				yield { type: "tool-input-delta", id: "tool-call-1", delta: '{"path":"test.ts"}' }
-				yield { type: "tool-input-end", id: "tool-call-1" }
-				// Then the tool-call event (should be ignored)
-				yield {
-					type: "tool-call",
-					toolCallId: "tool-call-1",
-					toolName: "read_file",
-					input: { path: "test.ts" },
-				}
-			}
-
-			const mockUsage = Promise.resolve({
-				inputTokens: 10,
-				outputTokens: 5,
-			})
-
-			const mockProviderMetadata = Promise.resolve({})
-
-			mockStreamText.mockReturnValue({
-				fullStream: mockFullStream(),
-				usage: mockUsage,
-				providerMetadata: mockProviderMetadata,
-			})
-
-			const stream = handler.createMessage(systemPrompt, messages)
-			const chunks: any[] = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Should have exactly 1 of each (not duplicated)
-			const toolCallStartChunks = chunks.filter((c) => c.type === "tool_call_start")
-			const toolCallDeltaChunks = chunks.filter((c) => c.type === "tool_call_delta")
-			const toolCallEndChunks = chunks.filter((c) => c.type === "tool_call_end")
-
-			expect(toolCallStartChunks.length).toBe(1)
-			expect(toolCallDeltaChunks.length).toBe(1)
-			expect(toolCallEndChunks.length).toBe(1)
-		})
 	})
 })
diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts
index 713d4d3e2b4..25d0608a331 100644
--- a/src/api/providers/huggingface.ts
+++ b/src/api/providers/huggingface.ts
@@ -9,7 +9,7 @@ import type { ApiHandlerOptions } from "../../shared/api"
 import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
-	createAiSdkToolStreamProcessor,
+	processAiSdkStreamPart,
 	mapToolChoice,
 	handleAiSdkError,
 } from "../transform/ai-sdk"
@@ -172,12 +172,10 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion
 		const result = streamText(requestOptions)
 
 		try {
-			// Use the stateful processor to handle tool call deduplication
-			// HuggingFace doesn't emit streaming tool events (tool-input-start/delta/end),
-			// only the final tool-call event, so we need the processor to handle this
-			const processStreamPart = createAiSdkToolStreamProcessor()
+			// Process the full stream to get all events
 			for await (const part of result.fullStream) {
-				for (const chunk of processStreamPart(part)) {
+				// Use the processAiSdkStreamPart utility to convert stream parts
+				for (const chunk of processAiSdkStreamPart(part)) {
 					yield chunk
 				}
 			}
diff --git a/src/api/transform/__tests__/ai-sdk.spec.ts b/src/api/transform/__tests__/ai-sdk.spec.ts
index 17e03e6695e..fb4e3b9e2f2 100644
--- a/src/api/transform/__tests__/ai-sdk.spec.ts
+++ b/src/api/transform/__tests__/ai-sdk.spec.ts
@@ -4,7 +4,6 @@ import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
 	processAiSdkStreamPart,
-	createAiSdkToolStreamProcessor,
 	mapToolChoice,
 	extractAiSdkErrorMessage,
 	handleAiSdkError,
@@ -496,190 +495,6 @@ describe("AI SDK conversion utilities", () => {
 		})
 	})
 
-	describe("createAiSdkToolStreamProcessor", () => {
-		it("processes text-delta chunks like processAiSdkStreamPart", () => {
-			const processor = createAiSdkToolStreamProcessor()
-			const part = { type: "text-delta" as const, id: "1", text: "Hello" }
-			const chunks = [...processor(part)]
-
-			expect(chunks).toHaveLength(1)
-			expect(chunks[0]).toEqual({ type: "text", text: "Hello" })
-		})
-
-		it("processes tool-input-start/delta/end events (streaming tools)", () => {
-			const processor = createAiSdkToolStreamProcessor()
-
-			// Simulate streaming tool events
-			const startChunks = [
-				...processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" }),
-			]
-			const deltaChunks = [...processor({ type: "tool-input-delta" as const, id: "call_1", delta: '{"path":' })]
-			const delta2Chunks = [
-				...processor({ type: "tool-input-delta" as const, id: "call_1", delta: '"test.ts"}' }),
-			]
-			const endChunks = [...processor({ type: "tool-input-end" as const, id: "call_1" })]
-
-			expect(startChunks).toHaveLength(1)
-			expect(startChunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" })
-
-			expect(deltaChunks).toHaveLength(1)
-			expect(deltaChunks[0]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '{"path":' })
-
-			expect(delta2Chunks).toHaveLength(1)
-			expect(delta2Chunks[0]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '"test.ts"}' })
-
-			expect(endChunks).toHaveLength(1)
-			expect(endChunks[0]).toEqual({ type: "tool_call_end", id: "call_1" })
-		})
-
-		it("ignores tool-call events when tool was already streamed", () => {
-			const processor = createAiSdkToolStreamProcessor()
-
-			// Process streaming events first (consume the generator to update state)
-			Array.from(processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" }))
-			Array.from(processor({ type: "tool-input-delta" as const, id: "call_1", delta: '{"path":"test.ts"}' }))
-			Array.from(processor({ type: "tool-input-end" as const, id: "call_1" }))
-
-			// Now the tool-call event for the same tool should be ignored
-			const toolCallChunks = [
-				...processor({
-					type: "tool-call" as const,
-					toolCallId: "call_1",
-					toolName: "read_file",
-					input: { path: "test.ts" },
-				} as any),
-			]
-
-			expect(toolCallChunks).toHaveLength(0)
-		})
-
-		it("processes tool-call events for non-streaming providers", () => {
-			const processor = createAiSdkToolStreamProcessor()
-
-			// Directly process a tool-call event (no streaming events first)
-			const chunks = [
-				...processor({
-					type: "tool-call" as const,
-					toolCallId: "call_1",
-					toolName: "read_file",
-					input: { path: "test.ts" },
-				} as any),
-			]
-
-			// Should emit start/delta/end events
-			expect(chunks).toHaveLength(3)
-			expect(chunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" })
-			expect(chunks[1]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '{"path":"test.ts"}' })
-			expect(chunks[2]).toEqual({ type: "tool_call_end", id: "call_1" })
-		})
-
-		it("handles HuggingFace-like providers (start/end but no deltas, args in tool-call)", () => {
-			const processor = createAiSdkToolStreamProcessor()
-
-			// HuggingFace emits tool-input-start and tool-input-end but NOT tool-input-delta
-			// The arguments come in the tool-call event
-			const startChunks = [
-				...processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" }),
-			]
-			// End is deferred since we haven't received deltas yet
-			const endChunks = [...processor({ type: "tool-input-end" as const, id: "call_1" })]
-
-			// tool-call should emit the delta (arguments) AND the deferred end
-			const toolCallChunks = [
-				...processor({
-					type: "tool-call" as const,
-					toolCallId: "call_1",
-					toolName: "read_file",
-					input: { path: "test.ts" },
-				} as any),
-			]
-
-			expect(startChunks).toHaveLength(1)
-			expect(startChunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" })
-
-			// End is deferred when no deltas received
-			expect(endChunks).toHaveLength(0)
-
-			// tool-call emits delta followed by the deferred end
-			expect(toolCallChunks).toHaveLength(2)
-			expect(toolCallChunks[0]).toEqual({
-				type: "tool_call_delta",
-				id: "call_1",
-				delta: '{"path":"test.ts"}',
-			})
-			expect(toolCallChunks[1]).toEqual({
-				type: "tool_call_end",
-				id: "call_1",
-			})
-		})
-
-		it("handles multiple tool calls correctly", () => {
-			const processor = createAiSdkToolStreamProcessor()
-
-			// First tool is fully streamed with deltas
-			Array.from(processor({ type: "tool-input-start" as const, id: "call_1", toolName: "read_file" }))
-			Array.from(processor({ type: "tool-input-delta" as const, id: "call_1", delta: '{"path":"a.ts"}' }))
-			Array.from(processor({ type: "tool-input-end" as const, id: "call_1" }))
-
-			// Second tool is not streamed (non-streaming provider behavior)
-			const chunks = [
-				...processor({
-					type: "tool-call" as const,
-					toolCallId: "call_2",
-					toolName: "write_to_file",
-					input: { path: "output.ts", content: "test" },
-				} as any),
-			]
-
-			// Second tool should be emitted with full start/delta/end
-			expect(chunks).toHaveLength(3)
-			expect(chunks[0]).toEqual({ type: "tool_call_start", id: "call_2", name: "write_to_file" })
-
-			// First tool's tool-call should be ignored (it had deltas)
-			const ignoredChunks = [
-				...processor({
-					type: "tool-call" as const,
-					toolCallId: "call_1",
-					toolName: "read_file",
-					input: { path: "a.ts" },
-				} as any),
-			]
-			expect(ignoredChunks).toHaveLength(0)
-		})
-
-		it("maintains separate state per processor instance", () => {
-			const processor1 = createAiSdkToolStreamProcessor()
-			const processor2 = createAiSdkToolStreamProcessor()
-
-			// Stream a tool fully with processor1 (with delta)
-			Array.from(processor1({ type: "tool-input-start" as const, id: "call_1", toolName: "test" }))
-			Array.from(processor1({ type: "tool-input-delta" as const, id: "call_1", delta: "{}" }))
-			Array.from(processor1({ type: "tool-input-end" as const, id: "call_1" }))
-
-			// processor1 should ignore tool-call for call_1 (it had deltas)
-			const p1Chunks = [
-				...processor1({
-					type: "tool-call" as const,
-					toolCallId: "call_1",
-					toolName: "test",
-					input: {},
-				} as any),
-			]
-			expect(p1Chunks).toHaveLength(0)
-
-			// processor2 should emit tool-call for call_1 (it has its own state)
-			const p2Chunks = [
-				...processor2({
-					type: "tool-call" as const,
-					toolCallId: "call_1",
-					toolName: "test",
-					input: {},
-				} as any),
-			]
-			expect(p2Chunks).toHaveLength(3)
-		})
-	})
-
 	describe("mapToolChoice", () => {
 		it("should return undefined for null or undefined", () => {
 			expect(mapToolChoice(null)).toBeUndefined()
 			expect(mapToolChoice(undefined)).toBeUndefined()
 		})
diff --git a/src/api/transform/ai-sdk.ts b/src/api/transform/ai-sdk.ts
index 8c15350077c..c6f37be694d 100644
--- a/src/api/transform/ai-sdk.ts
+++ b/src/api/transform/ai-sdk.ts
@@ -364,132 +364,6 @@ export function* processAiSdkStreamPart(part: ExtendedStreamPart): Generator
-export function createAiSdkToolStreamProcessor(): (part: ExtendedStreamPart) => Generator {
-	// Track tool IDs that have been started via streaming events
-	const startedToolIds = new Set()
-	// Track tool IDs that have received actual argument deltas
-	const toolsWithDeltas = new Set()
-	// Track tool IDs that have ended but are waiting for arguments from tool-call
-	const pendingEndToolIds = new Set()
-
-	return function* processStreamPart(part: ExtendedStreamPart): Generator {
-		switch (part.type) {
-			case "tool-input-start":
-				// Track that this tool has started streaming
-				startedToolIds.add(part.id)
-				yield {
-					type: "tool_call_start",
-					id: part.id,
-					name: part.toolName,
-				}
-				break
-
-			case "tool-input-delta":
-				// Track that we received actual argument content for this tool
-				toolsWithDeltas.add(part.id)
-				yield {
-					type: "tool_call_delta",
-					id: part.id,
-					delta: part.delta,
-				}
-				break
-
-			case "tool-input-end":
-				// If we already have deltas, we can emit the end now
-				// Otherwise, defer the end until we get arguments from tool-call
-				if (toolsWithDeltas.has(part.id)) {
-					yield {
-						type: "tool_call_end",
-						id: part.id,
-					}
-				} else {
-					// HuggingFace case: started but no deltas, arguments will come in tool-call
-					pendingEndToolIds.add(part.id)
-				}
-				break
-
-			case "tool-call": {
-				// Handle tool-call events - the logic depends on whether we got streaming deltas
-				const toolCallPart = part as {
-					type: "tool-call"
-					toolCallId: string
-					toolName: string
-					input: unknown
-				}
-
-				// If we received deltas, the arguments were already streamed - ignore tool-call
-				if (toolsWithDeltas.has(toolCallPart.toolCallId)) {
-					break
-				}
-
-				// If tool was started but no deltas received (like HuggingFace),
-				// emit the arguments from tool-call as a delta, then the pending end
-				if (startedToolIds.has(toolCallPart.toolCallId)) {
-					const args = JSON.stringify(toolCallPart.input)
-					yield {
-						type: "tool_call_delta",
-						id: toolCallPart.toolCallId,
-						delta: args,
-					}
-					// Now emit the deferred end
-					if (pendingEndToolIds.has(toolCallPart.toolCallId)) {
-						pendingEndToolIds.delete(toolCallPart.toolCallId)
-						yield {
-							type: "tool_call_end",
-							id: toolCallPart.toolCallId,
-						}
-					}
-					break
-				}
-
-				// Tool wasn't started via streaming - emit full start/delta/end sequence
-				const args = JSON.stringify(toolCallPart.input)
-				yield {
-					type: "tool_call_start",
-					id: toolCallPart.toolCallId,
-					name: toolCallPart.toolName,
-				}
-				yield {
-					type: "tool_call_delta",
-					id: toolCallPart.toolCallId,
-					delta: args,
-				}
-				yield {
-					type: "tool_call_end",
-					id: toolCallPart.toolCallId,
-				}
-				break
-			}
-
-			// Handle all other events with the stateless processor
-			default:
-				yield* processAiSdkStreamPart(part)
-				break
-		}
-	}
-}
-
 /**
  * Type for AI SDK tool choice format.
  */