From 63bf3f9750efb34acafbd0ba112bae4e9436f425 Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Mon, 1 Dec 2025 17:02:23 -0500 Subject: [PATCH] fix: flush pending tool results before task delegation When tools are called in parallel (e.g., update_todo_list + new_task), the tool results accumulate in userMessageContent but aren't saved to API history until all tools complete. When new_task triggers delegation, the parent is disposed before these pending results are saved, causing 400 errors when the parent resumes (missing tool_result for tool_use). This fix: - Adds flushPendingToolResultsToHistory() method in Task.ts that saves pending userMessageContent to API history - Calls this method in delegateParentAndOpenChild() before disposing the parent task - Safe for both native/XML protocols and sequential/parallel execution (returns early if there's nothing to flush) --- src/core/task/Task.ts | 35 ++ .../flushPendingToolResultsToHistory.spec.ts | 346 ++++++++++++++++++ src/core/webview/ClineProvider.ts | 21 +- 3 files changed, 401 insertions(+), 1 deletion(-) create mode 100644 src/core/task/__tests__/flushPendingToolResultsToHistory.spec.ts diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index c4a52246aa8..e0abd536553 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -786,6 +786,41 @@ export class Task extends EventEmitter implements TaskLike { await this.saveApiConversationHistory() } + /** + * Flush any pending tool results to the API conversation history. + * + * This is critical for native tool protocol when the task is about to be + * delegated (e.g., via new_task). Before delegation, if other tools were + * called in the same turn before new_task, their tool_result blocks are + * accumulated in `userMessageContent` but haven't been saved to the API + * history yet. If we don't flush them before the parent is disposed, + * the API conversation will be incomplete and cause 400 errors when + * the parent resumes (missing tool_result for tool_use blocks). + * + * NOTE: The assistant message is typically already in history by the time + * tools execute (added in recursivelyMakeClineRequests after streaming completes). + * So we usually only need to flush the pending user message with tool_results. + */ + public async flushPendingToolResultsToHistory(): Promise { + // Only flush if there's actually pending content to save + if (this.userMessageContent.length === 0) { + return + } + + // Save the user message with tool_result blocks + const userMessage: Anthropic.MessageParam = { + role: "user", + content: this.userMessageContent, + } + const userMessageWithTs = { ...userMessage, ts: Date.now() } + this.apiConversationHistory.push(userMessageWithTs as ApiMessage) + + await this.saveApiConversationHistory() + + // Clear the pending content since it's now saved + this.userMessageContent = [] + } + private async saveApiConversationHistory() { try { await saveApiMessages({ diff --git a/src/core/task/__tests__/flushPendingToolResultsToHistory.spec.ts b/src/core/task/__tests__/flushPendingToolResultsToHistory.spec.ts new file mode 100644 index 00000000000..453fd1cad3a --- /dev/null +++ b/src/core/task/__tests__/flushPendingToolResultsToHistory.spec.ts @@ -0,0 +1,346 @@ +// npx vitest run core/task/__tests__/flushPendingToolResultsToHistory.spec.ts + +import * as os from "os" +import * as path from "path" +import * as vscode from "vscode" + +import type { GlobalState, ProviderSettings } from "@roo-code/types" +import { TelemetryService } from "@roo-code/telemetry" + +import { Task } from "../Task" +import { ClineProvider } from "../../webview/ClineProvider" +import { ContextProxy } from "../../config/ContextProxy" + +// Mock delay before any imports that might use it +vi.mock("delay", () => ({ + __esModule: true, + default: vi.fn().mockResolvedValue(undefined), +})) + +vi.mock("execa", () => ({ + execa: vi.fn(), +})) + +vi.mock("fs/promises", async (importOriginal) => { + const actual = (await importOriginal()) as Record + const mockFunctions = { + mkdir: vi.fn().mockResolvedValue(undefined), + writeFile: vi.fn().mockResolvedValue(undefined), + readFile: vi.fn().mockResolvedValue("[]"), + unlink: vi.fn().mockResolvedValue(undefined), + rmdir: vi.fn().mockResolvedValue(undefined), + } + + return { + ...actual, + ...mockFunctions, + default: mockFunctions, + } +}) + +vi.mock("p-wait-for", () => ({ + default: vi.fn().mockImplementation(async () => Promise.resolve()), +})) + +vi.mock("vscode", () => { + const mockDisposable = { dispose: vi.fn() } + const mockEventEmitter = { event: vi.fn(), fire: vi.fn() } + const mockTextDocument = { uri: { fsPath: "/mock/workspace/path/file.ts" } } + const mockTextEditor = { document: mockTextDocument } + const mockTab = { input: { uri: { fsPath: "/mock/workspace/path/file.ts" } } } + const mockTabGroup = { tabs: [mockTab] } + + return { + TabInputTextDiff: vi.fn(), + CodeActionKind: { + QuickFix: { value: "quickfix" }, + RefactorRewrite: { value: "refactor.rewrite" }, + }, + window: { + createTextEditorDecorationType: vi.fn().mockReturnValue({ + dispose: vi.fn(), + }), + visibleTextEditors: [mockTextEditor], + tabGroups: { + all: [mockTabGroup], + close: vi.fn(), + onDidChangeTabs: vi.fn(() => ({ dispose: vi.fn() })), + }, + showErrorMessage: vi.fn(), + }, + workspace: { + workspaceFolders: [ + { + uri: { fsPath: "/mock/workspace/path" }, + name: "mock-workspace", + index: 0, + }, + ], + createFileSystemWatcher: vi.fn(() => ({ + onDidCreate: vi.fn(() => mockDisposable), + onDidDelete: vi.fn(() => mockDisposable), + onDidChange: vi.fn(() => mockDisposable), + dispose: vi.fn(), + })), + fs: { + stat: vi.fn().mockResolvedValue({ type: 1 }), + }, + onDidSaveTextDocument: vi.fn(() => mockDisposable), + getConfiguration: vi.fn(() => ({ get: (key: string, defaultValue: any) => defaultValue })), + }, + env: { + uriScheme: "vscode", + language: "en", + }, + EventEmitter: vi.fn().mockImplementation(() => mockEventEmitter), + Disposable: { + from: vi.fn(), + }, + TabInputText: vi.fn(), + } +}) + +vi.mock("../../mentions", () => ({ + parseMentions: vi.fn().mockImplementation((text) => { + return Promise.resolve(`processed: ${text}`) + }), + openMention: vi.fn(), + getLatestTerminalOutput: vi.fn(), +})) + +vi.mock("../../../integrations/misc/extract-text", () => ({ + extractTextFromFile: vi.fn().mockResolvedValue("Mock file content"), +})) + +vi.mock("../../environment/getEnvironmentDetails", () => ({ + getEnvironmentDetails: vi.fn().mockResolvedValue(""), +})) + +vi.mock("../../ignore/RooIgnoreController") + +vi.mock("../../condense", async (importOriginal) => { + const actual = (await importOriginal()) as any + return { + ...actual, + summarizeConversation: vi.fn().mockResolvedValue({ + messages: [{ role: "user", content: [{ type: "text", text: "continued" }], ts: Date.now() }], + summary: "summary", + cost: 0, + newContextTokens: 1, + }), + } +}) + +vi.mock("../../../utils/storage", () => ({ + getTaskDirectoryPath: vi + .fn() + .mockImplementation((globalStoragePath, taskId) => Promise.resolve(`${globalStoragePath}/tasks/${taskId}`)), + getSettingsDirectoryPath: vi + .fn() + .mockImplementation((globalStoragePath) => Promise.resolve(`${globalStoragePath}/settings`)), +})) + +vi.mock("../../../utils/fs", () => ({ + fileExistsAtPath: vi.fn().mockReturnValue(false), +})) + +describe("flushPendingToolResultsToHistory", () => { + let mockProvider: any + let mockApiConfig: ProviderSettings + let mockOutputChannel: any + let mockExtensionContext: vscode.ExtensionContext + + beforeEach(() => { + if (!TelemetryService.hasInstance()) { + TelemetryService.createInstance([]) + } + + const storageUri = { + fsPath: path.join(os.tmpdir(), "test-storage"), + } + + mockExtensionContext = { + globalState: { + get: vi.fn().mockImplementation((key: keyof GlobalState) => undefined), + update: vi.fn().mockImplementation((_key, _value) => Promise.resolve()), + keys: vi.fn().mockReturnValue([]), + }, + globalStorageUri: storageUri, + workspaceState: { + get: vi.fn().mockImplementation((_key) => undefined), + update: vi.fn().mockImplementation((_key, _value) => Promise.resolve()), + keys: vi.fn().mockReturnValue([]), + }, + secrets: { + get: vi.fn().mockImplementation((_key) => Promise.resolve(undefined)), + store: vi.fn().mockImplementation((_key, _value) => Promise.resolve()), + delete: vi.fn().mockImplementation((_key) => Promise.resolve()), + }, + extensionUri: { + fsPath: "/mock/extension/path", + }, + extension: { + packageJSON: { + version: "1.0.0", + }, + }, + } as unknown as vscode.ExtensionContext + + mockOutputChannel = { + appendLine: vi.fn(), + append: vi.fn(), + clear: vi.fn(), + show: vi.fn(), + hide: vi.fn(), + dispose: vi.fn(), + } + + mockProvider = new ClineProvider( + mockExtensionContext, + mockOutputChannel, + "sidebar", + new ContextProxy(mockExtensionContext), + ) as any + + mockApiConfig = { + apiProvider: "anthropic", + apiModelId: "claude-3-5-sonnet-20241022", + apiKey: "test-api-key", + } + + mockProvider.postMessageToWebview = vi.fn().mockResolvedValue(undefined) + mockProvider.postStateToWebview = vi.fn().mockResolvedValue(undefined) + mockProvider.updateTaskHistory = vi.fn().mockResolvedValue(undefined) + }) + + it("should not save anything when userMessageContent is empty", async () => { + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Ensure userMessageContent is empty + task.userMessageContent = [] + const initialHistoryLength = task.apiConversationHistory.length + + // Call flush + await task.flushPendingToolResultsToHistory() + + // History should not have changed since userMessageContent was empty + expect(task.apiConversationHistory.length).toBe(initialHistoryLength) + }) + + it("should save user message when userMessageContent has pending tool results", async () => { + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Set up pending tool result in userMessageContent + task.userMessageContent = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "File written successfully", + }, + ] + + await task.flushPendingToolResultsToHistory() + + // Should have saved 1 user message + expect(task.apiConversationHistory.length).toBe(1) + + // Check user message with tool result + const userMessage = task.apiConversationHistory[0] + expect(userMessage.role).toBe("user") + expect(Array.isArray(userMessage.content)).toBe(true) + expect((userMessage.content as any[])[0].type).toBe("tool_result") + expect((userMessage.content as any[])[0].tool_use_id).toBe("tool-123") + }) + + it("should clear userMessageContent after flushing", async () => { + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Set up pending tool result + task.userMessageContent = [ + { + type: "tool_result", + tool_use_id: "tool-456", + content: "Command executed", + }, + ] + + await task.flushPendingToolResultsToHistory() + + // userMessageContent should be cleared + expect(task.userMessageContent.length).toBe(0) + }) + + it("should handle multiple tool results in a single flush", async () => { + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Set up multiple pending tool results + task.userMessageContent = [ + { + type: "tool_result", + tool_use_id: "tool-1", + content: "First result", + }, + { + type: "tool_result", + tool_use_id: "tool-2", + content: "Second result", + }, + ] + + await task.flushPendingToolResultsToHistory() + + // Check user message has both tool results + const userMessage = task.apiConversationHistory[0] + expect(Array.isArray(userMessage.content)).toBe(true) + expect((userMessage.content as any[]).length).toBe(2) + expect((userMessage.content as any[])[0].tool_use_id).toBe("tool-1") + expect((userMessage.content as any[])[1].tool_use_id).toBe("tool-2") + }) + + it("should add timestamp to saved messages", async () => { + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + const beforeTs = Date.now() + + task.userMessageContent = [ + { + type: "tool_result", + tool_use_id: "tool-ts", + content: "Result", + }, + ] + + await task.flushPendingToolResultsToHistory() + + const afterTs = Date.now() + + // Message should have timestamp + expect((task.apiConversationHistory[0] as any).ts).toBeGreaterThanOrEqual(beforeTs) + expect((task.apiConversationHistory[0] as any).ts).toBeLessThanOrEqual(afterTs) + }) +}) diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 4aab25bcb76..85a3e6ed3b0 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -2990,8 +2990,27 @@ export class ClineProvider `[delegateParentAndOpenChild] Parent mismatch: expected ${parentTaskId}, current ${parent.taskId}`, ) } + // 2) Flush pending tool results to API history BEFORE disposing the parent. + // This is critical for native tool protocol: when tools are called before new_task, + // their tool_result blocks are in userMessageContent but not yet saved to API history. + // If we don't flush them, the parent's API conversation will be incomplete and + // cause 400 errors when resumed (missing tool_result for tool_use blocks). + // + // NOTE: We do NOT pass the assistant message here because the assistant message + // is already added to apiConversationHistory by the normal flow in + // recursivelyMakeClineRequests BEFORE tools start executing. We only need to + // flush the pending user message with tool_results. + try { + await parent.flushPendingToolResultsToHistory() + } catch (error) { + this.log( + `[delegateParentAndOpenChild] Error flushing pending tool results (non-fatal): ${ + error instanceof Error ? error.message : String(error) + }`, + ) + } - // 2) Enforce single-open invariant by closing/disposing the parent first + // 3) Enforce single-open invariant by closing/disposing the parent first // This ensures we never have >1 tasks open at any time during delegation. // Await abort completion to ensure clean disposal and prevent unhandled rejections. try {