From 60693300107173732484437fb5c26d800f6f0f55 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Mon, 5 Jan 2026 14:07:14 +0000 Subject: [PATCH 1/2] fix: add explicit deduplication for duplicate tool_result blocks This prevents the "doom loop" that occurs when external terminal (e.g., PuTTY) fails during long-running commands and falls back to integrated terminal. The race condition between terminal fallback and user approval can generate duplicate tool_result blocks for the same tool_use_id. The fix adds an explicit deduplication step that filters out duplicate tool_results BEFORE any other processing, keeping only the first occurrence of each tool_use_id. Fixes #10465 --- .../__tests__/validateToolResultIds.spec.ts | 120 ++++++++++++++++++ src/core/task/validateToolResultIds.ts | 69 +++++++++- 2 files changed, 188 insertions(+), 1 deletion(-) diff --git a/src/core/task/__tests__/validateToolResultIds.spec.ts b/src/core/task/__tests__/validateToolResultIds.spec.ts index 28491aedd73..e7e5ff74a34 100644 --- a/src/core/task/__tests__/validateToolResultIds.spec.ts +++ b/src/core/task/__tests__/validateToolResultIds.spec.ts @@ -482,6 +482,126 @@ describe("validateAndFixToolResultIds", () => { expect(resultContent[1].type).toBe("text") expect((resultContent[1] as Anthropic.TextBlockParam).text).toBe("Some additional context") }) + + /** + * CRITICAL TEST: Terminal Fallback Duplicate tool_result Scenario + * + * This test verifies the fix for GitHub Issue #10465: + * "Terminal Fallback Causes Duplicate Tool Results and Doom Loop in Unattended Mode" + * + * THE BUG: + * When an external terminal (e.g., PuTTY) fails during long-running commands and + * falls back to the integrated terminal, a race condition can generate duplicate + * tool_result blocks for the SAME valid tool_use_id. This caused: + * 1. API protocol violation ("toolResult blocks exceeds toolUse blocks") + * 2. Unrecoverable "doom loop" breaking unattended access + * + * WHY EXISTING LOGIC DIDN'T CATCH THIS: + * The validation used a Set (existingToolResultIds) which automatically deduplicates, + * so it saw only ONE unique ID. With the ID being valid: + * - missingToolUseIds.length === 0 (ID exists in Set) + * - hasInvalidIds === false (ID is valid) + * Early return triggered, passing BOTH duplicates to the API. + * + * THE FIX: + * Explicit deduplication step that filters out duplicate tool_results + * BEFORE any other processing, keeping only the first occurrence. + */ + it("should filter out duplicate tool_results with identical valid tool_use_ids (terminal fallback scenario)", () => { + // Scenario: Assistant made a single tool call + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: [ + { + type: "tool_use", + id: "tooluse_QZ-pU8v2QKO8L8fHoJRI2g", + name: "execute_command", + input: { command: "ps aux | grep test", cwd: "/path/to/project" }, + }, + ], + } + + // Bug scenario: Two tool_results with the SAME valid tool_use_id + // This happens when terminal fallback generates a second result during the race condition + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tooluse_QZ-pU8v2QKO8L8fHoJRI2g", // First result from command execution + content: "No test processes found", + }, + { + type: "tool_result", + tool_use_id: "tooluse_QZ-pU8v2QKO8L8fHoJRI2g", // Duplicate from user approval during fallback + content: '{"status":"approved","message":"The user approved this operation"}', + }, + ], + } + + const result = validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(Array.isArray(result.content)).toBe(true) + const resultContent = result.content as Anthropic.ToolResultBlockParam[] + + // CRITICAL ASSERTION: Only ONE tool_result should remain + // This prevents the API protocol violation that caused the doom loop + expect(resultContent.length).toBe(1) + expect(resultContent[0].tool_use_id).toBe("tooluse_QZ-pU8v2QKO8L8fHoJRI2g") + // The first occurrence should be kept (with original content) + expect(resultContent[0].content).toBe("No test processes found") + }) + + /** + * Test that deduplication preserves text blocks alongside tool_results. + * This ensures the fix doesn't accidentally filter out non-tool_result content. + */ + it("should preserve text blocks while deduplicating tool_results with same valid ID", () => { + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: [ + { + type: "tool_use", + id: "tool-123", + name: "read_file", + input: { path: "test.txt" }, + }, + ], + } + + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "First result", + }, + { + type: "text", + text: "Environment details here", + }, + { + type: "tool_result", + tool_use_id: "tool-123", // Duplicate with same valid ID + content: "Duplicate result from fallback", + }, + ], + } + + const result = validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(Array.isArray(result.content)).toBe(true) + const resultContent = result.content as Array + + // Should have: 1 tool_result + 1 text block (duplicate filtered out) + expect(resultContent.length).toBe(2) + expect(resultContent[0].type).toBe("tool_result") + expect((resultContent[0] as Anthropic.ToolResultBlockParam).tool_use_id).toBe("tool-123") + expect((resultContent[0] as Anthropic.ToolResultBlockParam).content).toBe("First result") + expect(resultContent[1].type).toBe("text") + expect((resultContent[1] as Anthropic.TextBlockParam).text).toBe("Environment details here") + }) }) describe("when there are more tool_uses than tool_results", () => { diff --git a/src/core/task/validateToolResultIds.ts b/src/core/task/validateToolResultIds.ts index ce89a4e167b..07169398141 100644 --- a/src/core/task/validateToolResultIds.ts +++ b/src/core/task/validateToolResultIds.ts @@ -78,10 +78,77 @@ export function validateAndFixToolResultIds( } // Find tool_result blocks in the user message - const toolResults = userMessage.content.filter( + let toolResults = userMessage.content.filter( (block): block is Anthropic.ToolResultBlockParam => block.type === "tool_result", ) + // ============================================================================ + // CRITICAL: Explicit deduplication of tool_result blocks + // ============================================================================ + // + // WHY THIS IS NECESSARY: + // This deduplication step prevents a "doom loop" that can occur when the external + // terminal (e.g., PuTTY) fails during long-running commands and falls back to + // the integrated terminal. The race condition between terminal fallback and user + // approval can generate duplicate tool_result blocks for the same tool_use_id. + // + // THE BUG SCENARIO: + // 1. Assistant calls execute_command with tool_use_id="tooluse_XYZ" + // 2. Long-running command executes via external terminal + // 3. External terminal fails, triggering fallback to integrated terminal + // 4. First tool_result is generated for "tooluse_XYZ" + // 5. User approval arrives during/after fallback, generating ANOTHER tool_result + // for the same "tooluse_XYZ" + // 6. Result: 2 tool_result blocks for 1 tool_use block + // + // WHY EXISTING CHECKS DIDN'T CATCH THIS: + // The checks below use a Set (existingToolResultIds) which automatically dedupes, + // so it sees only ONE unique ID. With: + // - missingToolUseIds.length === 0 (the ID exists in the Set) + // - hasInvalidIds === false (the ID is valid) + // The early return at the end of this block triggers, and BOTH duplicate + // tool_results pass through to the API, causing a protocol violation. + // + // THE FIX: + // Explicitly filter out duplicate tool_results BEFORE any other processing. + // This ensures exactly one tool_result per tool_use_id, regardless of how + // the duplicates were generated. + // + // REFERENCE: GitHub Issue #10465 + // ============================================================================ + + const seenToolResultIds = new Set() + const deduplicatedContent = userMessage.content.filter((block) => { + // Keep all non-tool_result blocks unchanged + if (block.type !== "tool_result") { + return true + } + + // For tool_result blocks: keep only the first occurrence of each tool_use_id + if (seenToolResultIds.has(block.tool_use_id)) { + // This is a duplicate - filter it out to prevent API protocol violation + return false + } + + seenToolResultIds.add(block.tool_use_id) + return true + }) + + // Update userMessage with deduplicated content + userMessage = { + ...userMessage, + content: deduplicatedContent, + } + + // Re-extract tool_results from deduplicated content for subsequent processing + toolResults = deduplicatedContent.filter( + (block): block is Anthropic.ToolResultBlockParam => block.type === "tool_result", + ) + + // ============================================================================ + // End of deduplication logic + // ============================================================================ + // Build a set of valid tool_use IDs const validToolUseIds = new Set(toolUseBlocks.map((block) => block.id)) From 4a049c4382ee3e6a4a476e4017db9e21aa5304e7 Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Mon, 5 Jan 2026 10:56:46 -0500 Subject: [PATCH 2/2] Add upstream fix for terminal fallback race condition - Add supersedePendingAsk() method to Task.ts - Call supersedePendingAsk() in ExecuteCommandTool.ts on fallback - Trim verbose comments in validation layer per code review --- src/core/task/Task.ts | 4 ++ .../__tests__/validateToolResultIds.spec.ts | 38 ++-------- src/core/task/validateToolResultIds.ts | 71 ++++--------------- src/core/tools/ExecuteCommandTool.ts | 3 + 4 files changed, 24 insertions(+), 92 deletions(-) diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index c1550bdd5a0..223be911459 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -1354,6 +1354,10 @@ export class Task extends EventEmitter implements TaskLike { this.handleWebviewAskResponse("noButtonClicked", text, images) } + public supersedePendingAsk(): void { + this.lastMessageTs = Date.now() + } + /** * Updates the API configuration but preserves the locked tool protocol. * The task's tool protocol is locked at creation time and should NOT change diff --git a/src/core/task/__tests__/validateToolResultIds.spec.ts b/src/core/task/__tests__/validateToolResultIds.spec.ts index e7e5ff74a34..0926e899aad 100644 --- a/src/core/task/__tests__/validateToolResultIds.spec.ts +++ b/src/core/task/__tests__/validateToolResultIds.spec.ts @@ -483,32 +483,9 @@ describe("validateAndFixToolResultIds", () => { expect((resultContent[1] as Anthropic.TextBlockParam).text).toBe("Some additional context") }) - /** - * CRITICAL TEST: Terminal Fallback Duplicate tool_result Scenario - * - * This test verifies the fix for GitHub Issue #10465: - * "Terminal Fallback Causes Duplicate Tool Results and Doom Loop in Unattended Mode" - * - * THE BUG: - * When an external terminal (e.g., PuTTY) fails during long-running commands and - * falls back to the integrated terminal, a race condition can generate duplicate - * tool_result blocks for the SAME valid tool_use_id. This caused: - * 1. API protocol violation ("toolResult blocks exceeds toolUse blocks") - * 2. Unrecoverable "doom loop" breaking unattended access - * - * WHY EXISTING LOGIC DIDN'T CATCH THIS: - * The validation used a Set (existingToolResultIds) which automatically deduplicates, - * so it saw only ONE unique ID. With the ID being valid: - * - missingToolUseIds.length === 0 (ID exists in Set) - * - hasInvalidIds === false (ID is valid) - * Early return triggered, passing BOTH duplicates to the API. - * - * THE FIX: - * Explicit deduplication step that filters out duplicate tool_results - * BEFORE any other processing, keeping only the first occurrence. - */ + // Verifies fix for GitHub #10465: Terminal fallback race condition can generate + // duplicate tool_results with the same valid tool_use_id, causing API protocol violations. it("should filter out duplicate tool_results with identical valid tool_use_ids (terminal fallback scenario)", () => { - // Scenario: Assistant made a single tool call const assistantMessage: Anthropic.MessageParam = { role: "assistant", content: [ @@ -521,8 +498,7 @@ describe("validateAndFixToolResultIds", () => { ], } - // Bug scenario: Two tool_results with the SAME valid tool_use_id - // This happens when terminal fallback generates a second result during the race condition + // Two tool_results with the SAME valid tool_use_id from terminal fallback race condition const userMessage: Anthropic.MessageParam = { role: "user", content: [ @@ -544,18 +520,12 @@ describe("validateAndFixToolResultIds", () => { expect(Array.isArray(result.content)).toBe(true) const resultContent = result.content as Anthropic.ToolResultBlockParam[] - // CRITICAL ASSERTION: Only ONE tool_result should remain - // This prevents the API protocol violation that caused the doom loop + // Only ONE tool_result should remain to prevent API protocol violation expect(resultContent.length).toBe(1) expect(resultContent[0].tool_use_id).toBe("tooluse_QZ-pU8v2QKO8L8fHoJRI2g") - // The first occurrence should be kept (with original content) expect(resultContent[0].content).toBe("No test processes found") }) - /** - * Test that deduplication preserves text blocks alongside tool_results. - * This ensures the fix doesn't accidentally filter out non-tool_result content. - */ it("should preserve text blocks while deduplicating tool_results with same valid ID", () => { const assistantMessage: Anthropic.MessageParam = { role: "assistant", diff --git a/src/core/task/validateToolResultIds.ts b/src/core/task/validateToolResultIds.ts index 07169398141..9dd73723a32 100644 --- a/src/core/task/validateToolResultIds.ts +++ b/src/core/task/validateToolResultIds.ts @@ -82,73 +82,30 @@ export function validateAndFixToolResultIds( (block): block is Anthropic.ToolResultBlockParam => block.type === "tool_result", ) - // ============================================================================ - // CRITICAL: Explicit deduplication of tool_result blocks - // ============================================================================ - // - // WHY THIS IS NECESSARY: - // This deduplication step prevents a "doom loop" that can occur when the external - // terminal (e.g., PuTTY) fails during long-running commands and falls back to - // the integrated terminal. The race condition between terminal fallback and user - // approval can generate duplicate tool_result blocks for the same tool_use_id. - // - // THE BUG SCENARIO: - // 1. Assistant calls execute_command with tool_use_id="tooluse_XYZ" - // 2. Long-running command executes via external terminal - // 3. External terminal fails, triggering fallback to integrated terminal - // 4. First tool_result is generated for "tooluse_XYZ" - // 5. User approval arrives during/after fallback, generating ANOTHER tool_result - // for the same "tooluse_XYZ" - // 6. Result: 2 tool_result blocks for 1 tool_use block - // - // WHY EXISTING CHECKS DIDN'T CATCH THIS: - // The checks below use a Set (existingToolResultIds) which automatically dedupes, - // so it sees only ONE unique ID. With: - // - missingToolUseIds.length === 0 (the ID exists in the Set) - // - hasInvalidIds === false (the ID is valid) - // The early return at the end of this block triggers, and BOTH duplicate - // tool_results pass through to the API, causing a protocol violation. - // - // THE FIX: - // Explicitly filter out duplicate tool_results BEFORE any other processing. - // This ensures exactly one tool_result per tool_use_id, regardless of how - // the duplicates were generated. - // - // REFERENCE: GitHub Issue #10465 - // ============================================================================ - + // Deduplicate tool_result blocks to prevent API protocol violations (GitHub #10465) + // Terminal fallback race conditions can generate duplicate tool_results with the same tool_use_id. + // Filter out duplicates before validation since Set-based checks below would miss them. const seenToolResultIds = new Set() const deduplicatedContent = userMessage.content.filter((block) => { - // Keep all non-tool_result blocks unchanged if (block.type !== "tool_result") { return true } - - // For tool_result blocks: keep only the first occurrence of each tool_use_id if (seenToolResultIds.has(block.tool_use_id)) { - // This is a duplicate - filter it out to prevent API protocol violation - return false + return false // Duplicate - filter out } - seenToolResultIds.add(block.tool_use_id) return true }) - // Update userMessage with deduplicated content userMessage = { ...userMessage, content: deduplicatedContent, } - // Re-extract tool_results from deduplicated content for subsequent processing toolResults = deduplicatedContent.filter( (block): block is Anthropic.ToolResultBlockParam => block.type === "tool_result", ) - // ============================================================================ - // End of deduplication logic - // ============================================================================ - // Build a set of valid tool_use IDs const validToolUseIds = new Set(toolUseBlocks.map((block) => block.id)) @@ -206,15 +163,12 @@ export function validateAndFixToolResultIds( ) } - // Create a mapping of tool_result IDs to corrected IDs - // Strategy: Match by position (first tool_result -> first tool_use, etc.) - // This handles most cases where the mismatch is due to ID confusion - // - // Track which tool_use IDs have been used to prevent duplicates + // Match tool_results to tool_uses by position and fix incorrect IDs const usedToolUseIds = new Set() + const contentArray = userMessage.content as Anthropic.Messages.ContentBlockParam[] - const correctedContent = userMessage.content - .map((block) => { + const correctedContent = contentArray + .map((block: Anthropic.Messages.ContentBlockParam) => { if (block.type !== "tool_result") { return block } @@ -244,17 +198,18 @@ export function validateAndFixToolResultIds( } // No corresponding tool_use for this tool_result, or the ID is already used - // Filter out this orphaned tool_result by returning null return null }) .filter((block): block is NonNullable => block !== null) // Add missing tool_result blocks for any tool_use that doesn't have one - // After the ID correction above, recalculate which tool_use IDs are now covered const coveredToolUseIds = new Set( correctedContent - .filter((b): b is Anthropic.ToolResultBlockParam => b.type === "tool_result") - .map((r) => r.tool_use_id), + .filter( + (b: Anthropic.Messages.ContentBlockParam): b is Anthropic.ToolResultBlockParam => + b.type === "tool_result", + ) + .map((r: Anthropic.ToolResultBlockParam) => r.tool_use_id), ) const stillMissingToolUseIds = toolUseBlocks.filter((toolUse) => !coveredToolUseIds.has(toolUse.id)) diff --git a/src/core/tools/ExecuteCommandTool.ts b/src/core/tools/ExecuteCommandTool.ts index f7271bffe9c..7feb71b0b89 100644 --- a/src/core/tools/ExecuteCommandTool.ts +++ b/src/core/tools/ExecuteCommandTool.ts @@ -116,6 +116,9 @@ export class ExecuteCommandTool extends BaseTool<"execute_command"> { provider?.postMessageToWebview({ type: "commandExecutionStatus", text: JSON.stringify(status) }) await task.say("shell_integration_warning") + // Invalidate pending ask from first execution to prevent race condition + task.supersedePendingAsk() + if (error instanceof ShellIntegrationError) { const [rejected, result] = await executeCommandInTerminal(task, { ...options,