diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 71ddc54f10e..03486b42be7 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -158,6 +158,7 @@ import { AutoApprovalHandler, checkAutoApproval } from "../auto-approval" import { MessageManager } from "../message-manager" import { validateAndFixToolResultIds } from "./validateToolResultIds" import { mergeConsecutiveApiMessages } from "./mergeConsecutiveApiMessages" +import { appendEnvironmentDetails, stripAppendedEnvironmentDetails } from "./appendEnvironmentDetails" const MAX_EXPONENTIAL_BACKOFF_SECONDS = 600 // 10 minutes const DEFAULT_USAGE_COLLECTION_TIMEOUT_MS = 5000 // 5 seconds @@ -2623,18 +2624,12 @@ export class Task extends EventEmitter implements TaskLike { if (lastUserMsgIndex >= 0) { const lastUserMsg = this.apiConversationHistory[lastUserMsgIndex] as any if (Array.isArray(lastUserMsg.content)) { - // Remove any existing environment_details blocks before adding fresh ones - const contentWithoutEnvDetails = lastUserMsg.content.filter((block: any) => { - if (block.type === "text" && typeof block.text === "string") { - const isEnvironmentDetailsBlock = - block.text.trim().startsWith("") && - block.text.trim().endsWith("") - return !isEnvironmentDetailsBlock - } - return true - }) - // Add fresh environment details - lastUserMsg.content = [...contentWithoutEnvDetails, { type: "text" as const, text: environmentDetails }] + // Remove any existing environment_details blocks before adding fresh ones, + // then append env details to the last text or tool_result block. + // This avoids creating standalone trailing text blocks which can break + // interleaved-thinking models like DeepSeek reasoner. 
+ const contentWithoutEnvDetails = stripAppendedEnvironmentDetails(lastUserMsg.content) + lastUserMsg.content = appendEnvironmentDetails(contentWithoutEnvDetails, environmentDetails) } } @@ -2804,23 +2799,12 @@ export class Task extends EventEmitter implements TaskLike { // Remove any existing environment_details blocks before adding fresh ones. // This prevents duplicate environment details when resuming tasks, // where the old user message content may already contain environment details from the previous session. - // We check for both opening and closing tags to ensure we're matching complete environment detail blocks, - // not just mentions of the tag in regular content. - const contentWithoutEnvDetails = parsedUserContent.filter((block) => { - if (block.type === "text" && typeof block.text === "string") { - // Check if this text block is a complete environment_details block - // by verifying it starts with the opening tag and ends with the closing tag - const isEnvironmentDetailsBlock = - block.text.trim().startsWith("") && - block.text.trim().endsWith("") - return !isEnvironmentDetailsBlock - } - return true - }) + const contentWithoutEnvDetails = stripAppendedEnvironmentDetails(parsedUserContent) - // Add environment details as its own text block, separate from tool - // results. - let finalUserContent = [...contentWithoutEnvDetails, { type: "text" as const, text: environmentDetails }] + // Append environment details to the last text or tool_result block. + // This avoids creating standalone trailing text blocks which can break + // interleaved-thinking models like DeepSeek reasoner that expect specific message shapes. + let finalUserContent = appendEnvironmentDetails(contentWithoutEnvDetails, environmentDetails) // Only add user message to conversation history if: // 1. This is the first attempt (retryAttempt === 0), AND // 2. 
The original userContent was not empty (empty signals delegation resume where diff --git a/src/core/task/__tests__/appendEnvironmentDetails.spec.ts b/src/core/task/__tests__/appendEnvironmentDetails.spec.ts new file mode 100644 index 00000000000..9601b051ad7 --- /dev/null +++ b/src/core/task/__tests__/appendEnvironmentDetails.spec.ts @@ -0,0 +1,382 @@ +import type { TextPart, ImagePart } from "ai" +import type { + UserContentPart, + LegacyToolResultBlock, + LegacyToolResultTextBlock, +} from "../../task-persistence/rooMessage" +import { + appendEnvironmentDetails, + removeEnvironmentDetailsBlocks, + stripAppendedEnvironmentDetails, +} from "../appendEnvironmentDetails" + +describe("appendEnvironmentDetails", () => { + const envDetails = "<environment_details>\n# Test\nSome details\n</environment_details>" + + describe("empty content", () => { + it("should return a text block when content is empty", () => { + const result = appendEnvironmentDetails([], envDetails) + + expect(result).toHaveLength(1) + expect(result[0].type).toBe("text") + expect((result[0] as TextPart).text).toBe(envDetails) + }) + }) + + describe("text block handling", () => { + it("should append to the last text block", () => { + const content: UserContentPart[] = [{ type: "text", text: "User message" }] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(1) + expect(result[0].type).toBe("text") + expect((result[0] as TextPart).text).toBe("User message\n\n" + envDetails) + }) + + it("should append to the last text block when multiple text blocks exist", () => { + const content: UserContentPart[] = [ + { type: "text", text: "First message" }, + { type: "text", text: "Second message" }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(2) + expect((result[0] as TextPart).text).toBe("First message") + expect((result[1] as TextPart).text).toBe("Second message\n\n" + envDetails) + }) + + it("should not mutate the original content array", () => { + 
const content: UserContentPart[] = [{ type: "text", text: "Original" }] + + appendEnvironmentDetails(content, envDetails) + + expect((content[0] as TextPart).text).toBe("Original") + }) + }) + + describe("tool_result block handling", () => { + it("should append to tool_result with string content", () => { + const content: UserContentPart[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result text", + }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(1) + expect(result[0].type).toBe("tool_result") + const toolResult = result[0] as LegacyToolResultBlock + expect(toolResult.content).toBe("Tool result text\n\n" + envDetails) + expect(toolResult.tool_use_id).toBe("tool-123") + }) + + it("should append to tool_result with undefined content", () => { + const content: UserContentPart[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + } as LegacyToolResultBlock, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(1) + const toolResult = result[0] as LegacyToolResultBlock + expect(toolResult.content).toBe(envDetails) + }) + + it("should append to tool_result with array content containing text", () => { + const content: UserContentPart[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: [{ type: "text", text: "Result line 1" }], + }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(1) + const toolResult = result[0] as LegacyToolResultBlock + expect(Array.isArray(toolResult.content)).toBe(true) + const contentArray = toolResult.content as LegacyToolResultTextBlock[] + expect(contentArray).toHaveLength(1) + expect(contentArray[0].text).toBe("Result line 1\n\n" + envDetails) + }) + + it("should append to the last text block in tool_result array content", () => { + const content: UserContentPart[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: [ + 
{ type: "text", text: "First text" }, + { type: "text", text: "Last text" }, + ], + }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + const toolResult = result[0] as LegacyToolResultBlock + const contentArray = toolResult.content as LegacyToolResultTextBlock[] + expect(contentArray).toHaveLength(2) + expect(contentArray[0].text).toBe("First text") + expect(contentArray[1].text).toBe("Last text\n\n" + envDetails) + }) + + it("should preserve is_error flag on tool_result", () => { + const content: UserContentPart[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Error message", + is_error: true, + }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + const toolResult = result[0] as LegacyToolResultBlock + expect(toolResult.is_error).toBe(true) + expect(toolResult.content).toBe("Error message\n\n" + envDetails) + }) + }) + + describe("mixed content handling", () => { + it("should append to last text block when text comes after tool_result", () => { + const content: UserContentPart[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result", + }, + { type: "text", text: "User comment" }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(2) + expect((result[0] as LegacyToolResultBlock).content).toBe("Tool result") + expect((result[1] as TextPart).text).toBe("User comment\n\n" + envDetails) + }) + + it("should append to last tool_result when no text block follows", () => { + const content: UserContentPart[] = [ + { type: "text", text: "User message" }, + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result", + }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(2) + expect((result[0] as TextPart).text).toBe("User message") + expect((result[1] as LegacyToolResultBlock).content).toBe("Tool result\n\n" + envDetails) + }) + + it("should add new text block 
when content only has images", () => { + const content: UserContentPart[] = [ + { + type: "image", + image: new Uint8Array([1, 2, 3]), + } as ImagePart, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(2) + expect(result[0].type).toBe("image") + expect(result[1].type).toBe("text") + expect((result[1] as TextPart).text).toBe(envDetails) + }) + + it("should handle multiple tool_results and append to the last one", () => { + const content: UserContentPart[] = [ + { + type: "tool_result", + tool_use_id: "tool-1", + content: "First result", + }, + { + type: "tool_result", + tool_use_id: "tool-2", + content: "Second result", + }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(2) + expect((result[0] as LegacyToolResultBlock).content).toBe("First result") + expect((result[1] as LegacyToolResultBlock).content).toBe("Second result\n\n" + envDetails) + }) + }) +}) + +describe("removeEnvironmentDetailsBlocks", () => { + const envDetailsBlock: UserContentPart = { + type: "text", + text: "<environment_details>\n# Test\nSome details\n</environment_details>", + } + + it("should remove standalone environment_details text blocks", () => { + const content: UserContentPart[] = [{ type: "text", text: "User message" }, envDetailsBlock] + + const result = removeEnvironmentDetailsBlocks(content) + + expect(result).toHaveLength(1) + expect((result[0] as TextPart).text).toBe("User message") + }) + + it("should not remove text blocks that mention environment_details but aren't complete blocks", () => { + const content: UserContentPart[] = [ + { type: "text", text: "Let me explain <environment_details> tags" }, + { type: "text", text: "The closing tag is </environment_details>" }, + ] + + const result = removeEnvironmentDetailsBlocks(content) + + expect(result).toHaveLength(2) + }) + + it("should preserve non-text blocks", () => { + const content: UserContentPart[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Result", + }, + envDetailsBlock, + { + 
type: "image", + image: new Uint8Array([1, 2, 3]), + } as ImagePart, + ] + + const result = removeEnvironmentDetailsBlocks(content) + + expect(result).toHaveLength(2) + expect(result[0].type).toBe("tool_result") + expect(result[1].type).toBe("image") + }) + + it("should handle empty content", () => { + const result = removeEnvironmentDetailsBlocks([]) + expect(result).toHaveLength(0) + }) + + it("should handle whitespace around environment_details tags", () => { + const content: UserContentPart[] = [ + { + type: "text", + text: " <environment_details>\n# Test\nSome details\n</environment_details> ", + }, + ] + + const result = removeEnvironmentDetailsBlocks(content) + + expect(result).toHaveLength(0) + }) +}) + +describe("stripAppendedEnvironmentDetails", () => { + const envDetails = "<environment_details>\n# Test\nSome details\n</environment_details>" + + it("should strip environment details from the end of a text block", () => { + const content: UserContentPart[] = [{ type: "text", text: "User message\n\n" + envDetails }] + + const result = stripAppendedEnvironmentDetails(content) + + expect(result).toHaveLength(1) + expect((result[0] as TextPart).text).toBe("User message") + }) + + it("should strip environment details from tool_result string content", () => { + const content: UserContentPart[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result\n\n" + envDetails, + }, + ] + + const result = stripAppendedEnvironmentDetails(content) + + expect(result).toHaveLength(1) + expect((result[0] as LegacyToolResultBlock).content).toBe("Tool result") + }) + + it("should strip environment details from tool_result array content", () => { + const content: UserContentPart[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: [{ type: "text", text: "Result text\n\n" + envDetails }], + }, + ] + + const result = stripAppendedEnvironmentDetails(content) + + const toolResult = result[0] as LegacyToolResultBlock + const contentArray = toolResult.content as LegacyToolResultTextBlock[] + 
expect(contentArray[0].text).toBe("Result text") + }) + + it("should also remove standalone environment_details blocks", () => { + const content: UserContentPart[] = [ + { type: "text", text: "User message" }, + { type: "text", text: envDetails }, + ] + + const result = stripAppendedEnvironmentDetails(content) + + expect(result).toHaveLength(1) + expect((result[0] as TextPart).text).toBe("User message") + }) + + it("should handle content without environment details", () => { + const content: UserContentPart[] = [ + { type: "text", text: "User message" }, + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result", + }, + ] + + const result = stripAppendedEnvironmentDetails(content) + + expect(result).toEqual(content) + }) + + it("should handle empty content", () => { + const result = stripAppendedEnvironmentDetails([]) + expect(result).toHaveLength(0) + }) + + it("should preserve is_error flag when stripping from tool_result", () => { + const content: UserContentPart[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Error\n\n" + envDetails, + is_error: true, + }, + ] + + const result = stripAppendedEnvironmentDetails(content) + + const toolResult = result[0] as LegacyToolResultBlock + expect(toolResult.is_error).toBe(true) + expect(toolResult.content).toBe("Error") + }) +}) diff --git a/src/core/task/appendEnvironmentDetails.ts b/src/core/task/appendEnvironmentDetails.ts new file mode 100644 index 00000000000..09384564589 --- /dev/null +++ b/src/core/task/appendEnvironmentDetails.ts @@ -0,0 +1,228 @@ +import type { UserContentPart, LegacyToolResultBlock, LegacyToolResultTextBlock } from "../task-persistence/rooMessage" +import type { TextPart } from "ai" + +/** + * Appends environment details to the last text block or tool_result block in user content. + * This avoids creating a standalone trailing text block, which can break interleaved-thinking + * models like DeepSeek reasoner that expect specific message shapes. 
+ *
+ * Priority:
+ * 1. Append to the last text or tool_result block, scanning backwards from the end of the
+ *    message (trailing blocks of other types, such as images, are skipped)
+ * 2. If no text or tool_result block exists, add a new text block at the end (fallback)
+ *
+ * @param content - Array of content blocks from a user message
+ * @param environmentDetails - The environment details string to append
+ * @returns New array with environment details appended to the appropriate block
+ */
+export function appendEnvironmentDetails(content: UserContentPart[], environmentDetails: string): UserContentPart[] {
+	if (content.length === 0) {
+		// No existing content, just return the environment details as a text block
+		return [{ type: "text" as const, text: environmentDetails }]
+	}
+
+	// Create a shallow copy so we don't mutate the original array
+	const result = [...content]
+
+	// Find the last suitable block (text or tool_result)
+	let lastSuitableIndex = -1
+	for (let i = result.length - 1; i >= 0; i--) {
+		const block = result[i]
+		if (block.type === "text" || block.type === "tool_result") {
+			lastSuitableIndex = i
+			break
+		}
+	}
+
+	if (lastSuitableIndex === -1) {
+		// No text or tool_result block found (content only has images?), add new text block
+		result.push({ type: "text" as const, text: environmentDetails })
+		return result
+	}
+
+	const lastBlock = result[lastSuitableIndex]
+
+	if (lastBlock.type === "text") {
+		// Append to existing text block; spread first so any extra fields on the part survive
+		result[lastSuitableIndex] = {
+			...(lastBlock as TextPart),
+			text: (lastBlock as TextPart).text + "\n\n" + environmentDetails,
+		}
+	} else if (lastBlock.type === "tool_result") {
+		// Append to tool_result content
+		result[lastSuitableIndex] = appendToToolResult(lastBlock as LegacyToolResultBlock, environmentDetails)
+	}
+
+	return result
+}
+
+/**
+ * Appends text to a tool_result block's content.
+ * Tool result content can be a string or an array of text blocks.
+ */ +function appendToToolResult(toolResult: LegacyToolResultBlock, textToAppend: string): LegacyToolResultBlock { + const { content, ...rest } = toolResult + + if (content === undefined || content === null) { + // No existing content: the appended text becomes the whole content + return { + ...rest, + content: textToAppend, + } + } + + if (typeof content === "string") { + // String content: concatenate, separated by a blank line + return { + ...rest, + content: content + "\n\n" + textToAppend, + } + } + + if (Array.isArray(content)) { + // Array content - find the last text block and append, or add new text block + const contentCopy = [...content] + let lastTextIndex = -1 + + for (let i = contentCopy.length - 1; i >= 0; i--) { + if (contentCopy[i].type === "text") { + lastTextIndex = i + break + } + } + + if (lastTextIndex >= 0) { + // Append to last text block in array + const lastTextBlock = contentCopy[lastTextIndex] as LegacyToolResultTextBlock + contentCopy[lastTextIndex] = { + type: "text" as const, + text: lastTextBlock.text + "\n\n" + textToAppend, + } + } else { + // No text block in array, add new one + contentCopy.push({ type: "text" as const, text: textToAppend }) + } + + return { + ...rest, + content: contentCopy, + } + } + + // Content of any other shape cannot be appended to: it is replaced by the text + return { + ...rest, + content: textToAppend, + } +} + +/** + * Removes any existing environment_details blocks from the content array. + * A block is considered an environment_details block if it's a text block + * that, once trimmed, starts with <environment_details> and ends with </environment_details>. 
+ *
+ * @param content - Array of content blocks to filter
+ * @returns New array with environment_details blocks removed
+ */
+export function removeEnvironmentDetailsBlocks(content: UserContentPart[]): UserContentPart[] {
+	return content.filter((block) => {
+		if (block.type === "text" && typeof (block as TextPart).text === "string") {
+			const trimmed = (block as TextPart).text.trim()
+			const isEnvironmentDetailsBlock =
+				trimmed.startsWith("<environment_details>") && trimmed.endsWith("</environment_details>")
+			return !isEnvironmentDetailsBlock
+		}
+		return true
+	})
+}
+
+/**
+ * Strips environment details from the last text or tool_result block in the content.
+ * This handles the case where environment details were appended to an existing block
+ * rather than added as a standalone block. The block inspected is the one that
+ * appendEnvironmentDetails writes to: trailing blocks of other types (such as images)
+ * are skipped, otherwise stale details in front of them would never be removed.
+ *
+ * @param content - Array of content blocks
+ * @returns New array with environment details stripped from the last suitable block
+ */
+export function stripAppendedEnvironmentDetails(content: UserContentPart[]): UserContentPart[] {
+	if (content.length === 0) {
+		return content
+	}
+
+	// First, remove any standalone environment_details blocks.
+	// filter() hands back a fresh array, so it can be updated in place below.
+	const result = removeEnvironmentDetailsBlocks(content)
+
+	// Then find the last text or tool_result block, scanning backwards from the end
+	let targetIndex = result.length - 1
+	while (targetIndex >= 0 && result[targetIndex].type !== "text" && result[targetIndex].type !== "tool_result") {
+		targetIndex--
+	}
+	if (targetIndex < 0) {
+		return result
+	}
+
+	const target = result[targetIndex]
+	if (target.type === "text") {
+		const strippedText = stripEnvDetailsFromText((target as TextPart).text)
+		if (strippedText !== (target as TextPart).text) {
+			result[targetIndex] = { ...(target as TextPart), text: strippedText }
+		}
+	} else {
+		result[targetIndex] = stripEnvDetailsFromToolResult(target as LegacyToolResultBlock)
+	}
+
+	return result
+}
+
+/**
+ * Strips environment details from the end of a text string.
+ */
+function stripEnvDetailsFromText(text: string): string {
+	// The appended block is the final <environment_details>...</environment_details> pair, so anchor on
+	// the LAST opening tag: earlier text that merely mentions the tag must survive the strip.
+	const trimmed = text.trimEnd()
+	const start = trimmed.endsWith("</environment_details>") ? trimmed.lastIndexOf("<environment_details>") : -1
+	return start === -1 ? text : trimmed.slice(0, start).replace(/\n+$/, "")
+}
+
+/**
+ * Strips environment details from a tool_result block's content.
+ * Returns the original object (same reference) when nothing was stripped,
+ * otherwise a copy with the cleaned content and all other fields preserved.
+ */
+function stripEnvDetailsFromToolResult(toolResult: LegacyToolResultBlock): LegacyToolResultBlock {
+	const { content, ...rest } = toolResult
+
+	if (content === undefined || content === null) {
+		return toolResult
+	}
+
+	if (typeof content === "string") {
+		const strippedContent = stripEnvDetailsFromText(content)
+		if (strippedContent === content) {
+			return toolResult
+		}
+		return { ...rest, content: strippedContent }
+	}
+
+	if (Array.isArray(content)) {
+		let changed = false
+		const newContent = content.map((block) => {
+			if (block.type === "text") {
+				const strippedText = stripEnvDetailsFromText(block.text)
+				if (strippedText !== block.text) {
+					changed = true
+					return { type: "text" as const, text: strippedText }
+				}
+			}
+			return block
+		})
+
+		return changed ? { ...rest, content: newContent } : toolResult
+	}
+
+	return toolResult
+}