diff --git a/lib/tools/compress.ts b/lib/tools/compress.ts index 68ac5a56..4a40a18b 100644 --- a/lib/tools/compress.ts +++ b/lib/tools/compress.ts @@ -63,7 +63,7 @@ export function createCompressTool(ctx: PruneToolContext): ReturnType + + switch (part.type) { + case "text": + case "reasoning": + if (typeof p.text === "string") { + content += " " + p.text + } + break + + case "tool": { + const state = p.state as Record | undefined + if (!state) break + + // Include tool output (completed or error) + if (state.status === "completed" && typeof state.output === "string") { + content += " " + state.output + } else if (state.status === "error" && typeof state.error === "string") { + content += " " + state.error + } + + // Include tool input + if (state.input) { + content += + " " + + (typeof state.input === "string" + ? state.input + : JSON.stringify(state.input)) + } + break + } + + case "compaction": + if (typeof p.summary === "string") { + content += " " + p.summary + } + break + + case "subtask": + if (typeof p.summary === "string") { + content += " " + p.summary + } + if (typeof p.result === "string") { + content += " " + p.result + } + break + } + } + + return content +} + +function findExactMatches( messages: WithParts[], searchString: string, - logger: Logger, - compressSummaries: CompressSummary[] = [], - stringType: "startString" | "endString", -): { messageId: string; messageIndex: number } { - const matches: { messageId: string; messageIndex: number }[] = [] + compressSummaries: CompressSummary[], +): MatchResult[] { + const matches: MatchResult[] = [] + const seenMessageIds = new Set() - // First, search through existing compress summaries - // This allows referencing text from previous compress operations + // Search compress summaries first for (const summary of compressSummaries) { if (summary.summary.includes(searchString)) { const anchorIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId) - if (anchorIndex !== -1) { + if (anchorIndex !== -1 && !seenMessageIds.has(summary.anchorMessageId)) { + seenMessageIds.add(summary.anchorMessageId) matches.push({ messageId: summary.anchorMessageId, messageIndex: anchorIndex, + score: 100, + matchType: "exact", }) } } } - // Then search through raw messages + // Search raw messages for (let i = 0; i < messages.length; i++) { const msg = messages[i] - const parts = Array.isArray(msg.parts) ? msg.parts : [] + if (seenMessageIds.has(msg.info.id)) continue - for (const part of parts) { - let content = "" + const content = extractMessageContent(msg) + if (content.includes(searchString)) { + seenMessageIds.add(msg.info.id) + matches.push({ + messageId: msg.info.id, + messageIndex: i, + score: 100, + matchType: "exact", + }) + } + } - if (part.type === "text" && typeof part.text === "string") { - content = part.text - } else if (part.type === "tool" && part.state?.status === "completed") { - if (typeof part.state.output === "string") { - content = part.state.output - } - if (part.state.input) { - const inputStr = - typeof part.state.input === "string" - ? part.state.input - : JSON.stringify(part.state.input) - content += " " + inputStr - } - } + return matches +} - if (content.includes(searchString)) { - matches.push({ messageId: msg.info.id, messageIndex: i }) +function findFuzzyMatches( + messages: WithParts[], + searchString: string, + compressSummaries: CompressSummary[], + minScore: number, +): MatchResult[] { + const matches: MatchResult[] = [] + const seenMessageIds = new Set() + + // Search compress summaries first + for (const summary of compressSummaries) { + const score = partial_ratio(searchString, summary.summary) + if (score >= minScore) { + const anchorIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId) + if (anchorIndex !== -1 && !seenMessageIds.has(summary.anchorMessageId)) { + seenMessageIds.add(summary.anchorMessageId) + matches.push({ + messageId: summary.anchorMessageId, + messageIndex: anchorIndex, + score, + matchType: "fuzzy", + }) } } } - if (matches.length === 0) { + // Search raw messages + for (let i = 0; i < messages.length; i++) { + const msg = messages[i] + if (seenMessageIds.has(msg.info.id)) continue + + const content = extractMessageContent(msg) + const score = partial_ratio(searchString, content) + if (score >= minScore) { + seenMessageIds.add(msg.info.id) + matches.push({ + messageId: msg.info.id, + messageIndex: i, + score, + matchType: "fuzzy", + }) + } + } + + return matches +} + +export function findStringInMessages( + messages: WithParts[], + searchString: string, + logger: Logger, + compressSummaries: CompressSummary[] = [], + stringType: "startString" | "endString", + fuzzyConfig: FuzzyConfig = DEFAULT_FUZZY_CONFIG, +): { messageId: string; messageIndex: number } { + const searchableMessages = messages.length > 1 ? messages.slice(0, -1) : messages + const lastMessage = messages.length > 0 ? messages[messages.length - 1] : undefined + + const exactMatches = findExactMatches(searchableMessages, searchString, compressSummaries) + + if (exactMatches.length === 1) { + return { messageId: exactMatches[0].messageId, messageIndex: exactMatches[0].messageIndex } + } + + if (exactMatches.length > 1) { throw new Error( - `${stringType} not found in conversation. Make sure the string exists and is spelled exactly as it appears.`, + `Found multiple matches for ${stringType}. ` + + `Provide more surrounding context to uniquely identify the intended match.`, ) } - if (matches.length > 1) { + const fuzzyMatches = findFuzzyMatches( + searchableMessages, + searchString, + compressSummaries, + fuzzyConfig.minScore, + ) + + if (fuzzyMatches.length === 0) { + if (lastMessage) { + const lastMsgContent = extractMessageContent(lastMessage) + const lastMsgIndex = messages.length - 1 + if (lastMsgContent.includes(searchString)) { + // logger.info( + // `${stringType} found in last message (last resort) at index ${lastMsgIndex}`, + // ) + return { + messageId: lastMessage.info.id, + messageIndex: lastMsgIndex, + } + } + } + throw new Error( - `Found multiple matches for ${stringType}. Provide more surrounding context to uniquely identify the intended match.`, + `${stringType} not found in conversation. ` + + `Make sure the string exists and is spelled exactly as it appears.`, ) } - return matches[0] + fuzzyMatches.sort((a, b) => b.score - a.score) + + const best = fuzzyMatches[0] + const secondBest = fuzzyMatches[1] + + // Log fuzzy match candidates + // logger.info( + // `Fuzzy match for ${stringType}: best=${best.score}% (msg ${best.messageIndex})` + + // (secondBest + // ? `, secondBest=${secondBest.score}% (msg ${secondBest.messageIndex})` + // : ""), + // ) + + // Check confidence gap - best must be significantly better than second best + if (secondBest && best.score - secondBest.score < fuzzyConfig.minGap) { + throw new Error( + `Found multiple matches for ${stringType}. ` + + `Provide more unique surrounding context to disambiguate.`, + ) + } + + logger.info( + `Fuzzy matched ${stringType} with ${best.score}% confidence at message index ${best.messageIndex}`, + ) + + return { messageId: best.messageId, messageIndex: best.messageIndex } } -/** - * Collects all tool callIDs from messages between start and end indices (inclusive). - */ export function collectToolIdsInRange( messages: WithParts[], startIndex: number, @@ -100,9 +270,6 @@ export function collectToolIdsInRange( return toolIds } -/** - * Collects all message IDs from messages between start and end indices (inclusive). - */ export function collectMessageIdsInRange( messages: WithParts[], startIndex: number, @@ -120,10 +287,6 @@ export function collectMessageIdsInRange( return messageIds } -/** - * Collects all textual content (text parts, tool inputs, and tool outputs) - * from a range of messages. Used for token estimation. - */ export function collectContentInRange( messages: WithParts[], startIndex: number, diff --git a/package-lock.json b/package-lock.json index db846ddd..06f24aaf 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "@anthropic-ai/tokenizer": "^0.0.4", "@opencode-ai/sdk": "^1.1.48", + "fuzzball": "^2.2.3", "jsonc-parser": "^3.3.1", "ulid": "^3.0.2", "zod": "^4.3.6" @@ -587,6 +588,17 @@ "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } }, + "node_modules/fuzzball": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/fuzzball/-/fuzzball-2.2.3.tgz", + "integrity": "sha512-sQDb3kjI7auA4YyE1YgEW85MTparcSgRgcCweUK06Cn0niY5lN+uhFiRUZKN4MQVGGiHxlbrYCA4nL1QjOXBLQ==", + "license": "MIT", + "dependencies": { + "heap": ">=0.2.0", + "lodash": "^4.17.21", + "setimmediate": "^1.0.5" + } + }, "node_modules/get-tsconfig": { "version": "4.13.0", "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.0.tgz", @@ -600,12 +612,24 @@ "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" } }, + "node_modules/heap": { + "version": "0.2.7", + "resolved": "https://registry.npmjs.org/heap/-/heap-0.2.7.tgz", + "integrity": "sha512-2bsegYkkHO+h/9MGbn6KWcE45cHZgPANo5LXF7EvWdT0yT2EguSVO1nDgU5c8+ZOPwp2vMNa7YFsJhVcDR9Sdg==", + "license": "MIT" + }, "node_modules/jsonc-parser": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.3.1.tgz", "integrity": "sha512-HUgH65KyejrUFPvHFPbqOY0rsFip3Bo5wb4ngvdi1EpCYWUQDC5V+Y7mZws+DLkr4M//zQJoanu1SP+87Dv1oQ==", "license": "MIT" }, + "node_modules/lodash": { + "version": "4.17.23", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz", + "integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==", + "license": "MIT" + }, "node_modules/prettier": { "version": "3.8.1", "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.8.1.tgz", @@ -632,6 +656,12 @@ "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" } }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==", + "license": "MIT" + }, "node_modules/tiktoken": { "version": "1.0.22", "resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.22.tgz", diff --git a/package.json b/package.json index 39e984ae..fc468c7d 100644 --- a/package.json +++ b/package.json @@ -44,6 +44,7 @@ "dependencies": { "@anthropic-ai/tokenizer": "^0.0.4", "@opencode-ai/sdk": "^1.1.48", + "fuzzball": "^2.2.3", "jsonc-parser": "^3.3.1", "ulid": "^3.0.2", "zod": "^4.3.6"