From 77fa946561c0c794515b3944a6f4b3d6ffec9658 Mon Sep 17 00:00:00 2001 From: Daniel Smolsky Date: Fri, 6 Feb 2026 15:46:59 -0500 Subject: [PATCH 1/6] feat: add fuzzy matching fallback for compress string matching Introduces fuzzy string matching using fuzzball library when exact substring match fails. This improves compress tool reliability by allowing close matches with configurable confidence thresholds (minScore: 85%, minGap: 15%) to reduce user friction. --- lib/tools/utils.ts | 247 ++++++++++++++++++++++++++++++++++++++------- package-lock.json | 30 ++++++ package.json | 1 + 3 files changed, 240 insertions(+), 38 deletions(-) diff --git a/lib/tools/utils.ts b/lib/tools/utils.ts index 8adec13e..942f070c 100644 --- a/lib/tools/utils.ts +++ b/lib/tools/utils.ts @@ -1,77 +1,248 @@ +import { partial_ratio } from "fuzzball" import type { WithParts, CompressSummary } from "../state" import type { Logger } from "../logger" /** - * Searches messages for a string and returns the message ID where it's found. - * Searches in text parts, tool outputs, tool inputs, and other textual content. - * Also searches through existing compress summaries to enable chained compression. - * Throws an error if the string is not found or found more than once. + * Configuration for fuzzy matching behavior */ -export function findStringInMessages( +export interface FuzzyConfig { + minScore: number // Minimum score to accept (0-100) + minGap: number // Minimum gap between best and second-best match +} + +export const DEFAULT_FUZZY_CONFIG: FuzzyConfig = { + minScore: 85, + minGap: 15, +} + +interface MatchResult { + messageId: string + messageIndex: number + score: number + matchType: "exact" | "fuzzy" +} + +/** + * Extracts all textual content from a message for matching purposes. + * Includes: text, reasoning, tool (input/output), compaction, and subtask parts. + */ +function extractMessageContent(msg: WithParts): string { + const parts = Array.isArray(msg.parts) ? msg.parts : [] + let content = "" + + for (const part of parts) { + const p = part as Record + + switch (part.type) { + case "text": + case "reasoning": + if (typeof p.text === "string") { + content += " " + p.text + } + break + + case "tool": { + const state = p.state as Record | undefined + if (!state) break + + // Include tool output (completed or error) + if (state.status === "completed" && typeof state.output === "string") { + content += " " + state.output + } else if (state.status === "error" && typeof state.error === "string") { + content += " " + state.error + } + + // Include tool input + if (state.input) { + content += + " " + + (typeof state.input === "string" + ? state.input + : JSON.stringify(state.input)) + } + break + } + + case "compaction": + if (typeof p.summary === "string") { + content += " " + p.summary + } + break + + case "subtask": + if (typeof p.summary === "string") { + content += " " + p.summary + } + if (typeof p.result === "string") { + content += " " + p.result + } + break + } + } + + return content +} + +/** + * Find all exact substring matches across messages and compress summaries. + */ +function findExactMatches( messages: WithParts[], searchString: string, - logger: Logger, - compressSummaries: CompressSummary[] = [], - stringType: "startString" | "endString", -): { messageId: string; messageIndex: number } { - const matches: { messageId: string; messageIndex: number }[] = [] + compressSummaries: CompressSummary[], +): MatchResult[] { + const matches: MatchResult[] = [] + const seenMessageIds = new Set() - // First, search through existing compress summaries - // This allows referencing text from previous compress operations + // Search compress summaries first for (const summary of compressSummaries) { if (summary.summary.includes(searchString)) { const anchorIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId) - if (anchorIndex !== -1) { + if (anchorIndex !== -1 && !seenMessageIds.has(summary.anchorMessageId)) { + seenMessageIds.add(summary.anchorMessageId) matches.push({ messageId: summary.anchorMessageId, messageIndex: anchorIndex, + score: 100, + matchType: "exact", }) } } } - // Then search through raw messages + // Search raw messages for (let i = 0; i < messages.length; i++) { const msg = messages[i] - const parts = Array.isArray(msg.parts) ? msg.parts : [] + if (seenMessageIds.has(msg.info.id)) continue - for (const part of parts) { - let content = "" + const content = extractMessageContent(msg) + if (content.includes(searchString)) { + seenMessageIds.add(msg.info.id) + matches.push({ + messageId: msg.info.id, + messageIndex: i, + score: 100, + matchType: "exact", + }) + } + } - if (part.type === "text" && typeof part.text === "string") { - content = part.text - } else if (part.type === "tool" && part.state?.status === "completed") { - if (typeof part.state.output === "string") { - content = part.state.output - } - if (part.state.input) { - const inputStr = - typeof part.state.input === "string" - ? part.state.input - : JSON.stringify(part.state.input) - content += " " + inputStr - } - } + return matches +} + +/** + * Find all fuzzy substring matches above the minimum score threshold. + */ +function findFuzzyMatches( + messages: WithParts[], + searchString: string, + compressSummaries: CompressSummary[], + minScore: number, +): MatchResult[] { + const matches: MatchResult[] = [] + const seenMessageIds = new Set() - if (content.includes(searchString)) { - matches.push({ messageId: msg.info.id, messageIndex: i }) + // Search compress summaries first + for (const summary of compressSummaries) { + const score = partial_ratio(searchString, summary.summary) + if (score >= minScore) { + const anchorIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId) + if (anchorIndex !== -1 && !seenMessageIds.has(summary.anchorMessageId)) { + seenMessageIds.add(summary.anchorMessageId) + matches.push({ + messageId: summary.anchorMessageId, + messageIndex: anchorIndex, + score, + matchType: "fuzzy", + }) } } } - if (matches.length === 0) { + // Search raw messages + for (let i = 0; i < messages.length; i++) { + const msg = messages[i] + if (seenMessageIds.has(msg.info.id)) continue + + const content = extractMessageContent(msg) + const score = partial_ratio(searchString, content) + if (score >= minScore) { + seenMessageIds.add(msg.info.id) + matches.push({ + messageId: msg.info.id, + messageIndex: i, + score, + matchType: "fuzzy", + }) + } + } + + return matches +} + +/** + * Searches messages for a string and returns the message ID where it's found. + * Uses exact matching first, then falls back to fuzzy matching with confidence thresholds. + * Searches in text parts, tool outputs, tool inputs, and compress summaries. + * Throws an error if no confident match is found. + */ +export function findStringInMessages( + messages: WithParts[], + searchString: string, + logger: Logger, + compressSummaries: CompressSummary[] = [], + stringType: "startString" | "endString", + fuzzyConfig: FuzzyConfig = DEFAULT_FUZZY_CONFIG, +): { messageId: string; messageIndex: number } { + // ============ PHASE 1: Exact Match ============ + const exactMatches = findExactMatches(messages, searchString, compressSummaries) + + if (exactMatches.length === 1) { + return { messageId: exactMatches[0].messageId, messageIndex: exactMatches[0].messageIndex } + } + + if (exactMatches.length > 1) { + throw new Error( + `Found multiple exact matches for ${stringType}. ` + + `Provide more surrounding context to uniquely identify the intended match.`, + ) + } + + // ============ PHASE 2: Fuzzy Match ============ + const fuzzyMatches = findFuzzyMatches( + messages, + searchString, + compressSummaries, + fuzzyConfig.minScore, + ) + + if (fuzzyMatches.length === 0) { throw new Error( - `${stringType} not found in conversation. Make sure the string exists and is spelled exactly as it appears.`, + `${stringType} not found in conversation (exact or fuzzy). ` + + `Make sure the string exists and is spelled correctly.`, ) } - if (matches.length > 1) { + // Sort by score descending to find best match + fuzzyMatches.sort((a, b) => b.score - a.score) + + const best = fuzzyMatches[0] + const secondBest = fuzzyMatches[1] + + // Check confidence gap - best must be significantly better than second best + if (secondBest && best.score - secondBest.score < fuzzyConfig.minGap) { throw new Error( - `Found multiple matches for ${stringType}. Provide more surrounding context to uniquely identify the intended match.`, + `Ambiguous fuzzy match for ${stringType}: ` + + `two candidates scored similarly (${best.score}% vs ${secondBest.score}%). ` + + `Provide more unique text to disambiguate.`, ) } - return matches[0] + logger.info( + `Fuzzy matched ${stringType} with ${best.score}% confidence at message index ${best.messageIndex}`, + ) + + return { messageId: best.messageId, messageIndex: best.messageIndex } } /** diff --git a/package-lock.json b/package-lock.json index 3acafeb3..fdbe031e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "@anthropic-ai/tokenizer": "^0.0.4", "@opencode-ai/sdk": "^1.1.48", + "fuzzball": "^2.2.3", "jsonc-parser": "^3.3.1", "ulid": "^3.0.2", "zod": "^4.3.6" @@ -587,6 +588,17 @@ "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } }, + "node_modules/fuzzball": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/fuzzball/-/fuzzball-2.2.3.tgz", + "integrity": "sha512-sQDb3kjI7auA4YyE1YgEW85MTparcSgRgcCweUK06Cn0niY5lN+uhFiRUZKN4MQVGGiHxlbrYCA4nL1QjOXBLQ==", + "license": "MIT", + "dependencies": { + "heap": ">=0.2.0", + "lodash": "^4.17.21", + "setimmediate": "^1.0.5" + } + }, "node_modules/get-tsconfig": { "version": "4.13.0", "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.0.tgz", @@ -600,12 +612,24 @@ "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" } }, + "node_modules/heap": { + "version": "0.2.7", + "resolved": "https://registry.npmjs.org/heap/-/heap-0.2.7.tgz", + "integrity": "sha512-2bsegYkkHO+h/9MGbn6KWcE45cHZgPANo5LXF7EvWdT0yT2EguSVO1nDgU5c8+ZOPwp2vMNa7YFsJhVcDR9Sdg==", + "license": "MIT" + }, "node_modules/jsonc-parser": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.3.1.tgz", "integrity": "sha512-HUgH65KyejrUFPvHFPbqOY0rsFip3Bo5wb4ngvdi1EpCYWUQDC5V+Y7mZws+DLkr4M//zQJoanu1SP+87Dv1oQ==", "license": "MIT" }, + "node_modules/lodash": { + "version": "4.17.23", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz", + "integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==", + "license": "MIT" + }, "node_modules/prettier": { "version": "3.8.1", "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.8.1.tgz", @@ -632,6 +656,12 @@ "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" } }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==", + "license": "MIT" + }, "node_modules/tiktoken": { "version": "1.0.22", "resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.22.tgz", diff --git a/package.json b/package.json index f898033a..0ca29fc1 100644 --- a/package.json +++ b/package.json @@ -44,6 +44,7 @@ "dependencies": { "@anthropic-ai/tokenizer": "^0.0.4", "@opencode-ai/sdk": "^1.1.48", + "fuzzball": "^2.2.3", "jsonc-parser": "^3.3.1", "ulid": "^3.0.2", "zod": "^4.3.6" From a18755069dc17db74a15d71fb9b70eec93b07ce4 Mon Sep 17 00:00:00 2001 From: Daniel Smolsky Date: Fri, 6 Feb 2026 20:59:03 -0500 Subject: [PATCH 2/6] cleanup --- lib/tools/utils.ts | 80 +++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/lib/tools/utils.ts b/lib/tools/utils.ts index 942f070c..86d7361e 100644 --- a/lib/tools/utils.ts +++ b/lib/tools/utils.ts @@ -2,16 +2,13 @@ import { partial_ratio } from "fuzzball" import type { WithParts, CompressSummary } from "../state" import type { Logger } from "../logger" -/** - * Configuration for fuzzy matching behavior - */ export interface FuzzyConfig { - minScore: number // Minimum score to accept (0-100) - minGap: number // Minimum gap between best and second-best match + minScore: number + minGap: number } export const DEFAULT_FUZZY_CONFIG: FuzzyConfig = { - minScore: 85, + minScore: 95, minGap: 15, } @@ -22,10 +19,6 @@ interface MatchResult { matchType: "exact" | "fuzzy" } -/** - * Extracts all textual content from a message for matching purposes. - * Includes: text, reasoning, tool (input/output), compaction, and subtask parts. - */ function extractMessageContent(msg: WithParts): string { const parts = Array.isArray(msg.parts) ? msg.parts : [] let content = "" @@ -83,9 +76,6 @@ function extractMessageContent(msg: WithParts): string { return content } -/** - * Find all exact substring matches across messages and compress summaries. - */ function findExactMatches( messages: WithParts[], searchString: string, @@ -130,9 +120,6 @@ function findExactMatches( return matches } -/** - * Find all fuzzy substring matches above the minimum score threshold. - */ function findFuzzyMatches( messages: WithParts[], searchString: string, @@ -180,12 +167,6 @@ function findFuzzyMatches( return matches } -/** - * Searches messages for a string and returns the message ID where it's found. - * Uses exact matching first, then falls back to fuzzy matching with confidence thresholds. - * Searches in text parts, tool outputs, tool inputs, and compress summaries. - * Throws an error if no confident match is found. - */ export function findStringInMessages( messages: WithParts[], searchString: string, @@ -194,8 +175,10 @@ export function findStringInMessages( stringType: "startString" | "endString", fuzzyConfig: FuzzyConfig = DEFAULT_FUZZY_CONFIG, ): { messageId: string; messageIndex: number } { - // ============ PHASE 1: Exact Match ============ - const exactMatches = findExactMatches(messages, searchString, compressSummaries) + const searchableMessages = messages.length > 1 ? messages.slice(0, -1) : messages + const lastMessage = messages.length > 0 ? messages[messages.length - 1] : undefined + + const exactMatches = findExactMatches(searchableMessages, searchString, compressSummaries) if (exactMatches.length === 1) { return { messageId: exactMatches[0].messageId, messageIndex: exactMatches[0].messageIndex } @@ -203,38 +186,57 @@ export function findStringInMessages( if (exactMatches.length > 1) { throw new Error( - `Found multiple exact matches for ${stringType}. ` + - `Provide more surrounding context to uniquely identify the intended match.`, + `Found multiple matches for ${stringType}. ` + + `Provide more surrounding context to uniquely identify the intended match.`, ) } - // ============ PHASE 2: Fuzzy Match ============ const fuzzyMatches = findFuzzyMatches( - messages, + searchableMessages, searchString, compressSummaries, fuzzyConfig.minScore, ) if (fuzzyMatches.length === 0) { + if (lastMessage) { + const lastMsgContent = extractMessageContent(lastMessage) + const lastMsgIndex = messages.length - 1 + if (lastMsgContent.includes(searchString)) { + // logger.info( + // `${stringType} found in last message (last resort) at index ${lastMsgIndex}`, + // ) + return { + messageId: lastMessage.info.id, + messageIndex: lastMsgIndex, + } + } + } + throw new Error( - `${stringType} not found in conversation (exact or fuzzy). ` + - `Make sure the string exists and is spelled correctly.`, + `${stringType} not found in conversation. ` + + `Make sure the string exists and is spelled correctly.`, ) } - // Sort by score descending to find best match fuzzyMatches.sort((a, b) => b.score - a.score) const best = fuzzyMatches[0] const secondBest = fuzzyMatches[1] + // Log fuzzy match candidates + // logger.info( + // `Fuzzy match for ${stringType}: best=${best.score}% (msg ${best.messageIndex})` + + // (secondBest + // ? `, secondBest=${secondBest.score}% (msg ${secondBest.messageIndex})` + // : ""), + // ) + // Check confidence gap - best must be significantly better than second best if (secondBest && best.score - secondBest.score < fuzzyConfig.minGap) { throw new Error( - `Ambiguous fuzzy match for ${stringType}: ` + - `two candidates scored similarly (${best.score}% vs ${secondBest.score}%). ` + - `Provide more unique text to disambiguate.`, + `Found multiple matches for ${stringType}. ` + + `Provide more unique surrounding context to disambiguate.`, ) } @@ -245,9 +247,6 @@ export function findStringInMessages( return { messageId: best.messageId, messageIndex: best.messageIndex } } -/** - * Collects all tool callIDs from messages between start and end indices (inclusive). - */ export function collectToolIdsInRange( messages: WithParts[], startIndex: number, @@ -271,9 +270,6 @@ export function collectToolIdsInRange( return toolIds } -/** - * Collects all message IDs from messages between start and end indices (inclusive). - */ export function collectMessageIdsInRange( messages: WithParts[], startIndex: number, @@ -291,10 +287,6 @@ export function collectMessageIdsInRange( return messageIds } -/** - * Collects all textual content (text parts, tool inputs, and tool outputs) - * from a range of messages. Used for token estimation. - */ export function collectContentInRange( messages: WithParts[], startIndex: number, From 99b3a9048cb2614141c4a6a37c6927c949e6c6dd Mon Sep 17 00:00:00 2001 From: Daniel Smolsky Date: Fri, 6 Feb 2026 21:00:04 -0500 Subject: [PATCH 3/6] format --- lib/tools/utils.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/tools/utils.ts b/lib/tools/utils.ts index 86d7361e..41927993 100644 --- a/lib/tools/utils.ts +++ b/lib/tools/utils.ts @@ -187,7 +187,7 @@ export function findStringInMessages( if (exactMatches.length > 1) { throw new Error( `Found multiple matches for ${stringType}. ` + - `Provide more surrounding context to uniquely identify the intended match.`, + `Provide more surrounding context to uniquely identify the intended match.`, ) } @@ -215,7 +215,7 @@ export function findStringInMessages( throw new Error( `${stringType} not found in conversation. ` + - `Make sure the string exists and is spelled correctly.`, + `Make sure the string exists and is spelled correctly.`, ) } @@ -236,7 +236,7 @@ export function findStringInMessages( if (secondBest && best.score - secondBest.score < fuzzyConfig.minGap) { throw new Error( `Found multiple matches for ${stringType}. ` + - `Provide more unique surrounding context to disambiguate.`, + `Provide more unique surrounding context to disambiguate.`, ) } From 0034f2a5bf56aeabbee6df9facbeec7e94a62647 Mon Sep 17 00:00:00 2001 From: Daniel Smolsky Date: Fri, 6 Feb 2026 21:01:56 -0500 Subject: [PATCH 4/6] error wording --- lib/tools/utils.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tools/utils.ts b/lib/tools/utils.ts index 41927993..5cea7c45 100644 --- a/lib/tools/utils.ts +++ b/lib/tools/utils.ts @@ -215,7 +215,7 @@ export function findStringInMessages( throw new Error( `${stringType} not found in conversation. ` + - `Make sure the string exists and is spelled correctly.`, + `Make sure the string exists and is spelled exactly as it appears.`, ) } From 8b1a10ce375ee7e0f8db0db14806de18f2476bd5 Mon Sep 17 00:00:00 2001 From: Daniel Smolsky Date: Fri, 6 Feb 2026 21:04:14 -0500 Subject: [PATCH 5/6] extra log for why --- lib/tools/compress.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tools/compress.ts b/lib/tools/compress.ts index 68ac5a56..4a40a18b 100644 --- a/lib/tools/compress.ts +++ b/lib/tools/compress.ts @@ -63,7 +63,7 @@ export function createCompressTool(ctx: PruneToolContext): ReturnType Date: Fri, 6 Feb 2026 21:22:10 -0500 Subject: [PATCH 6/6] set default contextLimit to 100000 --- README.md | 5 ++--- dcp.schema.json | 2 +- lib/config.ts | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 6460527d..9ac9b6a8 100644 --- a/README.md +++ b/README.md @@ -105,10 +105,9 @@ DCP uses its own config file: > "nudgeEnabled": true, > "nudgeFrequency": 10, > // Token limit at which the model begins actively -> // compressing session context. Best kept around 40-60% of -> // the model's context window to stay in the "smart zone". +> // compressing session context to keep the model in the "smart zone" > // Accepts: number or "X%" (percentage of model's context window) -> "contextLimit": "60%", +> "contextLimit": 100000, > // Additional tools to protect from pruning > "protectedTools": [], > }, diff --git a/dcp.schema.json b/dcp.schema.json index e25f09b1..0486f79f 100644 --- a/dcp.schema.json +++ b/dcp.schema.json @@ -111,7 +111,7 @@ }, "contextLimit": { "description": "When session tokens exceed this limit, a compress nudge is injected (\"X%\" uses percentage of the model's context window)", - "default": "60%", + "default": 100000, "oneOf": [ { "type": "number" diff --git a/lib/config.ts b/lib/config.ts index 1a60307f..0ac4a1a0 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -504,7 +504,7 @@ const defaultConfig: PluginConfig = { nudgeEnabled: true, nudgeFrequency: 10, protectedTools: [...DEFAULT_PROTECTED_TOOLS], - contextLimit: "60%", + contextLimit: 100000, }, distill: { permission: "allow",