From 57ad8435a4ea713e10c435295ba294876e153180 Mon Sep 17 00:00:00 2001 From: engineer Date: Sun, 15 Feb 2026 10:18:03 -0800 Subject: [PATCH] fix(reflection): prevent action loops by reducing max attempts and adding loop detection (#80) - Reduce MAX_ATTEMPTS from 5 to 3 to stop pushing stuck agents - Add detectActionLoop() to detect repeated identical bash commands - Update buildEscalatingFeedback() with action loop and final attempt messages - Update buildSelfAssessmentPrompt() with attempt count awareness - Add 17 new unit tests for loop detection and attempt-aware prompting --- reflection-3.test-helpers.ts | 102 ++++++++++++++++-- reflection-3.ts | 115 +++++++++++++++++++-- test/reflection-3.unit.test.ts | 182 ++++++++++++++++++++++++++++++++- 3 files changed, 380 insertions(+), 19 deletions(-) diff --git a/reflection-3.test-helpers.ts b/reflection-3.test-helpers.ts index 0f21ccf..336889e 100644 --- a/reflection-3.test-helpers.ts +++ b/reflection-3.test-helpers.ts @@ -87,7 +87,7 @@ export function inferTaskType(text: string): TaskType { return "other" } -export function buildSelfAssessmentPrompt(context: TaskContext, agents: string, lastAssistantText?: string): string { +export function buildSelfAssessmentPrompt(context: TaskContext, agents: string, lastAssistantText?: string, attemptCount?: number): string { const safeContext = { ...context, detectedSignals: Array.isArray(context.detectedSignals) ? context.detectedSignals : [] @@ -107,6 +107,11 @@ export function buildSelfAssessmentPrompt(context: TaskContext, agents: string, ? `\n## Agent's Last Response\n${lastAssistantText.slice(0, 4000)}\n` : "" + const currentAttempt = attemptCount || 0 + const attemptSection = currentAttempt > 0 + ? `\n## Reflection History\n- This is reflection attempt ${currentAttempt + 1}/${MAX_ATTEMPTS} for this task.\n- Previous reflections found the task incomplete.\n- If you are repeating the same actions without progress, set "stuck": true and explain what is blocking you.\n` + : "" + return `SELF-ASSESS REFLECTION-3 You are evaluating an agent's work against workflow requirements. @@ -121,7 +126,7 @@ Analyze the task context, the agent's last response, and the tool signals to det ## Tool Commands Run ${safeContext.toolsSummary} -${assistantSection} +${assistantSection}${attemptSection} ${agents ? `## Project Instructions\n${agents.slice(0, 800)}\n\n` : ""}Return JSON only: { "task_summary": "...", @@ -148,7 +153,9 @@ Rules: - Direct pushes to main/master are not allowed; require a PR instead. - Provide a PR URL and CI status when a PR is required. - If stuck, propose an alternate approach. -- If you need user action (auth, 2FA, credentials), list it in needs_user_action.` +- If you need user action (auth, 2FA, credentials), list it in needs_user_action. +- If you are repeating the same actions (deploy, test, build) without making progress, set "stuck": true. +- Do not retry the same failing approach more than twice — try something different or report stuck.` } export function parseSelfAssessmentJson(text: string | null | undefined): SelfAssessment | null { @@ -416,13 +423,74 @@ export function getGitHubCopilotModelForRouting(modelSpec: string | null | undef } const FEEDBACK_MARKER = "## Reflection-3:" -const MAX_ATTEMPTS = 5 +const MAX_ATTEMPTS = 3 +const ACTION_LOOP_MIN_COMMANDS = 4 +const ACTION_LOOP_REPETITION_THRESHOLD = 0.6 + +/** + * Detects when the agent is repeating the same commands/actions without progress. 
+ * Unlike detectPlanningLoop (read-heavy without writes), this catches action loops + * where the agent IS making write-like operations but repeating the same ones. + */ +export function detectActionLoop(messages: any[]): { + detected: boolean + repeatedCommands: string[] + totalCommands: number +} { + if (!Array.isArray(messages)) { + return { detected: false, repeatedCommands: [], totalCommands: 0 } + } + + const commands: string[] = [] + for (const msg of messages) { + if (msg.info?.role !== "assistant") continue + for (const part of msg.parts || []) { + if (part.type !== "tool") continue + const toolName = (part.tool || "").toString().toLowerCase() + const input = part.state?.input || {} + + if (toolName === "bash") { + const cmd = (input.command || input.cmd || "").toString().trim() + if (cmd) { + const normalized = cmd.replace(/\s+/g, " ").replace(/\d{10,}/g, "TIMESTAMP").toLowerCase() + commands.push(normalized) + } + } else if (toolName !== "read" && toolName !== "glob" && toolName !== "grep" && toolName !== "todowrite") { + const key = `${toolName}:${JSON.stringify(input).slice(0, 100)}` + commands.push(key) + } + } + } + + if (commands.length < ACTION_LOOP_MIN_COMMANDS) { + return { detected: false, repeatedCommands: [], totalCommands: commands.length } + } + + const counts = new Map() + for (const cmd of commands) { + counts.set(cmd, (counts.get(cmd) || 0) + 1) + } + + const repeatedCommands: string[] = [] + let repeatedCount = 0 + for (const [cmd, count] of counts) { + if (count >= 3) { + repeatedCommands.push(cmd) + repeatedCount += count + } + } + + const detected = repeatedCommands.length > 0 && repeatedCount / commands.length >= ACTION_LOOP_REPETITION_THRESHOLD + + return { detected, repeatedCommands, totalCommands: commands.length } +} export function buildEscalatingFeedback( attemptCount: number, severity: string, verdict: { feedback?: string; missing?: string[]; next_actions?: string[] } | undefined | null, - isPlanningLoop: boolean + isPlanningLoop: boolean, + isActionLoop?: boolean ): string { const safeVerdict = verdict ?? {} const missingItems = Array.isArray(safeVerdict.missing) ? safeVerdict.missing : [] @@ -445,6 +513,19 @@ Pick the FIRST item from your existing todo list and implement it. Open a file w Start coding NOW. No more planning.` } + if (isActionLoop) { + return `${FEEDBACK_MARKER} STOP: Action Loop Detected (attempt ${attemptCount}/${MAX_ATTEMPTS}) + +You are repeating the same commands without making progress. Running the same deploy/test/build cycle again will produce the same result. + +STOP and do ONE of these: +1. If the same test/eval keeps failing, analyze the failure output and fix the root cause before re-running. +2. If you cannot fix the root cause, explain what is blocking you and ask the user for help. +3. Try a completely different approach (e.g., test locally instead of via deployment). + +Do NOT re-run the same command hoping for a different result.` + } + if (attemptCount <= 2) { const missing = missingItems.length ? `\n### Missing\n${missingItems.map((m) => `- ${m}`).join("\n")}` @@ -463,11 +544,18 @@ Please address these issues and continue.` const missingBrief = missingItems.length ? `Still missing: ${missingItems.slice(0, 3).join(", ")}.` : "" - return `${FEEDBACK_MARKER} Still Incomplete (attempt ${attemptCount}/${MAX_ATTEMPTS}) + return `${FEEDBACK_MARKER} Final Attempt (${attemptCount}/${MAX_ATTEMPTS}) ${missingBrief} - You have been asked ${attemptCount} times to complete this task. Stop re-reading files or re-planning. 
Focus on the specific items above and implement them now. If something is blocking you, say what it is clearly.` +You have been asked ${attemptCount} times to complete this task. This is your LAST chance before reflection stops. + +If you cannot complete the remaining items: +- Explain clearly what is blocking you +- Set needs_user_action if you need user help +- Try a different approach instead of repeating the same steps + +Do NOT re-read files or re-plan. Either implement the fix now or explain why you cannot.` } export function shouldApplyPlanningLoop(taskType: TaskType, loopDetected: boolean): boolean { diff --git a/reflection-3.ts b/reflection-3.ts index c5fe266..724f8c9 100644 --- a/reflection-3.ts +++ b/reflection-3.ts @@ -22,7 +22,7 @@ async function reportError(err: unknown, context?: Record): Prom const SELF_ASSESSMENT_MARKER = "## Reflection-3 Self-Assessment" const FEEDBACK_MARKER = "## Reflection-3:" -const MAX_ATTEMPTS = 5 +const MAX_ATTEMPTS = 3 const JUDGE_BLOCKED_PATTERNS = [ /\bhaiku\b/i, @@ -36,6 +36,8 @@ const JUDGE_BLOCKED_PATTERNS = [ const PLANNING_LOOP_MIN_TOOL_CALLS = 8 const PLANNING_LOOP_WRITE_RATIO_THRESHOLD = 0.1 +const ACTION_LOOP_MIN_COMMANDS = 4 +const ACTION_LOOP_REPETITION_THRESHOLD = 0.6 type TaskType = "coding" | "docs" | "research" | "ops" | "other" type AgentMode = "plan" | "build" | "unknown" @@ -276,11 +278,76 @@ export function shouldApplyPlanningLoop(taskType: TaskType, loopDetected: boolea return taskType === "coding" } +/** + * Detects when the agent is repeating the same commands/actions without progress. + * Unlike detectPlanningLoop (read-heavy without writes), this catches action loops + * where the agent IS making write-like operations but repeating the same ones. + * Example: repeatedly re-deploying and re-running the same failing evaluation. 
+ */ +export function detectActionLoop(messages: any[]): { + detected: boolean + repeatedCommands: string[] + totalCommands: number +} { + if (!Array.isArray(messages)) { + return { detected: false, repeatedCommands: [], totalCommands: 0 } + } + + const commands: string[] = [] + for (const msg of messages) { + if (msg.info?.role !== "assistant") continue + for (const part of msg.parts || []) { + if (part.type !== "tool") continue + const toolName = (part.tool || "").toString().toLowerCase() + const input = part.state?.input || {} + + if (toolName === "bash") { + const cmd = (input.command || input.cmd || "").toString().trim() + if (cmd) { + // Normalize: collapse whitespace and remove trailing timestamps/IDs + const normalized = cmd.replace(/\s+/g, " ").replace(/\d{10,}/g, "TIMESTAMP").toLowerCase() + commands.push(normalized) + } + } else if (toolName !== "read" && toolName !== "glob" && toolName !== "grep" && toolName !== "todowrite") { + // Track non-read tool calls by name + key input params + const key = `${toolName}:${JSON.stringify(input).slice(0, 100)}` + commands.push(key) + } + } + } + + if (commands.length < ACTION_LOOP_MIN_COMMANDS) { + return { detected: false, repeatedCommands: [], totalCommands: commands.length } + } + + // Count occurrences of each command + const counts = new Map() + for (const cmd of commands) { + counts.set(cmd, (counts.get(cmd) || 0) + 1) + } + + // Find commands repeated 3+ times + const repeatedCommands: string[] = [] + let repeatedCount = 0 + for (const [cmd, count] of counts) { + if (count >= 3) { + repeatedCommands.push(cmd) + repeatedCount += count + } + } + + // Loop detected if repeated commands make up a significant fraction + const detected = repeatedCommands.length > 0 && repeatedCount / commands.length >= ACTION_LOOP_REPETITION_THRESHOLD + + return { detected, repeatedCommands, totalCommands: commands.length } +} + export function buildEscalatingFeedback( attemptCount: number, severity: string, verdict: { feedback?: string; missing?: string[]; next_actions?: string[] } | undefined | null, - isPlanningLoop: boolean + isPlanningLoop: boolean, + isActionLoop?: boolean ): string { const safeVerdict = verdict ?? {} const missingItems = Array.isArray(safeVerdict.missing) ? safeVerdict.missing : [] @@ -303,6 +370,19 @@ Pick the FIRST item from your existing todo list and implement it. Open a file w Start coding NOW. No more planning.` } + if (isActionLoop) { + return `${FEEDBACK_MARKER} STOP: Action Loop Detected (attempt ${attemptCount}/${MAX_ATTEMPTS}) + +You are repeating the same commands without making progress. Running the same deploy/test/build cycle again will produce the same result. + +STOP and do ONE of these: +1. If the same test/eval keeps failing, analyze the failure output and fix the root cause before re-running. +2. If you cannot fix the root cause, explain what is blocking you and ask the user for help. +3. Try a completely different approach (e.g., test locally instead of via deployment). + +Do NOT re-run the same command hoping for a different result.` + } + if (attemptCount <= 2) { const missing = missingItems.length ? `\n### Missing\n${missingItems.map((m) => `- ${m}`).join("\n")}` @@ -321,11 +401,18 @@ Please address these issues and continue.` const missingBrief = missingItems.length ? 
`Still missing: ${missingItems.slice(0, 3).join(", ")}.` : "" - return `${FEEDBACK_MARKER} Still Incomplete (attempt ${attemptCount}/${MAX_ATTEMPTS}) + return `${FEEDBACK_MARKER} Final Attempt (${attemptCount}/${MAX_ATTEMPTS}) ${missingBrief} -You have been asked ${attemptCount} times to complete this task. Stop re-reading files or re-planning. Focus on the specific items above and implement them now. If something is blocking you, say what it is clearly.` +You have been asked ${attemptCount} times to complete this task. This is your LAST chance before reflection stops. + +If you cannot complete the remaining items: +- Explain clearly what is blocking you +- Set needs_user_action if you need user help +- Try a different approach instead of repeating the same steps + +Do NOT re-read files or re-plan. Either implement the fix now or explain why you cannot.` } function getLastRelevantUserMessageId(messages: any[]): string | null { @@ -909,7 +996,7 @@ function extractLastAssistantText(messages: any[]): string { return "" } -function buildSelfAssessmentPrompt(context: TaskContext, agents: string, lastAssistantText?: string): string { +function buildSelfAssessmentPrompt(context: TaskContext, agents: string, lastAssistantText?: string, attemptCount?: number): string { const safeContext = { ...context, detectedSignals: Array.isArray(context.detectedSignals) ? context.detectedSignals : [] @@ -929,6 +1016,11 @@ function buildSelfAssessmentPrompt(context: TaskContext, agents: string, lastAss ? `\n## Agent's Last Response\n${lastAssistantText.slice(0, 4000)}\n` : "" + const currentAttempt = attemptCount || 0 + const attemptSection = currentAttempt > 0 + ? `\n## Reflection History\n- This is reflection attempt ${currentAttempt + 1}/${MAX_ATTEMPTS} for this task.\n- Previous reflections found the task incomplete.\n- If you are repeating the same actions without progress, set "stuck": true and explain what is blocking you.\n` + : "" + return `SELF-ASSESS REFLECTION-3 You are evaluating an agent's work against workflow requirements. @@ -943,7 +1035,7 @@ Analyze the task context, the agent's last response, and the tool signals to det ## Tool Commands Run ${safeContext.toolsSummary} -${assistantSection} +${assistantSection}${attemptSection} ${agents ? `## Project Instructions\n${agents.slice(0, 800)}\n\n` : ""}Return JSON only: { "task_summary": "brief description of what was done", @@ -983,7 +1075,9 @@ Rules: - Direct pushes to main/master are not allowed; require a PR instead. - If stuck, propose an alternate approach. - If you need user action (auth, 2FA, credentials), list it in needs_user_action. -- PLANNING LOOP CHECK: If the task requires code changes (fix, implement, add, create, build, refactor, update) but the "Tool Commands Run" section shows ONLY read operations (read, glob, grep, git log, git status, git diff, webfetch, task/explore) and NO write operations (edit, write, bash with build/test/commit, github_create_pull_request, etc.), then the task is NOT complete. Set status to "in_progress", set stuck to true, and list "Implement the actual code changes" in remaining_work. 
Analyzing and recommending changes is not the same as making them.` +- PLANNING LOOP CHECK: If the task requires code changes (fix, implement, add, create, build, refactor, update) but the "Tool Commands Run" section shows ONLY read operations (read, glob, grep, git log, git status, git diff, webfetch, task/explore) and NO write operations (edit, write, bash with build/test/commit, github_create_pull_request, etc.), then the task is NOT complete. Set status to "in_progress", set stuck to true, and list "Implement the actual code changes" in remaining_work. Analyzing and recommending changes is not the same as making them. +- If you are repeating the same actions (deploy, test, build) without making progress, set "stuck": true. +- Do not retry the same failing approach more than twice — try something different or report stuck.` } function parseSelfAssessmentJson(text: string | null | undefined): SelfAssessment | null { @@ -1381,7 +1475,8 @@ export const Reflection3Plugin: Plugin = async ({ client, directory }) => { const lastAssistantText = extractLastAssistantText(messages) const customPrompt = await loadReflectionPrompt(directory) const agents = await getAgentsFile(directory) - const reflectionPrompt = customPrompt || buildSelfAssessmentPrompt(context, agents, lastAssistantText) + const currentAttemptCount = attempts.get(attemptKey) || 0 + const reflectionPrompt = customPrompt || buildSelfAssessmentPrompt(context, agents, lastAssistantText, currentAttemptCount) await showToast(client, directory, "Requesting reflection self-assessment...", "info") debug("Requesting reflection self-assessment") @@ -1584,6 +1679,7 @@ export const Reflection3Plugin: Plugin = async ({ client, directory }) => { const loopCheck = detectPlanningLoop(preFeedbackMessages || messages) const usePlanningLoopMessage = shouldApplyPlanningLoop(context.taskType, loopCheck.detected) + const actionLoopCheck = detectActionLoop(preFeedbackMessages || messages) const feedbackText = buildEscalatingFeedback( nextAttemptCount, analysis.severity || "MEDIUM", @@ -1592,7 +1688,8 @@ export const Reflection3Plugin: Plugin = async ({ client, directory }) => { missing: analysis.missing, next_actions: analysis.nextActions }, - usePlanningLoopMessage + usePlanningLoopMessage, + actionLoopCheck.detected ) // Apply task-based model routing to feedback injection diff --git a/test/reflection-3.unit.test.ts b/test/reflection-3.unit.test.ts index d432ea2..a4cec09 100644 --- a/test/reflection-3.unit.test.ts +++ b/test/reflection-3.unit.test.ts @@ -11,6 +11,7 @@ import { parseModelSpec, getCrossReviewModelSpec, getGitHubCopilotModelForRouting, + detectActionLoop, RoutingConfig } from "../reflection-3.test-helpers.ts" import { detectPlanningLoop } from "../reflection-3.ts" @@ -498,16 +499,19 @@ describe("buildEscalatingFeedback", () => { assert.ok(!result.includes("Some feedback")) }) - it("escalates tone after attempt 2", () => { + it("escalates to final attempt message after attempt 2", () => { const verdict = { missing: ["Run tests", "Create PR", "Check CI", "Update docs"] } const result = buildEscalatingFeedback(3, "high", verdict, false) - assert.ok(result.includes("Still Incomplete")) - assert.ok(result.includes("attempt 3/5")) + assert.ok(result.includes("Final Attempt")) + assert.ok(result.includes("3/3")) // Should truncate to first 3 missing items assert.ok(result.includes("Run tests")) assert.ok(result.includes("Create PR")) assert.ok(result.includes("Check CI")) assert.ok(!result.includes("Update docs")) + // Should include give-up 
guidance + assert.ok(result.includes("LAST chance")) + assert.ok(result.includes("needs_user_action")) }) it("handles verdict with empty arrays", () => { @@ -524,6 +528,31 @@ describe("buildEscalatingFeedback", () => { assert.ok(result.includes("Incomplete")) assert.ok(!result.includes("### Missing")) }) + + it("returns action loop message when isActionLoop is true", () => { + const result = buildEscalatingFeedback(2, "high", null, false, true) + assert.ok(result.includes("Action Loop Detected")) + assert.ok(result.includes("repeating the same commands")) + assert.ok(result.includes("Do NOT re-run")) + }) + + it("action loop includes attempt count", () => { + const result = buildEscalatingFeedback(2, "high", null, false, true) + assert.ok(result.includes("2/3")) + }) + + it("action loop ignores verdict content", () => { + const verdict = { feedback: "Some feedback", missing: ["item"], next_actions: ["action"] } + const result = buildEscalatingFeedback(1, "high", verdict, false, true) + assert.ok(result.includes("Action Loop Detected")) + assert.ok(!result.includes("Some feedback")) + }) + + it("planning loop takes priority over action loop", () => { + const result = buildEscalatingFeedback(1, "high", null, true, true) + assert.ok(result.includes("Planning Loop Detected")) + assert.ok(!result.includes("Action Loop Detected")) + }) }) describe("task-based model routing", () => { @@ -722,3 +751,150 @@ describe("GitHub Copilot model routing", () => { assert.strictEqual(getGitHubCopilotModelForRouting(undefined), null) }) }) + +describe("detectActionLoop", () => { + function makeToolMsg(tools: Array<{ tool: string; input?: any }>): any { + return { + info: { role: "assistant" }, + parts: tools.map(t => ({ + type: "tool", + tool: t.tool, + state: { input: t.input || {} } + })) + } + } + + it("returns false for non-array input", () => { + const result = detectActionLoop(null as any) + assert.strictEqual(result.detected, false) + }) + + it("returns false for empty messages", () => { + const result = detectActionLoop([]) + assert.strictEqual(result.detected, false) + }) + + it("returns false for too few commands", () => { + const messages = [makeToolMsg([ + { tool: "bash", input: { command: "npm test" } }, + { tool: "bash", input: { command: "npm run build" } } + ])] + const result = detectActionLoop(messages) + assert.strictEqual(result.detected, false) + }) + + it("detects repeated bash commands", () => { + const messages = [ + makeToolMsg([{ tool: "bash", input: { command: "kubectl apply -f deploy.yaml" } }]), + makeToolMsg([{ tool: "bash", input: { command: "npm run eval:stripe" } }]), + makeToolMsg([{ tool: "bash", input: { command: "kubectl apply -f deploy.yaml" } }]), + makeToolMsg([{ tool: "bash", input: { command: "npm run eval:stripe" } }]), + makeToolMsg([{ tool: "bash", input: { command: "kubectl apply -f deploy.yaml" } }]), + makeToolMsg([{ tool: "bash", input: { command: "npm run eval:stripe" } }]) + ] + const result = detectActionLoop(messages) + assert.strictEqual(result.detected, true) + assert.ok(result.repeatedCommands.length > 0) + }) + + it("ignores read-only tools (read, glob, grep, todowrite)", () => { + const messages = [ + makeToolMsg([ + { tool: "read", input: { path: "/file.ts" } }, + { tool: "glob", input: { pattern: "**/*.ts" } }, + { tool: "grep", input: { pattern: "foo" } }, + { tool: "todowrite", input: { todos: [] } }, + { tool: "bash", input: { command: "npm test" } }, + { tool: "bash", input: { command: "npm run build" } } + ]) + ] + const result = 
detectActionLoop(messages) + // Only 2 bash commands counted, below threshold + assert.strictEqual(result.detected, false) + }) + + it("does not flag diverse commands as a loop", () => { + const messages = [ + makeToolMsg([{ tool: "bash", input: { command: "npm test" } }]), + makeToolMsg([{ tool: "bash", input: { command: "npm run build" } }]), + makeToolMsg([{ tool: "bash", input: { command: "git status" } }]), + makeToolMsg([{ tool: "bash", input: { command: "git add ." } }]), + makeToolMsg([{ tool: "bash", input: { command: "git commit -m 'fix'" } }]) + ] + const result = detectActionLoop(messages) + assert.strictEqual(result.detected, false) + }) + + it("normalizes timestamps in commands", () => { + const messages = [ + makeToolMsg([{ tool: "bash", input: { command: "echo test_1771177929615" } }]), + makeToolMsg([{ tool: "bash", input: { command: "echo test_1771177931936" } }]), + makeToolMsg([{ tool: "bash", input: { command: "echo test_1771177933000" } }]), + makeToolMsg([{ tool: "bash", input: { command: "echo test_1771177935000" } }]) + ] + const result = detectActionLoop(messages) + // All commands normalize to the same thing + assert.strictEqual(result.detected, true) + }) + + it("skips non-assistant messages", () => { + const messages = [ + { info: { role: "user" }, parts: [{ type: "tool", tool: "bash", state: { input: { command: "npm test" } } }] }, + { info: { role: "user" }, parts: [{ type: "tool", tool: "bash", state: { input: { command: "npm test" } } }] }, + { info: { role: "user" }, parts: [{ type: "tool", tool: "bash", state: { input: { command: "npm test" } } }] }, + { info: { role: "user" }, parts: [{ type: "tool", tool: "bash", state: { input: { command: "npm test" } } }] } + ] + const result = detectActionLoop(messages) + assert.strictEqual(result.detected, false) + assert.strictEqual(result.totalCommands, 0) + }) +}) + +describe("buildSelfAssessmentPrompt attempt awareness", () => { + const baseContext = { + taskSummary: "Fix a bug", + taskType: "coding" as const, + agentMode: "build" as const, + requiresTests: false, + requiresBuild: false, + requiresPR: false, + requiresCI: false, + requiresLocalTests: false, + requiresLocalTestsEvidence: false, + pushedToDefaultBranch: false, + detectedSignals: [] as string[], + toolsSummary: "npm test: pass", + recentCommands: [], + humanMessages: [] as string[] + } + + it("does not include reflection history on first attempt (attemptCount=0)", () => { + const result = buildSelfAssessmentPrompt(baseContext, "", undefined, 0) + assert.ok(!result.includes("Reflection History")) + assert.ok(!result.includes("reflection attempt")) + }) + + it("does not include reflection history when attemptCount is undefined", () => { + const result = buildSelfAssessmentPrompt(baseContext, "") + assert.ok(!result.includes("Reflection History")) + }) + + it("includes reflection history on second attempt", () => { + const result = buildSelfAssessmentPrompt(baseContext, "", undefined, 1) + assert.ok(result.includes("## Reflection History")) + assert.ok(result.includes("reflection attempt 2/3")) + assert.ok(result.includes("repeating the same actions")) + assert.ok(result.includes('"stuck": true')) + }) + + it("includes reflection history on third attempt", () => { + const result = buildSelfAssessmentPrompt(baseContext, "", undefined, 2) + assert.ok(result.includes("reflection attempt 3/3")) + }) + + it("includes loop-awareness rules", () => { + const result = buildSelfAssessmentPrompt(baseContext, "") + assert.ok(result.includes("repeating the same 
actions")) + assert.ok(result.includes("Do not retry the same failing approach")) + }) +})