diff --git a/reflection-3.test-helpers.ts b/reflection-3.test-helpers.ts index 11ed9ac..3b38ee7 100644 --- a/reflection-3.test-helpers.ts +++ b/reflection-3.test-helpers.ts @@ -87,7 +87,7 @@ export function inferTaskType(text: string): TaskType { return "other" } -export function buildSelfAssessmentPrompt(context: TaskContext, agents: string, lastAssistantText?: string): string { +export function buildSelfAssessmentPrompt(context: TaskContext, agents: string, lastAssistantText?: string, attemptCount?: number): string { const safeContext = { ...context, detectedSignals: Array.isArray(context.detectedSignals) ? context.detectedSignals : [] @@ -107,6 +107,11 @@ export function buildSelfAssessmentPrompt(context: TaskContext, agents: string, ? `\n## Agent's Last Response\n${lastAssistantText.slice(0, 4000)}\n` : "" + const currentAttempt = attemptCount || 0 + const attemptSection = currentAttempt > 0 + ? `\n## Reflection History\n- This is reflection attempt ${currentAttempt + 1}/${MAX_ATTEMPTS} for this task.\n- Previous reflections found the task incomplete.\n- If you are repeating the same actions without progress, set "stuck": true and explain what is blocking you.\n` + : "" + return `SELF-ASSESS REFLECTION-3 You are evaluating an agent's work against workflow requirements. @@ -121,7 +126,7 @@ Analyze the task context, the agent's last response, and the tool signals to det ## Tool Commands Run ${safeContext.toolsSummary} -${assistantSection} +${assistantSection}${attemptSection} ${agents ? `## Project Instructions\n${agents.slice(0, 800)}\n\n` : ""}Return JSON only: { "task_summary": "...", @@ -148,7 +153,9 @@ Rules: - Direct pushes to main/master are not allowed; require a PR instead. - Provide a PR URL and CI status when a PR is required. - If stuck, propose an alternate approach. -- If you need user action (auth, 2FA, credentials), list it in needs_user_action.` +- If you need user action (auth, 2FA, credentials), list it in needs_user_action. +- If you are repeating the same actions (deploy, test, build) without making progress, set "stuck": true. +- Do not retry the same failing approach more than twice — try something different or report stuck.` } export function parseSelfAssessmentJson(text: string | null | undefined): SelfAssessment | null { @@ -416,13 +423,74 @@ export function getGitHubCopilotModelForRouting(modelSpec: string | null | undef } const FEEDBACK_MARKER = "## Reflection-3:" -const MAX_ATTEMPTS = 5 +const MAX_ATTEMPTS = 3 +const ACTION_LOOP_MIN_COMMANDS = 4 +const ACTION_LOOP_REPETITION_THRESHOLD = 0.6 + +/** + * Detects when the agent is repeating the same commands/actions without progress. + * Unlike detectPlanningLoop (read-heavy without writes), this catches action loops + * where the agent IS making write-like operations but repeating the same ones. + */ +export function detectActionLoop(messages: any[]): { + detected: boolean + repeatedCommands: string[] + totalCommands: number +} { + if (!Array.isArray(messages)) { + return { detected: false, repeatedCommands: [], totalCommands: 0 } + } + + const commands: string[] = [] + for (const msg of messages) { + if (msg.info?.role !== "assistant") continue + for (const part of msg.parts || []) { + if (part.type !== "tool") continue + const toolName = (part.tool || "").toString().toLowerCase() + const input = part.state?.input || {} + + if (toolName === "bash") { + const cmd = (input.command || input.cmd || "").toString().trim() + if (cmd) { + const normalized = cmd.replace(/\s+/g, " ").replace(/\d{10,}/g, "TIMESTAMP").toLowerCase() + commands.push(normalized) + } + } else if (toolName !== "read" && toolName !== "glob" && toolName !== "grep" && toolName !== "todowrite") { + const key = `${toolName}:${JSON.stringify(input).slice(0, 100)}` + commands.push(key) + } + } + } + + if (commands.length < ACTION_LOOP_MIN_COMMANDS) { + return { detected: false, repeatedCommands: [], totalCommands: commands.length } + } + + const counts = new Map() + for (const cmd of commands) { + counts.set(cmd, (counts.get(cmd) || 0) + 1) + } + + const repeatedCommands: string[] = [] + let repeatedCount = 0 + for (const [cmd, count] of counts) { + if (count >= 3) { + repeatedCommands.push(cmd) + repeatedCount += count + } + } + + const detected = repeatedCommands.length > 0 && repeatedCount / commands.length >= ACTION_LOOP_REPETITION_THRESHOLD + + return { detected, repeatedCommands, totalCommands: commands.length } +} export function buildEscalatingFeedback( attemptCount: number, severity: string, verdict: { feedback?: string; missing?: string[]; next_actions?: string[] } | undefined | null, - isPlanningLoop: boolean + isPlanningLoop: boolean, + isActionLoop?: boolean ): string { const safeVerdict = verdict ?? {} const missingItems = Array.isArray(safeVerdict.missing) ? safeVerdict.missing : [] @@ -445,6 +513,19 @@ Pick the FIRST item from your existing todo list and implement it. Open a file w Start coding NOW. No more planning.` } + if (isActionLoop) { + return `${FEEDBACK_MARKER} STOP: Action Loop Detected (attempt ${attemptCount}/${MAX_ATTEMPTS}) + +You are repeating the same commands without making progress. Running the same deploy/test/build cycle again will produce the same result. + +STOP and do ONE of these: +1. If the same test/eval keeps failing, analyze the failure output and fix the root cause before re-running. +2. If you cannot fix the root cause, explain what is blocking you and ask the user for help. +3. Try a completely different approach (e.g., test locally instead of via deployment). + +Do NOT re-run the same command hoping for a different result.` + } + if (attemptCount <= 2) { const missing = missingItems.length ? `\n### Missing\n${missingItems.map((m) => `- ${m}`).join("\n")}` @@ -463,11 +544,18 @@ Please address these issues and continue.` const missingBrief = missingItems.length ? `Still missing: ${missingItems.slice(0, 3).join(", ")}.` : "" - return `${FEEDBACK_MARKER} Still Incomplete (attempt ${attemptCount}/${MAX_ATTEMPTS}) + return `${FEEDBACK_MARKER} Final Attempt (${attemptCount}/${MAX_ATTEMPTS}) ${missingBrief} - You have been asked ${attemptCount} times to complete this task. Stop re-reading files or re-planning. Focus on the specific items above and implement them now. If something is blocking you, say what it is clearly.` +You have been asked ${attemptCount} times to complete this task. This is your LAST chance before reflection stops. + +If you cannot complete the remaining items: +- Explain clearly what is blocking you +- Set needs_user_action if you need user help +- Try a different approach instead of repeating the same steps + +Do NOT re-read files or re-plan. Either implement the fix now or explain why you cannot.` } export function shouldApplyPlanningLoop(taskType: TaskType, loopDetected: boolean): boolean { diff --git a/reflection-3.ts b/reflection-3.ts index 4034665..ef3e978 100644 --- a/reflection-3.ts +++ b/reflection-3.ts @@ -22,7 +22,7 @@ async function reportError(err: unknown, context?: Record): Prom const SELF_ASSESSMENT_MARKER = "## Reflection-3 Self-Assessment" const FEEDBACK_MARKER = "## Reflection-3:" -const MAX_ATTEMPTS = 5 +const MAX_ATTEMPTS = 3 const JUDGE_BLOCKED_PATTERNS = [ /\bhaiku\b/i, @@ -36,6 +36,8 @@ const JUDGE_BLOCKED_PATTERNS = [ const PLANNING_LOOP_MIN_TOOL_CALLS = 8 const PLANNING_LOOP_WRITE_RATIO_THRESHOLD = 0.1 +const ACTION_LOOP_MIN_COMMANDS = 4 +const ACTION_LOOP_REPETITION_THRESHOLD = 0.6 type TaskType = "coding" | "docs" | "research" | "ops" | "other" type AgentMode = "plan" | "build" | "unknown" @@ -276,11 +278,76 @@ export function shouldApplyPlanningLoop(taskType: TaskType, loopDetected: boolea return taskType === "coding" } +/** + * Detects when the agent is repeating the same commands/actions without progress. + * Unlike detectPlanningLoop (read-heavy without writes), this catches action loops + * where the agent IS making write-like operations but repeating the same ones. + * Example: repeatedly re-deploying and re-running the same failing evaluation. + */ +export function detectActionLoop(messages: any[]): { + detected: boolean + repeatedCommands: string[] + totalCommands: number +} { + if (!Array.isArray(messages)) { + return { detected: false, repeatedCommands: [], totalCommands: 0 } + } + + const commands: string[] = [] + for (const msg of messages) { + if (msg.info?.role !== "assistant") continue + for (const part of msg.parts || []) { + if (part.type !== "tool") continue + const toolName = (part.tool || "").toString().toLowerCase() + const input = part.state?.input || {} + + if (toolName === "bash") { + const cmd = (input.command || input.cmd || "").toString().trim() + if (cmd) { + // Normalize: collapse whitespace and remove trailing timestamps/IDs + const normalized = cmd.replace(/\s+/g, " ").replace(/\d{10,}/g, "TIMESTAMP").toLowerCase() + commands.push(normalized) + } + } else if (toolName !== "read" && toolName !== "glob" && toolName !== "grep" && toolName !== "todowrite") { + // Track non-read tool calls by name + key input params + const key = `${toolName}:${JSON.stringify(input).slice(0, 100)}` + commands.push(key) + } + } + } + + if (commands.length < ACTION_LOOP_MIN_COMMANDS) { + return { detected: false, repeatedCommands: [], totalCommands: commands.length } + } + + // Count occurrences of each command + const counts = new Map() + for (const cmd of commands) { + counts.set(cmd, (counts.get(cmd) || 0) + 1) + } + + // Find commands repeated 3+ times + const repeatedCommands: string[] = [] + let repeatedCount = 0 + for (const [cmd, count] of counts) { + if (count >= 3) { + repeatedCommands.push(cmd) + repeatedCount += count + } + } + + // Loop detected if repeated commands make up a significant fraction + const detected = repeatedCommands.length > 0 && repeatedCount / commands.length >= ACTION_LOOP_REPETITION_THRESHOLD + + return { detected, repeatedCommands, totalCommands: commands.length } +} + export function buildEscalatingFeedback( attemptCount: number, severity: string, verdict: { feedback?: string; missing?: string[]; next_actions?: string[] } | undefined | null, - isPlanningLoop: boolean + isPlanningLoop: boolean, + isActionLoop?: boolean ): string { const safeVerdict = verdict ?? {} const missingItems = Array.isArray(safeVerdict.missing) ? safeVerdict.missing : [] @@ -303,6 +370,19 @@ Pick the FIRST item from your existing todo list and implement it. Open a file w Start coding NOW. No more planning.` } + if (isActionLoop) { + return `${FEEDBACK_MARKER} STOP: Action Loop Detected (attempt ${attemptCount}/${MAX_ATTEMPTS}) + +You are repeating the same commands without making progress. Running the same deploy/test/build cycle again will produce the same result. + +STOP and do ONE of these: +1. If the same test/eval keeps failing, analyze the failure output and fix the root cause before re-running. +2. If you cannot fix the root cause, explain what is blocking you and ask the user for help. +3. Try a completely different approach (e.g., test locally instead of via deployment). + +Do NOT re-run the same command hoping for a different result.` + } + if (attemptCount <= 2) { const missing = missingItems.length ? `\n### Missing\n${missingItems.map((m) => `- ${m}`).join("\n")}` @@ -321,11 +401,18 @@ Please address these issues and continue.` const missingBrief = missingItems.length ? `Still missing: ${missingItems.slice(0, 3).join(", ")}.` : "" - return `${FEEDBACK_MARKER} Still Incomplete (attempt ${attemptCount}/${MAX_ATTEMPTS}) + return `${FEEDBACK_MARKER} Final Attempt (${attemptCount}/${MAX_ATTEMPTS}) ${missingBrief} -You have been asked ${attemptCount} times to complete this task. Stop re-reading files or re-planning. Focus on the specific items above and implement them now. If something is blocking you, say what it is clearly.` +You have been asked ${attemptCount} times to complete this task. This is your LAST chance before reflection stops. + +If you cannot complete the remaining items: +- Explain clearly what is blocking you +- Set needs_user_action if you need user help +- Try a different approach instead of repeating the same steps + +Do NOT re-read files or re-plan. Either implement the fix now or explain why you cannot.` } function getLastRelevantUserMessageId(messages: any[]): string | null { @@ -931,7 +1018,7 @@ function extractLastAssistantText(messages: any[]): string { return "" } -function buildSelfAssessmentPrompt(context: TaskContext, agents: string, lastAssistantText?: string): string { +function buildSelfAssessmentPrompt(context: TaskContext, agents: string, lastAssistantText?: string, attemptCount?: number): string { const safeContext = { ...context, detectedSignals: Array.isArray(context.detectedSignals) ? context.detectedSignals : [] @@ -951,6 +1038,11 @@ function buildSelfAssessmentPrompt(context: TaskContext, agents: string, lastAss ? `\n## Agent's Last Response\n${lastAssistantText.slice(0, 4000)}\n` : "" + const currentAttempt = attemptCount || 0 + const attemptSection = currentAttempt > 0 + ? `\n## Reflection History\n- This is reflection attempt ${currentAttempt + 1}/${MAX_ATTEMPTS} for this task.\n- Previous reflections found the task incomplete.\n- If you are repeating the same actions without progress, set "stuck": true and explain what is blocking you.\n` + : "" + return `SELF-ASSESS REFLECTION-3 You are evaluating an agent's work against workflow requirements. @@ -965,7 +1057,7 @@ Analyze the task context, the agent's last response, and the tool signals to det ## Tool Commands Run ${safeContext.toolsSummary} -${assistantSection} +${assistantSection}${attemptSection} ${agents ? `## Project Instructions\n${agents.slice(0, 800)}\n\n` : ""}Return JSON only: { "task_summary": "brief description of what was done", @@ -1005,7 +1097,9 @@ Rules: - Direct pushes to main/master are not allowed; require a PR instead. - If stuck, propose an alternate approach. - If you need user action (auth, 2FA, credentials), list it in needs_user_action. -- PLANNING LOOP CHECK: If the task requires code changes (fix, implement, add, create, build, refactor, update) but the "Tool Commands Run" section shows ONLY read operations (read, glob, grep, git log, git status, git diff, webfetch, task/explore) and NO write operations (edit, write, bash with build/test/commit, github_create_pull_request, etc.), then the task is NOT complete. Set status to "in_progress", set stuck to true, and list "Implement the actual code changes" in remaining_work. Analyzing and recommending changes is not the same as making them.` +- PLANNING LOOP CHECK: If the task requires code changes (fix, implement, add, create, build, refactor, update) but the "Tool Commands Run" section shows ONLY read operations (read, glob, grep, git log, git status, git diff, webfetch, task/explore) and NO write operations (edit, write, bash with build/test/commit, github_create_pull_request, etc.), then the task is NOT complete. Set status to "in_progress", set stuck to true, and list "Implement the actual code changes" in remaining_work. Analyzing and recommending changes is not the same as making them. +- If you are repeating the same actions (deploy, test, build) without making progress, set "stuck": true. +- Do not retry the same failing approach more than twice — try something different or report stuck.` } function parseSelfAssessmentJson(text: string | null | undefined): SelfAssessment | null { @@ -1403,7 +1497,8 @@ export const Reflection3Plugin: Plugin = async ({ client, directory }) => { const lastAssistantText = extractLastAssistantText(messages) const customPrompt = await loadReflectionPrompt(directory) const agents = await getAgentsFile(directory) - const reflectionPrompt = customPrompt || buildSelfAssessmentPrompt(context, agents, lastAssistantText) + const currentAttemptCount = attempts.get(attemptKey) || 0 + const reflectionPrompt = customPrompt || buildSelfAssessmentPrompt(context, agents, lastAssistantText, currentAttemptCount) await showToast(client, directory, "Requesting reflection self-assessment...", "info") debug("Requesting reflection self-assessment") @@ -1606,6 +1701,7 @@ export const Reflection3Plugin: Plugin = async ({ client, directory }) => { const loopCheck = detectPlanningLoop(preFeedbackMessages || messages) const usePlanningLoopMessage = shouldApplyPlanningLoop(context.taskType, loopCheck.detected) + const actionLoopCheck = detectActionLoop(preFeedbackMessages || messages) const feedbackText = buildEscalatingFeedback( nextAttemptCount, analysis.severity || "MEDIUM", @@ -1614,7 +1710,8 @@ export const Reflection3Plugin: Plugin = async ({ client, directory }) => { missing: analysis.missing, next_actions: analysis.nextActions }, - usePlanningLoopMessage + usePlanningLoopMessage, + actionLoopCheck.detected ) // Apply task-based model routing to feedback injection diff --git a/test/reflection-3.unit.test.ts b/test/reflection-3.unit.test.ts index 5758f39..5ee72cb 100644 --- a/test/reflection-3.unit.test.ts +++ b/test/reflection-3.unit.test.ts @@ -11,6 +11,7 @@ import { parseModelSpec, getCrossReviewModelSpec, getGitHubCopilotModelForRouting, + detectActionLoop, isPlanMode, RoutingConfig } from "../reflection-3.test-helpers.ts" @@ -499,16 +500,19 @@ describe("buildEscalatingFeedback", () => { assert.ok(!result.includes("Some feedback")) }) - it("escalates tone after attempt 2", () => { + it("escalates to final attempt message after attempt 2", () => { const verdict = { missing: ["Run tests", "Create PR", "Check CI", "Update docs"] } const result = buildEscalatingFeedback(3, "high", verdict, false) - assert.ok(result.includes("Still Incomplete")) - assert.ok(result.includes("attempt 3/5")) + assert.ok(result.includes("Final Attempt")) + assert.ok(result.includes("3/3")) // Should truncate to first 3 missing items assert.ok(result.includes("Run tests")) assert.ok(result.includes("Create PR")) assert.ok(result.includes("Check CI")) assert.ok(!result.includes("Update docs")) + // Should include give-up guidance + assert.ok(result.includes("LAST chance")) + assert.ok(result.includes("needs_user_action")) }) it("handles verdict with empty arrays", () => { @@ -525,6 +529,31 @@ describe("buildEscalatingFeedback", () => { assert.ok(result.includes("Incomplete")) assert.ok(!result.includes("### Missing")) }) + + it("returns action loop message when isActionLoop is true", () => { + const result = buildEscalatingFeedback(2, "high", null, false, true) + assert.ok(result.includes("Action Loop Detected")) + assert.ok(result.includes("repeating the same commands")) + assert.ok(result.includes("Do NOT re-run")) + }) + + it("action loop includes attempt count", () => { + const result = buildEscalatingFeedback(2, "high", null, false, true) + assert.ok(result.includes("2/3")) + }) + + it("action loop ignores verdict content", () => { + const verdict = { feedback: "Some feedback", missing: ["item"], next_actions: ["action"] } + const result = buildEscalatingFeedback(1, "high", verdict, false, true) + assert.ok(result.includes("Action Loop Detected")) + assert.ok(!result.includes("Some feedback")) + }) + + it("planning loop takes priority over action loop", () => { + const result = buildEscalatingFeedback(1, "high", null, true, true) + assert.ok(result.includes("Planning Loop Detected")) + assert.ok(!result.includes("Action Loop Detected")) + }) }) describe("task-based model routing", () => { @@ -724,193 +753,149 @@ describe("GitHub Copilot model routing", () => { }) }) -describe("isPlanMode", () => { - // Helper to create a message with given role and text parts - function msg(role: string, ...texts: string[]) { +describe("detectActionLoop", () => { + function makeToolMsg(tools: Array<{ tool: string; input?: any }>): any { return { - info: { role }, - parts: texts.map(t => ({ type: "text", text: t })) + info: { role: "assistant" }, + parts: tools.map(t => ({ + type: "tool", + tool: t.tool, + state: { input: t.input || {} } + })) } } - describe("system/developer message detection", () => { - it("detects 'Plan Mode' in system message", () => { - const messages = [msg("system", "# Plan Mode - System Reminder")] - assert.strictEqual(isPlanMode(messages), true) - }) - - it("detects 'plan mode ACTIVE' in developer message", () => { - const messages = [msg("developer", "CRITICAL: plan mode ACTIVE - you are in READ-ONLY phase")] - assert.strictEqual(isPlanMode(messages), true) - }) - - it("detects 'read-only mode' in system message", () => { - const messages = [msg("system", "You are in read-only mode")] - assert.strictEqual(isPlanMode(messages), true) - }) - - it("detects 'READ-ONLY phase' in system message", () => { - const messages = [msg("system", "you are in READ-ONLY phase")] - assert.strictEqual(isPlanMode(messages), true) - }) - - it("detects 'plan mode is active' in system message", () => { - const messages = [msg("system", "plan mode is active. Do not edit files.")] - assert.strictEqual(isPlanMode(messages), true) - }) + it("returns false for non-array input", () => { + const result = detectActionLoop(null as any) + assert.strictEqual(result.detected, false) }) - describe("system-reminder detection (OpenCode actual format)", () => { - it("detects default plan.txt system-reminder in user message", () => { - const reminder = ` -# Plan Mode - System Reminder - -CRITICAL: Plan mode ACTIVE - you are in READ-ONLY phase. STRICTLY FORBIDDEN: -ANY file edits, modifications, or system changes. -` - const messages = [msg("user", "Help me plan", reminder)] - assert.strictEqual(isPlanMode(messages), true) - }) - - it("detects experimental plan mode system-reminder", () => { - const reminder = ` -Plan mode is active. The user indicated that they do not want you to execute yet -- -you MUST NOT make any edits. -` - const messages = [msg("user", "Design the architecture", reminder)] - assert.strictEqual(isPlanMode(messages), true) - }) - - it("detects plan mode system-reminder even in older messages", () => { - const reminder = ` -Plan mode is active. READ-ONLY phase. -` - const messages = [ - msg("user", "First message", reminder), - msg("assistant", "Here is my plan..."), - msg("user", "Thanks, looks good") - ] - assert.strictEqual(isPlanMode(messages), true) - }) - - it("detects READ-ONLY phase in system-reminder", () => { - const reminder = ` -CRITICAL: you are in READ-ONLY phase. Do not modify files. -` - const messages = [msg("user", "Analyze the code", reminder)] - assert.strictEqual(isPlanMode(messages), true) - }) - - it("does NOT trigger on system-reminder without plan mode keywords", () => { - const reminder = ` -You have access to these tools: read, write, edit. -` - const messages = [msg("user", "Fix the bug", reminder)] - assert.strictEqual(isPlanMode(messages), false) - }) - - it("does NOT trigger on plan mode keywords outside system-reminder", () => { - // The user says "plan mode" literally -> detected via user message check, not system-reminder - const messages = [msg("user", "Enable plan mode")] - assert.strictEqual(isPlanMode(messages), true) // detected via user keyword check - }) + it("returns false for empty messages", () => { + const result = detectActionLoop([]) + assert.strictEqual(result.detected, false) }) - describe("user message keyword detection", () => { - it("detects 'plan mode' in user message (case insensitive)", () => { - const messages = [msg("user", "Switch to Plan Mode")] - assert.strictEqual(isPlanMode(messages), true) - }) + it("returns false for too few commands", () => { + const messages = [makeToolMsg([ + { tool: "bash", input: { command: "npm test" } }, + { tool: "bash", input: { command: "npm run build" } } + ])] + const result = detectActionLoop(messages) + assert.strictEqual(result.detected, false) + }) - it("detects 'plan' at start of user message", () => { - const messages = [msg("user", "plan the architecture for the new feature")] - assert.strictEqual(isPlanMode(messages), true) - }) + it("detects repeated bash commands", () => { + const messages = [ + makeToolMsg([{ tool: "bash", input: { command: "kubectl apply -f deploy.yaml" } }]), + makeToolMsg([{ tool: "bash", input: { command: "npm run eval:stripe" } }]), + makeToolMsg([{ tool: "bash", input: { command: "kubectl apply -f deploy.yaml" } }]), + makeToolMsg([{ tool: "bash", input: { command: "npm run eval:stripe" } }]), + makeToolMsg([{ tool: "bash", input: { command: "kubectl apply -f deploy.yaml" } }]), + makeToolMsg([{ tool: "bash", input: { command: "npm run eval:stripe" } }]) + ] + const result = detectActionLoop(messages) + assert.strictEqual(result.detected, true) + assert.ok(result.repeatedCommands.length > 0) + }) - it("detects 'create a plan' pattern", () => { - const messages = [msg("user", "create a plan for the refactoring")] - assert.strictEqual(isPlanMode(messages), true) - }) + it("ignores read-only tools (read, glob, grep, todowrite)", () => { + const messages = [ + makeToolMsg([ + { tool: "read", input: { path: "/file.ts" } }, + { tool: "glob", input: { pattern: "**/*.ts" } }, + { tool: "grep", input: { pattern: "foo" } }, + { tool: "todowrite", input: { todos: [] } }, + { tool: "bash", input: { command: "npm test" } }, + { tool: "bash", input: { command: "npm run build" } } + ]) + ] + const result = detectActionLoop(messages) + // Only 2 bash commands counted, below threshold + assert.strictEqual(result.detected, false) + }) - it("detects 'write a plan' pattern", () => { - const messages = [msg("user", "write a detailed plan")] - assert.strictEqual(isPlanMode(messages), true) - }) + it("does not flag diverse commands as a loop", () => { + const messages = [ + makeToolMsg([{ tool: "bash", input: { command: "npm test" } }]), + makeToolMsg([{ tool: "bash", input: { command: "npm run build" } }]), + makeToolMsg([{ tool: "bash", input: { command: "git status" } }]), + makeToolMsg([{ tool: "bash", input: { command: "git add ." } }]), + makeToolMsg([{ tool: "bash", input: { command: "git commit -m 'fix'" } }]) + ] + const result = detectActionLoop(messages) + assert.strictEqual(result.detected, false) + }) - it("does NOT detect 'plan' in the middle of unrelated text", () => { - const messages = [msg("user", "Fix the airplane display bug")] - assert.strictEqual(isPlanMode(messages), false) - }) + it("normalizes timestamps in commands", () => { + const messages = [ + makeToolMsg([{ tool: "bash", input: { command: "echo test_1771177929615" } }]), + makeToolMsg([{ tool: "bash", input: { command: "echo test_1771177931936" } }]), + makeToolMsg([{ tool: "bash", input: { command: "echo test_1771177933000" } }]), + makeToolMsg([{ tool: "bash", input: { command: "echo test_1771177935000" } }]) + ] + const result = detectActionLoop(messages) + // All commands normalize to the same thing + assert.strictEqual(result.detected, true) + }) - it("does NOT trigger on regular coding tasks", () => { - const messages = [msg("user", "Fix the login bug and add tests")] - assert.strictEqual(isPlanMode(messages), false) - }) + it("skips non-assistant messages", () => { + const messages = [ + { info: { role: "user" }, parts: [{ type: "tool", tool: "bash", state: { input: { command: "npm test" } } }] }, + { info: { role: "user" }, parts: [{ type: "tool", tool: "bash", state: { input: { command: "npm test" } } }] }, + { info: { role: "user" }, parts: [{ type: "tool", tool: "bash", state: { input: { command: "npm test" } } }] }, + { info: { role: "user" }, parts: [{ type: "tool", tool: "bash", state: { input: { command: "npm test" } } }] } + ] + const result = detectActionLoop(messages) + assert.strictEqual(result.detected, false) + assert.strictEqual(result.totalCommands, 0) }) +}) - describe("reflection message handling", () => { - it("skips reflection messages when looking for user keywords", () => { - const reflectionMsg = { - info: { role: "user" }, - parts: [{ type: "text", text: "## Reflection-3 Self-Assessment\nplan mode test" }] - } - const messages = [msg("user", "Fix the bug"), reflectionMsg] - assert.strictEqual(isPlanMode(messages), false) - }) +describe("buildSelfAssessmentPrompt attempt awareness", () => { + const baseContext = { + taskSummary: "Fix a bug", + taskType: "coding" as const, + agentMode: "build" as const, + requiresTests: false, + requiresBuild: false, + requiresPR: false, + requiresCI: false, + requiresLocalTests: false, + requiresLocalTestsEvidence: false, + pushedToDefaultBranch: false, + detectedSignals: [] as string[], + toolsSummary: "npm test: pass", + recentCommands: [], + humanMessages: [] as string[] + } - it("checks non-reflection user message even after reflection message", () => { - const reflectionMsg = { - info: { role: "user" }, - parts: [{ type: "text", text: "## Reflection-3 Self-Assessment\nsome assessment" }] - } - const messages = [msg("user", "Switch to plan mode"), reflectionMsg] - // Walks backward: skips reflectionMsg, finds "Switch to plan mode" - assert.strictEqual(isPlanMode(messages), true) - }) + it("does not include reflection history on first attempt (attemptCount=0)", () => { + const result = buildSelfAssessmentPrompt(baseContext, "", undefined, 0) + assert.ok(!result.includes("Reflection History")) + assert.ok(!result.includes("reflection attempt")) }) - describe("multiple text parts in a single message", () => { - it("checks all text parts, not just the last one", () => { - const messages = [{ - info: { role: "user" }, - parts: [ - { type: "text", text: "plan mode please" }, - { type: "text", text: "I want to think about this" } - ] - }] - assert.strictEqual(isPlanMode(messages), true) - }) + it("does not include reflection history when attemptCount is undefined", () => { + const result = buildSelfAssessmentPrompt(baseContext, "") + assert.ok(!result.includes("Reflection History")) }) - describe("edge cases", () => { - it("returns false for empty messages array", () => { - assert.strictEqual(isPlanMode([]), false) - }) - - it("returns false for messages with no parts", () => { - const messages = [{ info: { role: "user" } }] - assert.strictEqual(isPlanMode(messages), false) - }) - - it("returns false for messages with empty text parts", () => { - const messages = [{ info: { role: "user" }, parts: [{ type: "text", text: "" }] }] - assert.strictEqual(isPlanMode(messages), false) - }) + it("includes reflection history on second attempt", () => { + const result = buildSelfAssessmentPrompt(baseContext, "", undefined, 1) + assert.ok(result.includes("## Reflection History")) + assert.ok(result.includes("reflection attempt 2/3")) + assert.ok(result.includes("repeating the same actions")) + assert.ok(result.includes('"stuck": true')) + }) - it("returns false for assistant-only messages", () => { - const messages = [msg("assistant", "Here is the plan for the feature")] - assert.strictEqual(isPlanMode(messages), false) - }) + it("includes reflection history on third attempt", () => { + const result = buildSelfAssessmentPrompt(baseContext, "", undefined, 2) + assert.ok(result.includes("reflection attempt 3/3")) + }) - it("handles build-switch reminder (should NOT be plan mode)", () => { - const reminder = ` -Your operational mode has changed from plan to build. -You are no longer in read-only mode. -` - // "no longer in read-only mode" should not match — but "plan" + system-reminder exists - // The regex checks for "plan mode" (case insensitive) — "from plan to build" contains "plan" but NOT "plan mode" - const messages = [msg("user", "Now implement it", reminder)] - assert.strictEqual(isPlanMode(messages), false) - }) + it("includes loop-awareness rules", () => { + const result = buildSelfAssessmentPrompt(baseContext, "") + assert.ok(result.includes("repeating the same actions")) + assert.ok(result.includes("Do not retry the same failing approach")) }) })