Patch summary: plan-mode detection (isPlanMode)

- reflection-3.ts: isPlanMode is now exported. It also recognizes the phrases
  "plan mode is active" and "READ-ONLY phase" in system/developer messages,
  scans every text part of every message for a <system-reminder> block that
  mentions plan mode or the READ-ONLY phase, and runs the user-keyword check
  over all text parts of the last non-reflection user message instead of only
  the last text part.
- reflection-3.test-helpers.ts: carries the same isPlanMode implementation,
  which is the copy the unit tests import.
- test/reflection-3.unit.test.ts: adds the isPlanMode suite.

Note: the "<system-reminder>" literal must stay intact. String.includes("")
is always true, so an empty needle would make the reminder scan match any text
part that mentions plan mode, reflection messages included.

diff --git a/reflection-3.test-helpers.ts b/reflection-3.test-helpers.ts
index 0f21ccf..11ed9ac 100644
--- a/reflection-3.test-helpers.ts
+++ b/reflection-3.test-helpers.ts
@@ -474,3 +474,65 @@ export function shouldApplyPlanningLoop(taskType: TaskType, loopDetected: boolea
   if (!loopDetected) return false
   return taskType === "coding"
 }
+
+const SELF_ASSESSMENT_MARKER = "## Reflection-3 Self-Assessment"
+
+export function isPlanMode(messages: any[]): boolean {
+  // Check system/developer messages for plan mode indicators
+  const hasSystemPlanMode = messages.some((m: any) =>
+    (m.info?.role === "system" || m.info?.role === "developer") &&
+    m.parts?.some((p: any) =>
+      p.type === "text" &&
+      p.text &&
+      (p.text.includes("Plan Mode") ||
+        p.text.includes("plan mode ACTIVE") ||
+        p.text.includes("plan mode is active") ||
+        p.text.includes("read-only mode") ||
+        p.text.includes("READ-ONLY phase"))
+    )
+  )
+  if (hasSystemPlanMode) return true
+
+  // OpenCode injects plan mode as <system-reminder> inside user message parts.
+  // Check ALL text parts of ALL messages for plan mode system-reminder patterns.
+  for (const msg of messages) {
+    for (const part of msg.parts || []) {
+      if (part.type === "text" && part.text) {
+        const text = part.text
+        if (
+          text.includes("<system-reminder>") &&
+          (/plan mode/i.test(text) || /READ-ONLY phase/i.test(text))
+        ) {
+          return true
+        }
+      }
+    }
+  }
+
+  // Check the last non-reflection user message for plan-related keywords
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i]
+    if (msg.info?.role === "user") {
+      let isReflection = false
+      const texts: string[] = []
+      for (const part of msg.parts || []) {
+        if (part.type === "text" && part.text) {
+          if (part.text.includes(SELF_ASSESSMENT_MARKER)) {
+            isReflection = true
+            break
+          }
+          texts.push(part.text)
+        }
+      }
+      if (!isReflection && texts.length > 0) {
+        for (const text of texts) {
+          if (/plan mode/i.test(text)) return true
+          if (/\b(create|make|draft|generate|propose|write|update)\b.{1,30}\bplan\b/i.test(text)) return true
+          if (/^plan\b/i.test(text.trim())) return true
+        }
+        return false
+      }
+    }
+  }
+  return false
+}
diff --git a/reflection-3.ts b/reflection-3.ts
index c5fe266..4034665 100644
--- a/reflection-3.ts
+++ b/reflection-3.ts
@@ -359,7 +359,8 @@ function isJudgeSession(sessionId: string, messages: any[], judgeSessionIds: Set
   return false
 }
 
-function isPlanMode(messages: any[]): boolean {
+export function isPlanMode(messages: any[]): boolean {
+  // Check system/developer messages for plan mode indicators
   const hasSystemPlanMode = messages.some((m: any) =>
     (m.info?.role === "system" || m.info?.role === "developer") &&
     m.parts?.some((p: any) =>
@@ -367,29 +368,50 @@ function isPlanMode(messages: any[]): boolean {
       p.text &&
       (p.text.includes("Plan Mode") ||
         p.text.includes("plan mode ACTIVE") ||
-        p.text.includes("read-only mode"))
+        p.text.includes("plan mode is active") ||
+        p.text.includes("read-only mode") ||
+        p.text.includes("READ-ONLY phase"))
     )
   )
   if (hasSystemPlanMode) return true
 
+  // OpenCode injects plan mode as <system-reminder> inside user message parts.
+  // Check ALL text parts of ALL messages for plan mode system-reminder patterns.
+  for (const msg of messages) {
+    for (const part of msg.parts || []) {
+      if (part.type === "text" && part.text) {
+        const text = part.text
+        if (
+          text.includes("<system-reminder>") &&
+          (/plan mode/i.test(text) || /READ-ONLY phase/i.test(text))
+        ) {
+          return true
+        }
+      }
+    }
+  }
+
+  // Check the last non-reflection user message for plan-related keywords
   for (let i = messages.length - 1; i >= 0; i--) {
     const msg = messages[i]
     if (msg.info?.role === "user") {
       let isReflection = false
-      let text = ""
+      const texts: string[] = []
       for (const part of msg.parts || []) {
         if (part.type === "text" && part.text) {
-          text = part.text
           if (part.text.includes(SELF_ASSESSMENT_MARKER)) {
             isReflection = true
             break
           }
+          texts.push(part.text)
         }
       }
-      if (!isReflection && text) {
-        if (/plan mode/i.test(text)) return true
-        if (/\b(create|make|draft|generate|propose|write|update)\b.{1,30}\bplan\b/i.test(text)) return true
-        if (/^plan\b/i.test(text.trim())) return true
+      if (!isReflection && texts.length > 0) {
+        for (const text of texts) {
+          if (/plan mode/i.test(text)) return true
+          if (/\b(create|make|draft|generate|propose|write|update)\b.{1,30}\bplan\b/i.test(text)) return true
+          if (/^plan\b/i.test(text.trim())) return true
+        }
         return false
       }
     }
diff --git a/test/reflection-3.unit.test.ts b/test/reflection-3.unit.test.ts
index d432ea2..5758f39 100644
--- a/test/reflection-3.unit.test.ts
+++ b/test/reflection-3.unit.test.ts
@@ -11,6 +11,7 @@ import {
   parseModelSpec,
   getCrossReviewModelSpec,
   getGitHubCopilotModelForRouting,
+  isPlanMode,
   RoutingConfig
 } from "../reflection-3.test-helpers.ts"
 import { detectPlanningLoop } from "../reflection-3.ts"
@@ -722,3 +723,194 @@ describe("GitHub Copilot model routing", () => {
     assert.strictEqual(getGitHubCopilotModelForRouting(undefined), null)
   })
 })
+
+describe("isPlanMode", () => {
+  // Helper to create a message with given role and text parts
+  function msg(role: string, ...texts: string[]) {
+    return {
+      info: { role },
+      parts: texts.map(t => ({ type: "text", text: t }))
+    }
+  }
+
+  describe("system/developer message detection", () => {
+    it("detects 'Plan Mode' in system message", () => {
+      const messages = [msg("system", "# Plan Mode - System Reminder")]
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+
+    it("detects 'plan mode ACTIVE' in developer message", () => {
+      const messages = [msg("developer", "CRITICAL: plan mode ACTIVE - you are in READ-ONLY phase")]
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+
+    it("detects 'read-only mode' in system message", () => {
+      const messages = [msg("system", "You are in read-only mode")]
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+
+    it("detects 'READ-ONLY phase' in system message", () => {
+      const messages = [msg("system", "you are in READ-ONLY phase")]
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+
+    it("detects 'plan mode is active' in system message", () => {
+      const messages = [msg("system", "plan mode is active. Do not edit files.")]
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+  })
+
+  describe("system-reminder detection (OpenCode actual format)", () => {
+    it("detects default plan.txt system-reminder in user message", () => {
+      const reminder = `<system-reminder>
+# Plan Mode - System Reminder
+
+CRITICAL: Plan mode ACTIVE - you are in READ-ONLY phase. STRICTLY FORBIDDEN:
+ANY file edits, modifications, or system changes.
+</system-reminder>`
+      const messages = [msg("user", "Help me plan", reminder)]
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+
+    it("detects experimental plan mode system-reminder", () => {
+      const reminder = `<system-reminder>
+Plan mode is active. The user indicated that they do not want you to execute yet --
+you MUST NOT make any edits.
+</system-reminder>`
+      const messages = [msg("user", "Design the architecture", reminder)]
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+
+    it("detects plan mode system-reminder even in older messages", () => {
+      const reminder = `<system-reminder>
+Plan mode is active. READ-ONLY phase.
+</system-reminder>`
+      const messages = [
+        msg("user", "First message", reminder),
+        msg("assistant", "Here is my plan..."),
+        msg("user", "Thanks, looks good")
+      ]
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+
+    it("detects READ-ONLY phase in system-reminder", () => {
+      const reminder = `<system-reminder>
+CRITICAL: you are in READ-ONLY phase. Do not modify files.
+</system-reminder>`
+      const messages = [msg("user", "Analyze the code", reminder)]
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+
+    it("does NOT trigger on system-reminder without plan mode keywords", () => {
+      const reminder = `<system-reminder>
+You have access to these tools: read, write, edit.
+</system-reminder>`
+      const messages = [msg("user", "Fix the bug", reminder)]
+      assert.strictEqual(isPlanMode(messages), false)
+    })
+
+    it("does NOT trigger on plan mode keywords outside system-reminder", () => {
+      // The user says "plan mode" literally -> detected via user message check, not system-reminder
+      const messages = [msg("user", "Enable plan mode")]
+      assert.strictEqual(isPlanMode(messages), true) // detected via user keyword check
+    })
+  })
+
+  describe("user message keyword detection", () => {
+    it("detects 'plan mode' in user message (case insensitive)", () => {
+      const messages = [msg("user", "Switch to Plan Mode")]
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+
+    it("detects 'plan' at start of user message", () => {
+      const messages = [msg("user", "plan the architecture for the new feature")]
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+
+    it("detects 'create a plan' pattern", () => {
+      const messages = [msg("user", "create a plan for the refactoring")]
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+
+    it("detects 'write a plan' pattern", () => {
+      const messages = [msg("user", "write a detailed plan")]
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+
+    it("does NOT detect 'plan' in the middle of unrelated text", () => {
+      const messages = [msg("user", "Fix the airplane display bug")]
+      assert.strictEqual(isPlanMode(messages), false)
+    })
+
+    it("does NOT trigger on regular coding tasks", () => {
+      const messages = [msg("user", "Fix the login bug and add tests")]
+      assert.strictEqual(isPlanMode(messages), false)
+    })
+  })
+
+  describe("reflection message handling", () => {
+    it("skips reflection messages when looking for user keywords", () => {
+      const reflectionMsg = {
+        info: { role: "user" },
+        parts: [{ type: "text", text: "## Reflection-3 Self-Assessment\nplan mode test" }]
+      }
+      const messages = [msg("user", "Fix the bug"), reflectionMsg]
+      assert.strictEqual(isPlanMode(messages), false)
+    })
+
+    it("checks non-reflection user message even after reflection message", () => {
+      const reflectionMsg = {
+        info: { role: "user" },
+        parts: [{ type: "text", text: "## Reflection-3 Self-Assessment\nsome assessment" }]
+      }
+      const messages = [msg("user", "Switch to plan mode"), reflectionMsg]
+      // Walks backward: skips reflectionMsg, finds "Switch to plan mode"
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+  })
+
+  describe("multiple text parts in a single message", () => {
+    it("checks all text parts, not just the last one", () => {
+      const messages = [{
+        info: { role: "user" },
+        parts: [
+          { type: "text", text: "plan mode please" },
+          { type: "text", text: "I want to think about this" }
+        ]
+      }]
+      assert.strictEqual(isPlanMode(messages), true)
+    })
+  })
+
+  describe("edge cases", () => {
+    it("returns false for empty messages array", () => {
+      assert.strictEqual(isPlanMode([]), false)
+    })
+
+    it("returns false for messages with no parts", () => {
+      const messages = [{ info: { role: "user" } }]
+      assert.strictEqual(isPlanMode(messages), false)
+    })
+
+    it("returns false for messages with empty text parts", () => {
+      const messages = [{ info: { role: "user" }, parts: [{ type: "text", text: "" }] }]
+      assert.strictEqual(isPlanMode(messages), false)
+    })
+
+    it("returns false for assistant-only messages", () => {
+      const messages = [msg("assistant", "Here is the plan for the feature")]
+      assert.strictEqual(isPlanMode(messages), false)
+    })
+
+    it("handles build-switch reminder (should NOT be plan mode)", () => {
+      const reminder = `<system-reminder>
+Your operational mode has changed from plan to build.
+You are no longer in read-only mode.
+</system-reminder>`
+      // "no longer in read-only mode" should not match — but "plan" + system-reminder exists
+      // The regex checks for "plan mode" (case insensitive) — "from plan to build" contains "plan" but NOT "plan mode"
+      const messages = [msg("user", "Now implement it", reminder)]
+      assert.strictEqual(isPlanMode(messages), false)
+    })
+  })
+})