From 4e7d0b96cddb7dec8b506ac442533678013bdcbe Mon Sep 17 00:00:00 2001 From: Zbigniew Sobiecki Date: Sat, 14 Mar 2026 07:39:33 +0000 Subject: [PATCH] feat(backends): add completion-check continuation loop and shared evidence module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a continuation loop in both Claude Code and OpenCode backends so agents that fail a post-completion check (e.g. no authoritative PR sidecar written) can resume the session and retry rather than immediately failing the run. ## Core changes ### Shared completion module (`src/backends/completion.ts`) - Extract `applyCompletionEvidence()` from OpenCode's local scope to the shared `completion.ts` module so both backends share identical evidence-upgrade logic. ### Claude Code backend (`src/backends/claude-code/index.ts`) - Add `consumeStream()` helper: processes the SDK stream, returns `turnCount` and `toolCallCount` (replaces the `StreamConsumptionContext` mutable-ref pattern). - Add `countToolCalls()` helper to count `tool_use` blocks without deep nesting. - Add `decideContinuation()` helper: encapsulates the completion-failure check, max-turns guard, and continuation-warning log; keeps `execute()` complexity low. - Add `cleanupPersistedSession()`: removes `~/.claude/projects/<encoded-cwd>/` after each worker run. Encodes cwd with `replaceAll(path.sep, '-')` to match the SDK's actual directory naming (fix: plain `path.join` silently produced a wrong path). - Continuation loop in `execute()`: on completion failure, re-prompt the existing session with `{ continue: true }` up to `maxContinuationTurns` times. - Fix log message: "execution completed" → "turn completed" (accurate for both initial and continuation turns). - Fix spread pattern: `...(bool && obj)` → `...(bool ? obj : {})`. - Log `toolCallCount` in the continuation warning (parity with OpenCode). 
### OpenCode backend (`src/backends/opencode/index.ts`) - Remove local `applyCompletionEvidence()` in favour of the shared import. - Fix `maxContinuationTurns` fallback: `1` → `0` (match Claude Code; if `completionRequirements` is absent there is nothing to check). ### Adapter (`src/backends/adapter.ts`) - Bump `maxContinuationTurns` from 1 → 2. ## Tests - `tests/unit/backends/completion.test.ts` (new): covers `applyCompletionEvidence` for all cases (no sidecar, sidecar upgrades text evidence, adds missing prUrl, default command fallback). - `tests/unit/backends/claude-code.test.ts`: five new continuation-loop scenarios (success after retry, exhausted turns, non-success stops immediately, cost accumulation, no-op when completionRequirements absent). Adds `beforeEach` reset and renames inner helper to `queueStream` to avoid shadowing the outer `mockStream`. - `tests/unit/backends/adapter.test.ts`: update `maxContinuationTurns` expectation. Co-Authored-By: Claude Sonnet 4.6 --- src/backends/adapter.ts | 2 +- src/backends/claude-code/index.ts | 233 +++++++++++++++++----- src/backends/completion.ts | 23 +++ src/backends/opencode/index.ts | 27 +-- tests/unit/backends/adapter.test.ts | 2 +- tests/unit/backends/claude-code.test.ts | 254 +++++++++++++++++++++++- tests/unit/backends/completion.test.ts | 113 +++++++++++ 7 files changed, 584 insertions(+), 70 deletions(-) create mode 100644 tests/unit/backends/completion.test.ts diff --git a/src/backends/adapter.ts b/src/backends/adapter.ts index 1dd74c89..5938f695 100644 --- a/src/backends/adapter.ts +++ b/src/backends/adapter.ts @@ -222,7 +222,7 @@ async function buildExecutionPlan( prSidecarPath, reviewSidecarPath, pushedChangesSidecarPath, - maxContinuationTurns: 1, + maxContinuationTurns: 2, }; // Override GITHUB_TOKEN in subprocess secrets with agent-scoped token diff --git a/src/backends/claude-code/index.ts b/src/backends/claude-code/index.ts index 7243e3b4..43bb287e 100644 --- a/src/backends/claude-code/index.ts +++ 
b/src/backends/claude-code/index.ts @@ -1,4 +1,6 @@ import { constants, accessSync, existsSync, readdirSync, statSync, writeFileSync } from 'node:fs'; +import { rm } from 'node:fs/promises'; +import { homedir } from 'node:os'; import path from 'node:path'; import { query } from '@anthropic-ai/claude-agent-sdk'; import type { @@ -13,6 +15,12 @@ import { logger } from '../../utils/logging.js'; import { extractPRUrl } from '../../utils/prUrl.js'; import { getWorkspaceDir } from '../../utils/repo.js'; import { CLAUDE_CODE_ENGINE_DEFINITION } from '../catalog.js'; +import { + type CompletionRequirements, + applyCompletionEvidence, + getCompletionFailure, + readCompletionEvidence, +} from '../completion.js'; import { cleanupContextFiles } from '../contextFiles.js'; import { buildSystemPrompt, buildTaskPrompt } from '../nativeTools.js'; import type { AgentEngine, AgentEngineResult, AgentExecutionPlan } from '../types.js'; @@ -213,7 +221,7 @@ function buildResult( } : undefined; - input.logWriter('INFO', 'Claude Code SDK execution completed', { + input.logWriter('INFO', 'Claude Code SDK turn completed', { success, subtype: resultMessage?.subtype, turns: resultMessage?.num_turns, @@ -336,6 +344,112 @@ function logLlmCall( }); } +function countToolCalls(assistantMsg: SDKAssistantMessage): number { + return (assistantMsg.message?.content ?? []).filter((b) => b.type === 'tool_use').length; +} + +/** + * Consume the Claude Code SDK stream and collect assistant messages, result, and counters. + * Returns the updated turn count and tool call count accumulated during this stream. 
+ */ +async function consumeStream( + stream: ReturnType, + input: AgentExecutionPlan, + model: string, + startTurnCount: number, +): Promise<{ + assistantMessages: SDKAssistantMessage[]; + resultMessage: SDKResultMessage | undefined; + turnCount: number; + toolCallCount: number; +}> { + const assistantMessages: SDKAssistantMessage[] = []; + let resultMessage: SDKResultMessage | undefined; + let turnCount = startTurnCount; + let toolCallCount = 0; + + for await (const message of stream) { + if (message.type === 'assistant') { + const assistantMsg = message as SDKAssistantMessage; + assistantMessages.push(assistantMsg); + turnCount++; + await input.progressReporter.onIteration(turnCount, input.maxIterations); + processAssistantMessage(assistantMsg, turnCount, input); + toolCallCount += countToolCalls(assistantMsg); + logLlmCall(input, assistantMsg, turnCount, model); + } else if (message.type === 'system') { + const sysMsg = message as { subtype: string; [key: string]: unknown }; + if (sysMsg.subtype === 'task_notification') { + processTaskNotification(sysMsg, input); + } else { + processSystemMessage(sysMsg, input.logWriter); + } + } else if (message.type === 'result') { + resultMessage = message as SDKResultMessage; + } + } + + return { assistantMessages, resultMessage, turnCount, toolCallCount }; +} + +/** + * Clean up the Claude Code persisted session directory. + * Since workers are ephemeral, there's no need to keep session data after execution. + * + * The SDK encodes cwd into the session directory name by replacing path separators with '-'. + * For example, /tmp/cascade-repo-abc becomes ~/.claude/projects/-tmp-cascade-repo-abc. 
+ */ +async function cleanupPersistedSession(repoDir: string): Promise { + const encodedDir = repoDir.replaceAll(path.sep, '-'); + const sessionDir = path.join(homedir(), '.claude', 'projects', encodedDir); + try { + if (existsSync(sessionDir)) { + await rm(sessionDir, { recursive: true, force: true }); + } + } catch { + // Best-effort cleanup + } +} + +type ContinuationDecision = + | { done: true; result: AgentEngineResult } + | { done: false; promptText: string }; + +/** + * Check completion requirements and decide whether to continue or return a final result. + * Logs the continuation warning when a new turn is needed. + */ +function decideContinuation( + result: AgentEngineResult, + completionRequirements: CompletionRequirements | undefined, + continuationTurns: number, + maxContinuationTurns: number, + totalCost: number | undefined, + logWriter: AgentExecutionPlan['logWriter'], + toolCallCount: number, +): ContinuationDecision { + const completionFailure = getCompletionFailure( + completionRequirements, + readCompletionEvidence(completionRequirements), + ); + if (!completionFailure) { + return { done: true, result: { ...result, cost: totalCost } }; + } + if (continuationTurns >= maxContinuationTurns) { + return { + done: true, + result: { ...result, success: false, error: completionFailure.error, cost: totalCost }, + }; + } + logWriter('WARN', 'Claude Code completion check failed; continuing session', { + reason: completionFailure.error, + continuationTurn: continuationTurns + 1, + maxContinuationTurns, + toolCallCount, + }); + return { done: false, promptText: completionFailure.continuationPrompt }; +} + /** * Claude Code SDK backend for CASCADE. * @@ -384,61 +498,90 @@ export class ClaudeCodeEngine implements AgentEngine { debugRepoDirectory(input.repoDir); - const assistantMessages: SDKAssistantMessage[] = []; - let resultMessage: SDKResultMessage | undefined; + const maxContinuationTurns = input.completionRequirements?.maxContinuationTurns ?? 
0; + let continuationTurns = 0; + let promptText = taskPrompt; + let isContinuation = false; let turnCount = 0; - const stderrChunks: string[] = []; + let totalCost: number | undefined; try { - const stream = query({ - prompt: taskPrompt, - options: { - model, - systemPrompt, - cwd: input.repoDir, - additionalDirectories: [getWorkspaceDir()], - maxBudgetUsd: input.budgetUsd, - permissionMode: 'bypassPermissions', - allowDangerouslySkipPermissions: true, - tools: sdkTools, - allowedTools: sdkTools, - persistSession: false, - hooks, - env, - debug: true, - stderr: (data: string) => { - stderrChunks.push(data); - input.logWriter('INFO', 'Claude Code stderr', { data: data.trim() }); + for (;;) { + const stderrChunks: string[] = []; + const stream = query({ + prompt: promptText, + options: { + model, + systemPrompt, + cwd: input.repoDir, + additionalDirectories: [getWorkspaceDir()], + maxBudgetUsd: input.budgetUsd, + permissionMode: 'bypassPermissions', + allowDangerouslySkipPermissions: true, + tools: sdkTools, + allowedTools: sdkTools, + persistSession: true, + hooks, + env, + debug: true, + stderr: (data: string) => { + stderrChunks.push(data); + input.logWriter('INFO', 'Claude Code stderr', { data: data.trim() }); + }, + ...(isContinuation ? 
{ continue: true } : {}), }, - }, - }); - - for await (const message of stream) { - if (message.type === 'assistant') { - const assistantMsg = message as SDKAssistantMessage; - assistantMessages.push(assistantMsg); - turnCount++; - await input.progressReporter.onIteration(turnCount, input.maxIterations); - processAssistantMessage(assistantMsg, turnCount, input); - logLlmCall(input, assistantMsg, turnCount, model); - } else if (message.type === 'system') { - const sysMsg = message as { subtype: string; [key: string]: unknown }; - if (sysMsg.subtype === 'task_notification') { - processTaskNotification(sysMsg, input); - } else { - processSystemMessage(sysMsg, input.logWriter); - } - } else if (message.type === 'result') { - resultMessage = message as SDKResultMessage; + }); + + const { + assistantMessages, + resultMessage, + turnCount: newTurnCount, + toolCallCount, + } = await consumeStream(stream, input, model, turnCount); + turnCount = newTurnCount; + + const turnResult = buildResult( + assistantMessages, + resultMessage, + stderrChunks, + input, + startTime, + ); + + // Accumulate cost across continuation turns + if (turnResult.cost !== undefined) { + totalCost = (totalCost ?? 
0) + turnResult.cost; } - } - return buildResult(assistantMessages, resultMessage, stderrChunks, input, startTime); + const result = applyCompletionEvidence(turnResult, input.completionRequirements); + + // Don't continue on non-success results + if (!result.success) { + return { ...result, cost: totalCost }; + } + + const decision = decideContinuation( + result, + input.completionRequirements, + continuationTurns, + maxContinuationTurns, + totalCost, + input.logWriter, + toolCallCount, + ); + if (decision.done) return decision.result; + + continuationTurns++; + promptText = decision.promptText; + isContinuation = true; + } } finally { // Clean up offloaded context files after execution if (hasOffloadedContext) { await cleanupContextFiles(input.repoDir); } + // Clean up persisted session directory — workers are ephemeral + await cleanupPersistedSession(input.repoDir); } } } diff --git a/src/backends/completion.ts b/src/backends/completion.ts index e20190ee..42abbe89 100644 --- a/src/backends/completion.ts +++ b/src/backends/completion.ts @@ -1,5 +1,7 @@ import { existsSync, readFileSync } from 'node:fs'; +import type { AgentEngineResult } from './types.js'; + export interface CompletionRequirements { requiresPR?: boolean; requiresReview?: boolean; @@ -112,3 +114,24 @@ export function getCompletionFailure( return undefined; } + +/** + * Read sidecar files and upgrade text-based PR evidence to authoritative. + * Shared across Claude Code and OpenCode backends. + */ +export function applyCompletionEvidence( + result: AgentEngineResult, + completionRequirements: CompletionRequirements | undefined, +): AgentEngineResult { + const evidence = readCompletionEvidence(completionRequirements); + if (!evidence.prUrl) return result; + return { + ...result, + prUrl: evidence.prUrl, + prEvidence: { + source: 'native-tool-sidecar', + authoritative: true, + command: evidence.prCommand ?? 
'cascade-tools scm create-pr', + }, + }; +} diff --git a/src/backends/opencode/index.ts b/src/backends/opencode/index.ts index 7a5b4cfc..a383a4fd 100644 --- a/src/backends/opencode/index.ts +++ b/src/backends/opencode/index.ts @@ -16,7 +16,11 @@ import { storeLlmCall } from '../../db/repositories/runsRepository.js'; import { logger } from '../../utils/logging.js'; import { extractPRUrl } from '../../utils/prUrl.js'; import { OPENCODE_ENGINE_DEFINITION } from '../catalog.js'; -import { getCompletionFailure, readCompletionEvidence } from '../completion.js'; +import { + applyCompletionEvidence, + getCompletionFailure, + readCompletionEvidence, +} from '../completion.js'; import { cleanupContextFiles } from '../contextFiles.js'; import { formatNativeToolTransportError, @@ -549,23 +553,6 @@ function createIdlePromise(state: OpenCodeStreamState): Promise { }); } -function applyCompletionEvidence( - result: AgentEngineResult, - input: AgentExecutionPlan, -): AgentEngineResult { - const evidence = readCompletionEvidence(input.completionRequirements); - if (!evidence.prUrl) return result; - return { - ...result, - prUrl: evidence.prUrl, - prEvidence: { - source: 'native-tool-sidecar', - authoritative: true, - command: evidence.prCommand ?? 'cascade-tools scm create-pr', - }, - }; -} - function buildOpenCodeResultFromResponse( input: AgentExecutionPlan, state: OpenCodeStreamState, @@ -731,7 +718,7 @@ async function runOpenCodeTurnLoop( initialPrompt: string, state: OpenCodeStreamState, ): Promise { - const maxContinuationTurns = input.completionRequirements?.maxContinuationTurns ?? 1; + const maxContinuationTurns = input.completionRequirements?.maxContinuationTurns ?? 
0; let continuationTurns = 0; let promptText = initialPrompt; for (;;) { @@ -776,7 +763,7 @@ async function runOpenCodeTurnLoop( state, promptResponse, ); - const turnResult = applyCompletionEvidence(rawTurnResult, input); + const turnResult = applyCompletionEvidence(rawTurnResult, input.completionRequirements); if (!turnResult.success) return turnResult; const completionFailure = getCompletionFailure( diff --git a/tests/unit/backends/adapter.test.ts b/tests/unit/backends/adapter.test.ts index 81b52861..27253452 100644 --- a/tests/unit/backends/adapter.test.ts +++ b/tests/unit/backends/adapter.test.ts @@ -486,7 +486,7 @@ describe('executeWithEngine', () => { requiresPR: true, requiresReview: true, requiresPushedChanges: true, - maxContinuationTurns: 1, + maxContinuationTurns: 2, }), ); expect(backendInput.completionRequirements?.pushedChangesSidecarPath).toBeTruthy(); diff --git a/tests/unit/backends/claude-code.test.ts b/tests/unit/backends/claude-code.test.ts index f685208e..7a00f12c 100644 --- a/tests/unit/backends/claude-code.test.ts +++ b/tests/unit/backends/claude-code.test.ts @@ -14,7 +14,7 @@ vi.mock('../../../src/db/repositories/runsRepository.js', () => ({ storeLlmCall: (...args: unknown[]) => mockStoreLlmCall(...args), })); -import { existsSync, mkdtempSync, readFileSync, statSync } from 'node:fs'; +import { existsSync, mkdtempSync, readFileSync, rmSync, statSync, writeFileSync } from 'node:fs'; import { rm } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; @@ -404,7 +404,7 @@ describe('execute', () => { maxBudgetUsd: 5, permissionMode: 'bypassPermissions', allowDangerouslySkipPermissions: true, - persistSession: false, + persistSession: true, hooks: expect.objectContaining({ PreToolUse: expect.arrayContaining([expect.objectContaining({ matcher: 'Bash' })]), Stop: expect.arrayContaining([expect.objectContaining({ hooks: expect.any(Array) })]), @@ -816,7 +816,7 @@ describe('execute', () => { 
expect(input.logWriter).toHaveBeenCalledWith( 'INFO', - 'Claude Code SDK execution completed', + 'Claude Code SDK turn completed', expect.objectContaining({ success: true, durationMs: expect.any(Number), @@ -926,6 +926,254 @@ describe('execute', () => { }); }); +describe('continuation loop', () => { + beforeEach(() => { + mockQuery.mockReset(); + }); + + function queueStream(messages: Array<{ type: string; [key: string]: unknown }>) { + const iterator = messages[Symbol.iterator](); + const asyncIterator = { + [Symbol.asyncIterator]() { + return { + next() { + const result = iterator.next(); + return Promise.resolve(result); + }, + }; + }, + }; + mockQuery.mockReturnValueOnce(asyncIterator as ReturnType); + } + + it('continues session when completion check fails', async () => { + const tempDir = mkdtempSync(join(tmpdir(), 'cascade-cc-sidecar-')); + const prSidecarPath = join(tempDir, 'pr.json'); + + // First call: no sidecar written (completion check fails) + queueStream([ + { + type: 'result', + subtype: 'success', + result: 'I created a PR.', + total_cost_usd: 0.05, + num_turns: 3, + }, + ]); + // Second call: sidecar written (completion check passes) + const secondMessages = [ + { + type: 'result', + subtype: 'success', + result: 'PR created for real.', + total_cost_usd: 0.03, + num_turns: 1, + }, + ]; + const secondIterator = secondMessages[Symbol.iterator](); + mockQuery.mockReturnValueOnce({ + [Symbol.asyncIterator]() { + return { + next() { + // Write sidecar on first next() call to simulate tool execution + writeFileSync( + prSidecarPath, + JSON.stringify({ + prUrl: 'https://github.com/owner/repo/pull/42', + source: 'cascade-tools scm create-pr', + }), + ); + const result = secondIterator.next(); + return Promise.resolve(result); + }, + }; + }, + } as ReturnType); + + const engine = new ClaudeCodeEngine(); + const logWriter = vi.fn(); + const result = await engine.execute( + makeInput({ + logWriter, + completionRequirements: { + requiresPR: true, + 
prSidecarPath, + maxContinuationTurns: 2, + }, + }), + ); + rmSync(tempDir, { recursive: true, force: true }); + + expect(result.success).toBe(true); + expect(result.prUrl).toBe('https://github.com/owner/repo/pull/42'); + expect(result.prEvidence).toEqual({ + source: 'native-tool-sidecar', + authoritative: true, + command: 'cascade-tools scm create-pr', + }); + expect(mockQuery).toHaveBeenCalledTimes(2); + // Second call should have continue: true + expect(mockQuery.mock.calls[1][0]).toEqual( + expect.objectContaining({ + options: expect.objectContaining({ continue: true }), + }), + ); + expect(logWriter).toHaveBeenCalledWith( + 'WARN', + 'Claude Code completion check failed; continuing session', + expect.objectContaining({ + reason: 'Agent completed but no authoritative PR creation was recorded', + continuationTurn: 1, + maxContinuationTurns: 2, + }), + ); + }); + + it('fails after exhausting continuation turns', async () => { + const tempDir = mkdtempSync(join(tmpdir(), 'cascade-cc-exhaust-')); + const prSidecarPath = join(tempDir, 'pr.json'); + + // All calls succeed but never write the sidecar + for (let i = 0; i < 3; i++) { + queueStream([ + { + type: 'result', + subtype: 'success', + result: 'Narrated PR creation.', + total_cost_usd: 0.02, + num_turns: 1, + }, + ]); + } + + const engine = new ClaudeCodeEngine(); + const result = await engine.execute( + makeInput({ + completionRequirements: { + requiresPR: true, + prSidecarPath, + maxContinuationTurns: 2, + }, + }), + ); + rmSync(tempDir, { recursive: true, force: true }); + + expect(result.success).toBe(false); + expect(result.error).toBe('Agent completed but no authoritative PR creation was recorded'); + // Initial + 2 continuation = 3 total calls + expect(mockQuery).toHaveBeenCalledTimes(3); + }); + + it('does not continue on non-success results', async () => { + const tempDir = mkdtempSync(join(tmpdir(), 'cascade-cc-nosuccess-')); + const prSidecarPath = join(tempDir, 'pr.json'); + + queueStream([ + { + 
type: 'result', + subtype: 'error_max_turns', + errors: ['Exceeded maximum turns'], + total_cost_usd: 1.5, + num_turns: 20, + }, + ]); + + const engine = new ClaudeCodeEngine(); + const result = await engine.execute( + makeInput({ + completionRequirements: { + requiresPR: true, + prSidecarPath, + maxContinuationTurns: 2, + }, + }), + ); + rmSync(tempDir, { recursive: true, force: true }); + + expect(result.success).toBe(false); + expect(result.error).toBe('Exceeded maximum turns'); + // Should NOT have continued + expect(mockQuery).toHaveBeenCalledTimes(1); + }); + + it('accumulates cost across continuation turns', async () => { + const tempDir = mkdtempSync(join(tmpdir(), 'cascade-cc-cost-')); + const prSidecarPath = join(tempDir, 'pr.json'); + + // First call: no sidecar + queueStream([ + { + type: 'result', + subtype: 'success', + result: 'Working...', + total_cost_usd: 0.1, + num_turns: 2, + }, + ]); + // Second call: write sidecar + const secondMessages = [ + { + type: 'result', + subtype: 'success', + result: 'Done.', + total_cost_usd: 0.05, + num_turns: 1, + }, + ]; + const secondIterator = secondMessages[Symbol.iterator](); + mockQuery.mockReturnValueOnce({ + [Symbol.asyncIterator]() { + return { + next() { + writeFileSync( + prSidecarPath, + JSON.stringify({ + prUrl: 'https://github.com/owner/repo/pull/99', + source: 'cascade-tools scm create-pr', + }), + ); + const result = secondIterator.next(); + return Promise.resolve(result); + }, + }; + }, + } as ReturnType); + + const engine = new ClaudeCodeEngine(); + const result = await engine.execute( + makeInput({ + completionRequirements: { + requiresPR: true, + prSidecarPath, + maxContinuationTurns: 2, + }, + }), + ); + rmSync(tempDir, { recursive: true, force: true }); + + expect(result.success).toBe(true); + expect(result.cost).toBeCloseTo(0.15); + }); + + it('does not continue when no completionRequirements', async () => { + queueStream([ + { + type: 'result', + subtype: 'success', + result: 'Done.', + 
total_cost_usd: 0.01, + num_turns: 1, + }, + ]); + + const engine = new ClaudeCodeEngine(); + const result = await engine.execute(makeInput({ completionRequirements: undefined })); + + expect(result.success).toBe(true); + expect(mockQuery).toHaveBeenCalledTimes(1); + }); +}); + describe('ensureOnboardingFlag', () => { let fakeHome: string; let originalHome: string | undefined; diff --git a/tests/unit/backends/completion.test.ts b/tests/unit/backends/completion.test.ts new file mode 100644 index 00000000..c6c61463 --- /dev/null +++ b/tests/unit/backends/completion.test.ts @@ -0,0 +1,113 @@ +import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it } from 'vitest'; + +import { applyCompletionEvidence } from '../../../src/backends/completion.js'; +import type { AgentEngineResult } from '../../../src/backends/types.js'; + +describe('applyCompletionEvidence', () => { + it('returns result unchanged when no sidecar exists', () => { + const result: AgentEngineResult = { + success: true, + output: 'Done', + cost: 0.1, + prUrl: undefined, + prEvidence: undefined, + }; + const updated = applyCompletionEvidence(result, { + requiresPR: true, + prSidecarPath: '/nonexistent/path.json', + }); + expect(updated).toBe(result); + }); + + it('returns result unchanged when no completionRequirements', () => { + const result: AgentEngineResult = { + success: true, + output: 'Done', + cost: 0.1, + }; + const updated = applyCompletionEvidence(result, undefined); + expect(updated).toBe(result); + }); + + it('upgrades text evidence to authoritative when sidecar exists', () => { + const tempDir = mkdtempSync(join(tmpdir(), 'cascade-completion-test-')); + const prSidecarPath = join(tempDir, 'pr.json'); + writeFileSync( + prSidecarPath, + JSON.stringify({ + prUrl: 'https://github.com/owner/repo/pull/42', + source: 'cascade-tools scm create-pr', + }), + ); + + const result: AgentEngineResult = 
{ + success: true, + output: 'PR created at https://github.com/owner/repo/pull/42', + cost: 0.1, + prUrl: 'https://github.com/owner/repo/pull/42', + prEvidence: { source: 'text', authoritative: false }, + }; + + const updated = applyCompletionEvidence(result, { + requiresPR: true, + prSidecarPath, + }); + + rmSync(tempDir, { recursive: true, force: true }); + + expect(updated.prUrl).toBe('https://github.com/owner/repo/pull/42'); + expect(updated.prEvidence).toEqual({ + source: 'native-tool-sidecar', + authoritative: true, + command: 'cascade-tools scm create-pr', + }); + // Should preserve other fields + expect(updated.success).toBe(true); + expect(updated.output).toBe('PR created at https://github.com/owner/repo/pull/42'); + expect(updated.cost).toBe(0.1); + }); + + it('adds PR evidence when result had no prUrl', () => { + const tempDir = mkdtempSync(join(tmpdir(), 'cascade-completion-test-')); + const prSidecarPath = join(tempDir, 'pr.json'); + writeFileSync( + prSidecarPath, + JSON.stringify({ + prUrl: 'https://github.com/owner/repo/pull/99', + source: 'cascade-tools scm create-pr', + }), + ); + + const result: AgentEngineResult = { + success: true, + output: 'Done', + cost: 0.1, + }; + + const updated = applyCompletionEvidence(result, { + requiresPR: true, + prSidecarPath, + }); + + rmSync(tempDir, { recursive: true, force: true }); + + expect(updated.prUrl).toBe('https://github.com/owner/repo/pull/99'); + expect(updated.prEvidence?.authoritative).toBe(true); + }); + + it('uses default command when source is missing from sidecar', () => { + const tempDir = mkdtempSync(join(tmpdir(), 'cascade-completion-test-')); + const prSidecarPath = join(tempDir, 'pr.json'); + writeFileSync(prSidecarPath, JSON.stringify({ prUrl: 'https://github.com/o/r/pull/1' })); + + const result: AgentEngineResult = { success: true, output: '', cost: 0 }; + const updated = applyCompletionEvidence(result, { prSidecarPath }); + + rmSync(tempDir, { recursive: true, force: true }); + + 
expect(updated.prEvidence?.command).toBe('cascade-tools scm create-pr'); + }); +});