Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 84 additions & 5 deletions src/backends/claude-code/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { randomUUID } from 'node:crypto';
import { constants, accessSync, existsSync, readdirSync, statSync, writeFileSync } from 'node:fs';
import { rm } from 'node:fs/promises';
import { homedir } from 'node:os';
Expand All @@ -9,6 +10,7 @@ import type {
SDKResultSuccess,
SDKStatusMessage,
SDKSystemMessage,
SDKUserMessage,
} from '@anthropic-ai/claude-agent-sdk';
import { getEngineSettings } from '../../config/engineSettings.js';
import { logger } from '../../utils/logging.js';
Expand All @@ -24,7 +26,7 @@ import {
import { cleanupContextFiles } from '../contextFiles.js';
import { buildSystemPrompt, buildTaskPrompt } from '../nativeTools.js';
import { logLlmCall } from '../shared/llmCallLogger.js';
import type { AgentEngine, AgentEngineResult, AgentExecutionPlan } from '../types.js';
import type { AgentEngine, AgentEngineResult, AgentExecutionPlan, ContextImage } from '../types.js';
import { buildClaudeEnv } from './env.js';
import { buildHooks } from './hooks.js';
import { CLAUDE_CODE_MODEL_IDS, DEFAULT_CLAUDE_CODE_MODEL } from './models.js';
Expand Down Expand Up @@ -64,6 +66,74 @@ export function ensureOnboardingFlag(): void {
}
}

/**
 * MIME types that this module forwards as base64 image content blocks.
 * Images with any other MIME type are filtered out before the prompt is built.
 */
const CLAUDE_SUPPORTED_IMAGE_TYPES = new Set<string>(['image/jpeg', 'image/png', 'image/gif', 'image/webp']);

/**
 * Produce the first conversation turn for the Claude Code engine as a
 * single-message async stream.
 *
 * The one `SDKUserMessage` yielded carries the task prompt as a leading text
 * block, followed by one base64 image block per supported image. This delivers
 * work-item images inline in the conversation instead of relying on the agent
 * to open files written to disk.
 *
 * @param text - Task prompt placed in the leading text block.
 * @param images - Candidate images; entries whose MIME type is not in
 *   `CLAUDE_SUPPORTED_IMAGE_TYPES` are dropped without error.
 * @returns An async iterable that yields exactly one user message, with a
 *   freshly generated `session_id` and a null `parent_tool_use_id`.
 */
export async function* buildPromptWithImages(
  text: string,
  images: ContextImage[],
): AsyncIterable<SDKUserMessage> {
  // flatMap doubles as filter + map: unsupported images contribute no block.
  const imageBlocks = images.flatMap((image) =>
    CLAUDE_SUPPORTED_IMAGE_TYPES.has(image.mimeType)
      ? [
          {
            type: 'image' as const,
            source: {
              type: 'base64' as const,
              media_type: image.mimeType as 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp',
              data: image.base64Data,
            },
          },
        ]
      : [],
  );

  const firstTurn: SDKUserMessage = {
    type: 'user',
    message: { role: 'user', content: [{ type: 'text', text }, ...imageBlocks] },
    parent_tool_use_id: null,
    session_id: randomUUID(),
  };

  yield firstTurn;
}

/**
 * Gather every image attached to the given context injections and keep only
 * those whose MIME type is in `CLAUDE_SUPPORTED_IMAGE_TYPES`.
 *
 * Logging side effects:
 * - INFO with the kept count when at least one image is supported.
 * - WARN with the skipped count and the distinct rejected MIME types when at
 *   least one image is dropped.
 *
 * @param contextInjections - Injections whose optional `images` arrays are
 *   flattened in order.
 * @param logWriter - Receives the INFO / WARN entries described above.
 * @returns The supported images, in their original order.
 */
function filterContextImages(
  contextInjections: AgentExecutionPlan['contextInjections'],
  logWriter: AgentExecutionPlan['logWriter'],
): ContextImage[] {
  const candidates = contextInjections.flatMap((injection) => injection.images ?? []);

  // Partition in a single pass; the Set keeps rejected types distinct and in
  // first-seen order.
  const supported: ContextImage[] = [];
  const rejectedTypes = new Set<ContextImage['mimeType']>();
  let skipped = 0;
  for (const image of candidates) {
    if (CLAUDE_SUPPORTED_IMAGE_TYPES.has(image.mimeType)) {
      supported.push(image);
    } else {
      skipped += 1;
      rejectedTypes.add(image.mimeType);
    }
  }

  if (supported.length > 0) {
    logWriter('INFO', 'Injecting work item images as SDK content blocks', {
      count: supported.length,
    });
  }

  if (skipped > 0) {
    logWriter('WARN', 'Skipped unsupported image MIME types', {
      skipped,
      types: [...rejectedTypes],
    });
  }

  return supported;
}

/**
* Extract a GitHub PR URL from assistant messages (tool results containing create-pr output).
*/
Expand Down Expand Up @@ -525,9 +595,15 @@ export class ClaudeCodeEngine implements AgentEngine {
async execute(input: AgentExecutionPlan): Promise<AgentEngineResult> {
const startTime = Date.now();
const systemPrompt = buildSystemPrompt(input.systemPrompt, input.availableTools);

// Collect supported images for native SDK delivery; strip from injections so
// offloadLargeContext does not also write them to disk (redundant for this engine).
const supportedImages = filterContextImages(input.contextInjections, input.logWriter);
const injectionsForPrompt = input.contextInjections.map(({ images: _images, ...rest }) => rest);

const { prompt: taskPrompt, hasOffloadedContext } = await buildTaskPrompt(
input.taskPrompt,
input.contextInjections,
injectionsForPrompt,
input.repoDir,
);
// Resolve model again here for backward compatibility: execute() may be called
Expand Down Expand Up @@ -569,15 +645,18 @@ export class ClaudeCodeEngine implements AgentEngine {

const maxContinuationTurns = input.completionRequirements?.maxContinuationTurns ?? 0;
let continuationTurns = 0;
let promptText = taskPrompt;
// Use AsyncIterable prompt for the first turn when images are present; string otherwise.
// Continuation turns always use a plain string prompt.
let promptInput: string | AsyncIterable<SDKUserMessage> =
supportedImages.length > 0 ? buildPromptWithImages(taskPrompt, supportedImages) : taskPrompt;
let isContinuation = false;
let turnCount = 0;
let totalCost: number | undefined;

for (;;) {
const stderrChunks: string[] = [];
const stream = query({
prompt: promptText,
prompt: promptInput,
options: {
model,
systemPrompt,
Expand Down Expand Up @@ -641,7 +720,7 @@ export class ClaudeCodeEngine implements AgentEngine {
if (decision.done) return decision.result;

continuationTurns++;
promptText = decision.promptText;
promptInput = decision.promptText;
isContinuation = true;
}
}
Expand Down
8 changes: 5 additions & 3 deletions src/backends/shared/contextFiles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
* to read them on-demand using its built-in Read tool.
*
* When context injections contain images, each image is written as a binary
* file to `.cascade/context/images/` so native-tool engines (Claude Code,
* OpenCode, Codex) can read them with their built-in Read tool.
* file to `.cascade/context/images/` so native-tool engines (Codex, OpenCode)
* can read them with their built-in Read tool. The Claude Code engine receives
* images natively as SDK ImageBlockParam content blocks instead (see
* src/backends/claude-code/index.ts buildPromptWithImages).
*/
import { mkdir, rm, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
Expand Down Expand Up @@ -104,7 +106,7 @@ function generateReadInstructions(files: OffloadedFile[], images: OffloadedImage
if (images.length > 0) {
if (files.length > 0) lines.push('');
lines.push(
`The following context images have been saved to \`${CONTEXT_OFFLOAD_CONFIG.contextDir}/${IMAGES_SUBDIR}/\`:`,
'The following work item images were pre-downloaded using authenticated credentials. Use the Read tool on the file paths below — do NOT curl or HTTP-fetch the original attachment URLs:',
);
lines.push('');
for (const img of images) {
Expand Down
5 changes: 3 additions & 2 deletions src/backends/shared/nativeToolPrompts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ const NATIVE_TOOL_EXECUTION_RULES = `## Native Tool Execution Rules
You are operating in a native-tool environment, not a gadget/function-call environment.

- Never write pseudo tool calls such as \`[tool_call: ...]\`, \`ReadFile(...)\`, \`RipGrep(...)\`, \`Tmux(...)\`, \`CreatePR(...)\`, or similar function-call text in your assistant response.
- Use actual OpenCode/Codex tool invocations instead:
- Use your built-in tools instead:
- use built-in file/search tools or the shell tool for repository exploration
- use the edit tool for file modifications
- use the shell tool for all \`cascade-tools ...\`, \`git ...\`, \`rg ...\`, \`fd ...\`, test, lint, and build commands
- When the task instructions mention gadget names like \`CreatePR\`, \`PostComment\`, \`UpdateChecklistItem\`, \`Finish\`, \`ReadWorkItem\`, \`TodoUpsert\`, or \`TodoUpdateStatus\`, treat that as a request to run the equivalent real command or tool action, not to print the gadget name.
- If you catch yourself composing a pseudo tool call in plain text, stop and use the real tool instead.`;
- If you catch yourself composing a pseudo tool call in plain text, stop and use the real tool instead.
- Trello, JIRA, and GitHub attachment URLs require backend authentication. NEVER curl, wget, or HTTP-fetch them — they return an authorization error. Work item images are pre-fetched and available either as images in your conversation context or as files under \`.cascade/context/images/\` — use whichever is present; never fetch the original URLs.`;

/**
* Format a single CLI parameter for tool guidance documentation.
Expand Down
1 change: 1 addition & 0 deletions src/backends/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import type { CompletionRequirements } from './completion.js';

// Re-export shared contracts so downstream code that imports from here continues to work.
export type {
ContextImage,
ContextInjection,
LogWriter,
ProgressReporter,
Expand Down
112 changes: 112 additions & 0 deletions tests/unit/backends/claude-code-imagePrompt.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import type { SDKUserMessage } from '@anthropic-ai/claude-agent-sdk';
import type { ContentBlockParam } from '@anthropic-ai/sdk/resources';
import { describe, expect, it, vi } from 'vitest';
import type { ContextImage } from '../../../src/agents/contracts/index.js';
import { buildPromptWithImages } from '../../../src/backends/claude-code/index.js';

// Replace the agent SDK module with a stub that only exposes a mocked `query`,
// so nothing in this file can start a real SDK query.
vi.mock('@anthropic-ai/claude-agent-sdk', () => ({
query: vi.fn(),
}));

// Swap the logger for no-op mock functions.
vi.mock('../../../src/utils/logging.js', () => ({
logger: { warn: vi.fn(), info: vi.fn(), error: vi.fn(), debug: vi.fn() },
}));

// `storeLlmCall` resolves to undefined instead of touching a repository.
// NOTE(review): presumably mocked because the module under test imports it
// transitively — confirm.
vi.mock('../../../src/db/repositories/runsRepository.js', () => ({
storeLlmCall: vi.fn().mockResolvedValue(undefined),
}));

/**
 * Drain an async iterable into an array.
 *
 * @param iterable - Source to consume until it is exhausted.
 * @returns Every yielded item, in the order it was produced.
 */
async function collect<T>(iterable: AsyncIterable<T>): Promise<T[]> {
  const drained: T[] = [];
  for await (const value of iterable) {
    drained.push(value);
  }
  return drained;
}

// Supported-type fixture that also carries the optional `altText` field.
// The payload is placeholder data (base64 of "hello"), not a real PNG.
const PNG_IMAGE: ContextImage = {
base64Data: 'aGVsbG8=',
mimeType: 'image/png',
altText: 'A diagram',
};

// Second supported-type fixture, without `altText`; payload is base64 of "test".
const JPEG_IMAGE: ContextImage = {
base64Data: 'dGVzdA==',
mimeType: 'image/jpeg',
};

// Fixture whose MIME type the tests below expect buildPromptWithImages to drop.
const TIFF_IMAGE: ContextImage = {
base64Data: 'dGlmZg==',
mimeType: 'image/tiff', // unsupported
};

describe('buildPromptWithImages', () => {
  /** Run the generator and return the content blocks of the first yielded message. */
  const contentBlocksFor = async (
    text: string,
    images: ContextImage[],
  ): Promise<ContentBlockParam[]> => {
    const [first] = await collect(buildPromptWithImages(text, images));
    return first.message.content as ContentBlockParam[];
  };

  it('yields one SDKUserMessage with text block + image block', async () => {
    const messages = await collect(buildPromptWithImages('do task', [PNG_IMAGE]));
    expect(messages).toHaveLength(1);

    const [message] = messages;
    expect(message.type).toBe('user');
    expect(message.message.role).toBe('user');

    const blocks = message.message.content as ContentBlockParam[];
    expect(blocks).toHaveLength(2);

    const [textBlock, imageBlock] = blocks;
    expect(textBlock).toEqual({ type: 'text', text: 'do task' });
    expect(imageBlock.type).toBe('image');

    const { source } = imageBlock as {
      type: 'image';
      source: { type: string; media_type: string; data: string };
    };
    expect(source.type).toBe('base64');
    expect(source.media_type).toBe('image/png');
    expect(source.data).toBe('aGVsbG8=');
  });

  it('sets a non-empty session_id and null parent_tool_use_id', async () => {
    const [message] = await collect(buildPromptWithImages('task', [PNG_IMAGE]));
    expect(message.session_id).toBeTruthy();
    expect(message.parent_tool_use_id).toBeNull();
  });

  it('includes multiple images as separate image blocks', async () => {
    const blocks = await contentBlocksFor('task', [PNG_IMAGE, JPEG_IMAGE]);
    // text + 2 images, in that order
    expect(blocks.map((block) => block.type)).toEqual(['text', 'image', 'image']);
  });

  it('filters out unsupported MIME types', async () => {
    const blocks = await contentBlocksFor('task', [PNG_IMAGE, TIFF_IMAGE]);
    // text + 1 image (TIFF filtered out)
    expect(blocks.map((block) => block.type)).toEqual(['text', 'image']);
  });

  it('yields text-only message when all images are unsupported', async () => {
    const blocks = await contentBlocksFor('task', [TIFF_IMAGE]);
    expect(blocks).toEqual([{ type: 'text', text: 'task' }]);
  });

  it('yields text-only message when images array is empty', async () => {
    const blocks = await contentBlocksFor('task', []);
    expect(blocks).toEqual([{ type: 'text', text: 'task' }]);
  });

  it('yields exactly one message', async () => {
    const messages = await collect(
      buildPromptWithImages('task', [PNG_IMAGE, JPEG_IMAGE, TIFF_IMAGE]),
    );
    expect(messages).toHaveLength(1);
  });

  it('each call produces a unique session_id', async () => {
    const [firstRun] = await collect(buildPromptWithImages('task', [PNG_IMAGE]));
    const [secondRun] = await collect(buildPromptWithImages('task', [PNG_IMAGE]));
    expect(firstRun.session_id).not.toBe(secondRun.session_id);
  });
});
59 changes: 59 additions & 0 deletions tests/unit/backends/claude-code.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,65 @@ describe('execute', () => {
await Promise.resolve();
expect(mockStoreLlmCall).not.toHaveBeenCalled();
});

// When an injection carries a supported image, the prompt passed to the first
// query() call must be an async iterable rather than a plain string.
it('passes AsyncIterable prompt to query() when contextInjections has images', async () => {
mockStream([
{ type: 'result', subtype: 'success', result: 'Done', total_cost_usd: 0, num_turns: 1 },
]);

// One injection with a single PNG image attached.
const input = makeInput({
contextInjections: [
{
toolName: 'ReadWorkItem',
params: {},
result: 'card content',
description: 'Work item',
images: [{ base64Data: 'abc', mimeType: 'image/png' }],
},
],
});

await new ClaudeCodeEngine().execute(input);

// Inspect the first argument of the first recorded query() call.
const promptArg = mockQuery.mock.calls[0][0].prompt;
expect(typeof promptArg).not.toBe('string');
expect(promptArg[Symbol.asyncIterator]).toBeDefined();
});

// Checks two things for an injection with one supported image: the INFO log
// entry is written with the image count, and the base64 payload does not
// appear in the text block of the prompt.
it('logs image injection and strips images before buildTaskPrompt', async () => {
mockStream([
{ type: 'result', subtype: 'success', result: 'Done', total_cost_usd: 0, num_turns: 1 },
]);

// The distinctive payload string makes a leak into the prompt text detectable.
const input = makeInput({
contextInjections: [
{
toolName: 'ReadWorkItem',
params: {},
result: 'card content',
description: 'Work item',
images: [{ base64Data: 'imagedata123', mimeType: 'image/png' }],
},
],
});

await new ClaudeCodeEngine().execute(input);

expect(input.logWriter).toHaveBeenCalledWith(
'INFO',
'Injecting work item images as SDK content blocks',
{ count: 1 },
);

// Drain the AsyncIterable prompt passed to the first query() call, then
// check the text block of its first message.
const promptArg = mockQuery.mock.calls[0][0].prompt as AsyncIterable<{
message: { content: { type: string; text?: string }[] };
}>;
const msgs: { message: { content: { type: string; text?: string }[] } }[] = [];
for await (const m of promptArg) msgs.push(m);
const textBlock = msgs[0].message.content.find((b) => b.type === 'text');
expect(textBlock?.text).not.toContain('imagedata123');
});
});

describe('continuation loop', () => {
Expand Down
Loading