mongrel-intelligence · zbigniewsobiecki · Mar 18, 2026 · Mar 18, 2026
diff --git a/src/backends/claude-code/index.ts b/src/backends/claude-code/index.ts
@@ -1,3 +1,4 @@
+import { randomUUID } from 'node:crypto';
 import { constants, accessSync, existsSync, readdirSync, statSync, writeFileSync } from 'node:fs';
 import { rm } from 'node:fs/promises';
 import { homedir } from 'node:os';
@@ -9,6 +10,7 @@ import type {
 	SDKResultSuccess,
 	SDKStatusMessage,
 	SDKSystemMessage,
+	SDKUserMessage,
 } from '@anthropic-ai/claude-agent-sdk';
 import { getEngineSettings } from '../../config/engineSettings.js';
 import { logger } from '../../utils/logging.js';
@@ -24,7 +26,7 @@ import {
 import { cleanupContextFiles } from '../contextFiles.js';
 import { buildSystemPrompt, buildTaskPrompt } from '../nativeTools.js';
 import { logLlmCall } from '../shared/llmCallLogger.js';
-import type { AgentEngine, AgentEngineResult, AgentExecutionPlan } from '../types.js';
+import type { AgentEngine, AgentEngineResult, AgentExecutionPlan, ContextImage } from '../types.js';
 import { buildClaudeEnv } from './env.js';
 import { buildHooks } from './hooks.js';
 import { CLAUDE_CODE_MODEL_IDS, DEFAULT_CLAUDE_CODE_MODEL } from './models.js';
@@ -64,6 +66,74 @@ export function ensureOnboardingFlag(): void {
 	}
 }
 
+/**
+ * Image MIME types this engine forwards as base64 image content blocks.
+ */
+const CLAUDE_SUPPORTED_IMAGE_TYPES = new Set(
+	['image/jpeg', 'image/png', 'image/gif', 'image/webp'],
+);
+
+/**
+ * Async generator yielding the single opening user message for a Claude Code run.
+ *
+ * The message content is the `text` block followed by one base64 image block for each
+ * entry of `images` whose MIME type is listed in CLAUDE_SUPPORTED_IMAGE_TYPES.
+ */
+export async function* buildPromptWithImages(
+	text: string,
+	images: ContextImage[],
+): AsyncIterable<SDKUserMessage> {
+	type SupportedMediaType = 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp';
+	const accepted = images.filter((image) => CLAUDE_SUPPORTED_IMAGE_TYPES.has(image.mimeType));
+	const imageBlocks = accepted.map((image) => {
+		const source = {
+			type: 'base64' as const,
+			media_type: image.mimeType as SupportedMediaType,
+			data: image.base64Data,
+		};
+		return { type: 'image' as const, source };
+	});
+	const textBlock = { type: 'text' as const, text };
+
+	yield {
+		type: 'user',
+		message: { role: 'user', content: [textBlock, ...imageBlocks] },
+		parent_tool_use_id: null,
+		session_id: randomUUID(),
+	};
+}
+
+/**
+ * Gather all images from the context injections, keeping those with a supported MIME type.
+ * Logs INFO with the kept count (if any) and WARN with the dropped count and distinct types.
+ */
+function filterContextImages(
+	contextInjections: AgentExecutionPlan['contextInjections'],
+	logWriter: AgentExecutionPlan['logWriter'],
+): ContextImage[] {
+	const kept: ContextImage[] = [];
+	const rejectedTypes = new Set<string>();
+	let rejectedCount = 0;
+	for (const injection of contextInjections) {
+		for (const image of injection.images ?? []) {
+			if (CLAUDE_SUPPORTED_IMAGE_TYPES.has(image.mimeType)) {
+				kept.push(image);
+			} else {
+				rejectedCount += 1;
+				rejectedTypes.add(image.mimeType);
+			}
+		}
+	}
+	if (kept.length > 0) {
+		logWriter('INFO', 'Injecting work item images as SDK content blocks', { count: kept.length });
+	}
+	if (rejectedCount > 0) {
+		const details = { skipped: rejectedCount, types: [...rejectedTypes] };
+		logWriter('WARN', 'Skipped unsupported image MIME types', details);
+	}
+	return kept;
+}
+
 /**
  * Extract a GitHub PR URL from assistant messages (tool results containing create-pr output).
  */
@@ -525,9 +595,15 @@ export class ClaudeCodeEngine implements AgentEngine {
 	async execute(input: AgentExecutionPlan): Promise<AgentEngineResult> {
 		const startTime = Date.now();
 		const systemPrompt = buildSystemPrompt(input.systemPrompt, input.availableTools);
+
+		// Collect supported images for native SDK delivery; strip from injections so
+		// offloadLargeContext does not also write them to disk (redundant for this engine).
+		const supportedImages = filterContextImages(input.contextInjections, input.logWriter);
+		const injectionsForPrompt = input.contextInjections.map(({ images: _images, ...rest }) => rest);
+
 		const { prompt: taskPrompt, hasOffloadedContext } = await buildTaskPrompt(
 			input.taskPrompt,
-			input.contextInjections,
+			injectionsForPrompt,
 			input.repoDir,
 		);
 		// Resolve model again here for backward compatibility: execute() may be called
@@ -569,15 +645,18 @@ export class ClaudeCodeEngine implements AgentEngine {
 
 		const maxContinuationTurns = input.completionRequirements?.maxContinuationTurns ?? 0;
 		let continuationTurns = 0;
-		let promptText = taskPrompt;
+		// Use AsyncIterable prompt for the first turn when images are present; string otherwise.
+		// Continuation turns always use a plain string prompt.
+		let promptInput: string | AsyncIterable<SDKUserMessage> =
+			supportedImages.length > 0 ? buildPromptWithImages(taskPrompt, supportedImages) : taskPrompt;
 		let isContinuation = false;
 		let turnCount = 0;
 		let totalCost: number | undefined;
 
 		for (;;) {
 			const stderrChunks: string[] = [];
 			const stream = query({
-				prompt: promptText,
+				prompt: promptInput,
 				options: {
 					model,
 					systemPrompt,
@@ -641,7 +720,7 @@ export class ClaudeCodeEngine implements AgentEngine {
 			if (decision.done) return decision.result;
 
 			continuationTurns++;
-			promptText = decision.promptText;
+			promptInput = decision.promptText;
 			isContinuation = true;
 		}
 	}

diff --git a/src/backends/shared/contextFiles.ts b/src/backends/shared/contextFiles.ts
@@ -6,8 +6,10 @@
  * to read them on-demand using its built-in Read tool.
  *
  * When context injections contain images, each image is written as a binary
- * file to `.cascade/context/images/` so native-tool engines (Claude Code,
- * OpenCode, Codex) can read them with their built-in Read tool.
+ * file to `.cascade/context/images/` so native-tool engines (Codex, OpenCode)
+ * can read them with their built-in Read tool. The Claude Code engine receives
+ * images natively as SDK ImageBlockParam content blocks instead (see
+ * src/backends/claude-code/index.ts buildPromptWithImages).
  */
 import { mkdir, rm, writeFile } from 'node:fs/promises';
 import { join } from 'node:path';
@@ -104,7 +106,7 @@ function generateReadInstructions(files: OffloadedFile[], images: OffloadedImage
 	if (images.length > 0) {
 		if (files.length > 0) lines.push('');
 		lines.push(
-			`The following context images have been saved to \`${CONTEXT_OFFLOAD_CONFIG.contextDir}/${IMAGES_SUBDIR}/\`:`,
+			'The following work item images were pre-downloaded using authenticated credentials. Use the Read tool on the file paths below — do NOT curl or HTTP-fetch the original attachment URLs:',
 		);
 		lines.push('');
 		for (const img of images) {

diff --git a/src/backends/shared/nativeToolPrompts.ts b/src/backends/shared/nativeToolPrompts.ts
@@ -6,12 +6,13 @@ const NATIVE_TOOL_EXECUTION_RULES = `## Native Tool Execution Rules
 You are operating in a native-tool environment, not a gadget/function-call environment.
 
 - Never write pseudo tool calls such as \`[tool_call: ...]\`, \`ReadFile(...)\`, \`RipGrep(...)\`, \`Tmux(...)\`, \`CreatePR(...)\`, or similar function-call text in your assistant response.
-- Use actual OpenCode/Codex tool invocations instead:
+- Use your built-in tools instead:
   - use built-in file/search tools or the shell tool for repository exploration
   - use the edit tool for file modifications
   - use the shell tool for all \`cascade-tools ...\`, \`git ...\`, \`rg ...\`, \`fd ...\`, test, lint, and build commands
 - When the task instructions mention gadget names like \`CreatePR\`, \`PostComment\`, \`UpdateChecklistItem\`, \`Finish\`, \`ReadWorkItem\`, \`TodoUpsert\`, or \`TodoUpdateStatus\`, treat that as a request to run the equivalent real command or tool action, not to print the gadget name.
-- If you catch yourself composing a pseudo tool call in plain text, stop and use the real tool instead.`;
+- If you catch yourself composing a pseudo tool call in plain text, stop and use the real tool instead.
+- Trello, JIRA, and GitHub attachment URLs require backend authentication. NEVER curl, wget, or HTTP-fetch them — they return an authorization error. Work item images are pre-fetched and available either as images in your conversation context or as files under \`.cascade/context/images/\` — use whichever is present; never fetch the original URLs.`;
 
 /**
  * Format a single CLI parameter for tool guidance documentation.

diff --git a/src/backends/types.ts b/src/backends/types.ts
@@ -5,6 +5,7 @@ import type { CompletionRequirements } from './completion.js';
 
 // Re-export shared contracts so downstream code that imports from here continues to work.
 export type {
+	ContextImage,
 	ContextInjection,
 	LogWriter,
 	ProgressReporter,

diff --git a/tests/unit/backends/claude-code-imagePrompt.test.ts b/tests/unit/backends/claude-code-imagePrompt.test.ts
@@ -0,0 +1,112 @@
+import type { SDKUserMessage } from '@anthropic-ai/claude-agent-sdk';
+import type { ContentBlockParam } from '@anthropic-ai/sdk/resources';
+import { describe, expect, it, vi } from 'vitest';
+import type { ContextImage } from '../../../src/agents/contracts/index.js';
+import { buildPromptWithImages } from '../../../src/backends/claude-code/index.js';
+
+vi.mock('@anthropic-ai/claude-agent-sdk', () => ({ // stub the SDK module: only query() is exposed
+	query: vi.fn(),
+}));
+
+vi.mock('../../../src/utils/logging.js', () => ({ // silent logger stub
+	logger: { warn: vi.fn(), info: vi.fn(), error: vi.fn(), debug: vi.fn() },
+}));
+
+vi.mock('../../../src/db/repositories/runsRepository.js', () => ({ // presumably pulled in by the engine module - confirm
+	storeLlmCall: vi.fn().mockResolvedValue(undefined),
+}));
+
+async function collect<T>(iterable: AsyncIterable<T>): Promise<T[]> {
+	const drained: T[] = []; // items in the order the iterable produced them
+	for await (const entry of iterable) drained.push(entry);
+	return drained;
+}
+
+const PNG_IMAGE: ContextImage = {
+	base64Data: 'aGVsbG8=', // base64 of "hello" (placeholder payload, not a real PNG)
+	mimeType: 'image/png',
+	altText: 'A diagram',
+};
+
+const JPEG_IMAGE: ContextImage = {
+	base64Data: 'dGVzdA==', // base64 of "test"
+	mimeType: 'image/jpeg',
+};
+
+const TIFF_IMAGE: ContextImage = {
+	base64Data: 'dGlmZg==', // base64 of "tiff"
+	mimeType: 'image/tiff', // unsupported: absent from the engine's CLAUDE_SUPPORTED_IMAGE_TYPES set
+};
+
+describe('buildPromptWithImages', () => {
+	it('yields one SDKUserMessage with text block + image block', async () => {
+		const msgs = await collect(buildPromptWithImages('do task', [PNG_IMAGE]));
+
+		expect(msgs).toHaveLength(1);
+		const msg = msgs[0];
+		expect(msg.type).toBe('user');
+		expect(msg.message.role).toBe('user');
+
+		const content = msg.message.content as ContentBlockParam[];
+		expect(content).toHaveLength(2);
+		expect(content[0]).toEqual({ type: 'text', text: 'do task' }); // the text block comes first
+		expect(content[1].type).toBe('image');
+		const imageBlock = content[1] as {
+			type: 'image';
+			source: { type: string; media_type: string; data: string };
+		};
+		expect(imageBlock.source.type).toBe('base64');
+		expect(imageBlock.source.media_type).toBe('image/png');
+		expect(imageBlock.source.data).toBe('aGVsbG8='); // payload is passed through unchanged
+	});
+
+	it('sets a non-empty session_id and null parent_tool_use_id', async () => {
+		const msgs = await collect(buildPromptWithImages('task', [PNG_IMAGE]));
+		const msg = msgs[0];
+		expect(msg.session_id).toBeTruthy(); // only non-emptiness is checked, not the UUID format
+		expect(msg.parent_tool_use_id).toBeNull();
+	});
+
+	it('includes multiple images as separate image blocks', async () => {
+		const msgs = await collect(buildPromptWithImages('task', [PNG_IMAGE, JPEG_IMAGE]));
+		const content = msgs[0].message.content as ContentBlockParam[];
+		expect(content).toHaveLength(3); // 1 text block + 2 image blocks
+		expect(content[0].type).toBe('text');
+		expect(content[1].type).toBe('image');
+		expect(content[2].type).toBe('image');
+	});
+
+	it('filters out unsupported MIME types', async () => {
+		const msgs = await collect(buildPromptWithImages('task', [PNG_IMAGE, TIFF_IMAGE]));
+		const content = msgs[0].message.content as ContentBlockParam[];
+		// 1 text block + 1 image block; the TIFF entry is dropped as unsupported
+		expect(content).toHaveLength(2);
+		expect(content[0].type).toBe('text');
+		expect(content[1].type).toBe('image');
+	});
+
+	it('yields text-only message when all images are unsupported', async () => {
+		const msgs = await collect(buildPromptWithImages('task', [TIFF_IMAGE]));
+		const content = msgs[0].message.content as ContentBlockParam[];
+		expect(content).toHaveLength(1);
+		expect(content[0]).toEqual({ type: 'text', text: 'task' });
+	});
+
+	it('yields text-only message when images array is empty', async () => {
+		const msgs = await collect(buildPromptWithImages('task', []));
+		const content = msgs[0].message.content as ContentBlockParam[];
+		expect(content).toHaveLength(1);
+		expect(content[0]).toEqual({ type: 'text', text: 'task' });
+	});
+
+	it('yields exactly one message', async () => {
+		const msgs = await collect(buildPromptWithImages('task', [PNG_IMAGE, JPEG_IMAGE, TIFF_IMAGE]));
+		expect(msgs).toHaveLength(1); // mixed supported/unsupported input still produces a single message
+	});
+
+	it('each call produces a unique session_id', async () => {
+		const msgs1 = await collect(buildPromptWithImages('task', [PNG_IMAGE]));
+		const msgs2 = await collect(buildPromptWithImages('task', [PNG_IMAGE]));
+		expect(msgs1[0].session_id).not.toBe(msgs2[0].session_id);
+	});
+});
diff --git a/tests/unit/backends/claude-code.test.ts b/tests/unit/backends/claude-code.test.ts
@@ -895,6 +895,65 @@ describe('execute', () => {
 		await Promise.resolve();
 		expect(mockStoreLlmCall).not.toHaveBeenCalled();
 	});
+
+	it('passes AsyncIterable prompt to query() when contextInjections has images', async () => {
+		mockStream([
+			{ type: 'result', subtype: 'success', result: 'Done', total_cost_usd: 0, num_turns: 1 },
+		]);
+
+		const input = makeInput({
+			contextInjections: [
+				{
+					toolName: 'ReadWorkItem',
+					params: {},
+					result: 'card content',
+					description: 'Work item',
+					images: [{ base64Data: 'abc', mimeType: 'image/png' }], // one supported image
+				},
+			],
+		});
+
+		await new ClaudeCodeEngine().execute(input);
+
+		const promptArg = mockQuery.mock.calls[0][0].prompt; // prompt field of the first mockQuery call's first argument
+		expect(typeof promptArg).not.toBe('string');
+		expect(promptArg[Symbol.asyncIterator]).toBeDefined(); // i.e. it is an async iterable
+	});
+
+	it('logs image injection and strips images before buildTaskPrompt', async () => {
+		mockStream([
+			{ type: 'result', subtype: 'success', result: 'Done', total_cost_usd: 0, num_turns: 1 },
+		]);
+
+		const input = makeInput({
+			contextInjections: [
+				{
+					toolName: 'ReadWorkItem',
+					params: {},
+					result: 'card content',
+					description: 'Work item',
+					images: [{ base64Data: 'imagedata123', mimeType: 'image/png' }], // distinctive payload searched for below
+				},
+			],
+		});
+
+		await new ClaudeCodeEngine().execute(input);
+
+		expect(input.logWriter).toHaveBeenCalledWith(
+			'INFO',
+			'Injecting work item images as SDK content blocks',
+			{ count: 1 },
+		);
+
+		// Drain the AsyncIterable prompt passed to the first mockQuery call and inspect its text block
+		const promptArg = mockQuery.mock.calls[0][0].prompt as AsyncIterable<{
+			message: { content: { type: string; text?: string }[] };
+		}>;
+		const msgs: { message: { content: { type: string; text?: string }[] } }[] = [];
+		for await (const m of promptArg) msgs.push(m);
+		const textBlock = msgs[0].message.content.find((b) => b.type === 'text');
+		expect(textBlock?.text).not.toContain('imagedata123'); // the image payload must not appear in the prompt text
+	});
 });
 
 describe('continuation loop', () => {