From eb8deebd02b734f74061c3e67bd70d3c8585e5ec Mon Sep 17 00:00:00 2001 From: Jack Wotherspoon Date: Sun, 14 Dec 2025 07:54:41 -0500 Subject: [PATCH 1/8] feat: inline image data with [Image #1] --- packages/cli/src/ui/AppContainer.tsx | 7 + packages/cli/src/ui/components/Composer.tsx | 1 + .../cli/src/ui/components/InputPrompt.tsx | 87 ++++++++- .../cli/src/ui/contexts/UIStateContext.tsx | 2 + .../cli/src/ui/hooks/useClipboardImages.ts | 165 ++++++++++++++++++ packages/cli/src/ui/hooks/useGeminiStream.ts | 81 ++++++++- .../cli/src/ui/utils/clipboardUtils.test.ts | 165 ++++++++++++++++++ packages/cli/src/ui/utils/clipboardUtils.ts | 77 ++++++++ packages/cli/src/ui/utils/highlight.test.ts | 40 +++++ packages/cli/src/ui/utils/highlight.ts | 17 +- 10 files changed, 630 insertions(+), 12 deletions(-) create mode 100644 packages/cli/src/ui/hooks/useClipboardImages.ts diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 55ccc7438f5..43c6b4b2e28 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -107,6 +107,7 @@ import { registerCleanup, runExitCleanup } from '../utils/cleanup.js'; import { RELAUNCH_EXIT_CODE } from '../utils/processUtils.js'; import type { SessionInfo } from '../utils/sessionUtils.js'; import { useMessageQueue } from './hooks/useMessageQueue.js'; +import { useClipboardImages } from './hooks/useClipboardImages.js'; import { useAutoAcceptIndicator } from './hooks/useAutoAcceptIndicator.js'; import { useSessionStats } from './contexts/SessionContext.js'; import { useGitBranchName } from './hooks/useGitBranchName.js'; @@ -360,6 +361,9 @@ export const AppContainer = (props: AppContainerProps) => { const { consoleMessages, clearConsoleMessages: clearConsoleMessagesState } = useConsoleMessages(); + // Clipboard images for pasted images in the input + const clipboardImages = useClipboardImages(); + const mainAreaWidth = calculateMainAreaWidth(terminalWidth, settings); // Derive 
widths for InputPrompt using shared helper const { inputWidth, suggestionsWidth } = useMemo(() => { @@ -793,6 +797,7 @@ Logging in with Google... Restarting Gemini CLI to continue. terminalWidth, terminalHeight, embeddedShellFocused, + clipboardImages, ); // Auto-accept indicator @@ -1543,6 +1548,7 @@ Logging in with Google... Restarting Gemini CLI to continue. warningMessage, bannerData, bannerVisible, + clipboardImages, }), [ isThemeDialogOpen, @@ -1634,6 +1640,7 @@ Logging in with Google... Restarting Gemini CLI to continue. warningMessage, bannerData, bannerVisible, + clipboardImages, ], ); diff --git a/packages/cli/src/ui/components/Composer.tsx b/packages/cli/src/ui/components/Composer.tsx index b4559a997f2..ddb2a961ee0 100644 --- a/packages/cli/src/ui/components/Composer.tsx +++ b/packages/cli/src/ui/components/Composer.tsx @@ -179,6 +179,7 @@ export const Composer = () => { streamingState={uiState.streamingState} suggestionsPosition={suggestionsPosition} onSuggestionsVisibilityChange={setSuggestionsVisible} + clipboardImages={uiState.clipboardImages} /> )} diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx index 31c34f3a284..58023c1568b 100644 --- a/packages/cli/src/ui/components/InputPrompt.tsx +++ b/packages/cli/src/ui/components/InputPrompt.tsx @@ -34,7 +34,10 @@ import { clipboardHasImage, saveClipboardImage, cleanupOldClipboardImages, + mayContainImagePaths, + categorizePathsByType, } from '../utils/clipboardUtils.js'; +import type { UseClipboardImagesReturn } from '../hooks/useClipboardImages.js'; import { isAutoExecutableCommand, isSlashCommand, @@ -86,6 +89,7 @@ export interface InputPromptProps { popAllMessages?: () => string | undefined; suggestionsPosition?: 'above' | 'below'; setBannerVisible: (visible: boolean) => void; + clipboardImages?: UseClipboardImagesReturn; } // The input content, input container, and input suggestions list may have different widths @@ -128,6 +132,7 @@ export 
const InputPrompt: React.FC = ({ popAllMessages, suggestionsPosition = 'below', setBannerVisible, + clipboardImages, }) => { const kittyProtocol = useKittyKeyboardProtocol(); const isShellFocused = useShellFocusState(); @@ -323,15 +328,23 @@ export const InputPrompt: React.FC = ({ // Ignore cleanup errors }); - // Get relative path from current directory - const relativePath = path.relative(config.getTargetDir(), imagePath); + // Register image and get display text (e.g., "[Image #1]") + // If clipboardImages is not provided, fall back to the old @path behavior + let insertText: string; + if (clipboardImages) { + insertText = clipboardImages.registerImage(imagePath); + } else { + const relativePath = path.relative( + config.getTargetDir(), + imagePath, + ); + insertText = `@${relativePath}`; + } - // Insert @path reference at cursor position - const insertText = `@${relativePath}`; const currentText = buffer.text; const offset = buffer.getOffset(); - // Add spaces around the path if needed + // Add spaces around the display text if needed let textToInsert = insertText; const charBefore = offset > 0 ? 
currentText[offset - 1] : ''; const charAfter = @@ -356,7 +369,7 @@ export const InputPrompt: React.FC = ({ } catch (error) { console.error('Error handling clipboard image:', error); } - }, [buffer, config]); + }, [buffer, config, clipboardImages]); useMouseClick( innerBoxRef, @@ -414,6 +427,63 @@ export const InputPrompt: React.FC = ({ pasteTimeoutRef.current = null; }, 40); } + + // Check if pasted content could be image file path(s) (drag and drop) + // Use synchronous check first to avoid async handling for normal text + if ( + clipboardImages && + key.sequence && + mayContainImagePaths(key.sequence) + ) { + // Only go async for potential image paths to verify file existence + const sequence = key.sequence; + void (async () => { + try { + const { imagePaths, nonImagePaths } = + await categorizePathsByType(sequence); + + if (imagePaths.length > 0 || nonImagePaths.length > 0) { + // Register each image and collect placeholders + const placeholders = imagePaths.map((p) => + clipboardImages.registerImage(p), + ); + + // Non-image files use @path syntax for file references + const atPrefixedPaths = nonImagePaths.map((p) => `@${p}`); + + // Build insertion text: image placeholders + @path references + const insertParts = [...placeholders, ...atPrefixedPaths]; + const insertText = insertParts.join(' '); + + // Insert at cursor position with proper spacing + const offset = buffer.getOffset(); + const currentText = buffer.text; + let textToInsert = insertText; + + const charBefore = offset > 0 ? currentText[offset - 1] : ''; + const charAfter = + offset < currentText.length ? 
currentText[offset] : ''; + + if (charBefore && charBefore !== ' ' && charBefore !== '\n') { + textToInsert = ' ' + textToInsert; + } + if (!charAfter || (charAfter !== ' ' && charAfter !== '\n')) { + textToInsert = textToInsert + ' '; + } + + buffer.replaceRangeByOffset(offset, offset, textToInsert); + } else { + // No valid paths found, treat as normal paste + buffer.handleInput(key); + } + } catch { + // On error, fall back to normal paste behavior + buffer.handleInput(key); + } + })(); + return; + } + // Ensure we never accidentally interpret paste as regular input. buffer.handleInput(key); return; @@ -858,6 +928,7 @@ export const InputPrompt: React.FC = ({ kittyProtocol.enabled, tryLoadQueuedMessages, setBannerVisible, + clipboardImages, ], ); @@ -1149,7 +1220,9 @@ export const InputPrompt: React.FC = ({ } const color = - seg.type === 'command' || seg.type === 'file' + seg.type === 'command' || + seg.type === 'file' || + seg.type === 'image' ? theme.text.accent : theme.text.primary; diff --git a/packages/cli/src/ui/contexts/UIStateContext.tsx b/packages/cli/src/ui/contexts/UIStateContext.tsx index 34e6262f306..d5aaa3e9d01 100644 --- a/packages/cli/src/ui/contexts/UIStateContext.tsx +++ b/packages/cli/src/ui/contexts/UIStateContext.tsx @@ -28,6 +28,7 @@ import type { DOMElement } from 'ink'; import type { SessionStatsState } from '../contexts/SessionContext.js'; import type { ExtensionUpdateState } from '../state/extensions.js'; import type { UpdateObject } from '../utils/updateCheck.js'; +import type { UseClipboardImagesReturn } from '../hooks/useClipboardImages.js'; export interface ProQuotaDialogRequest { failedModel: string; @@ -136,6 +137,7 @@ export interface UIState { }; bannerVisible: boolean; customDialog: React.ReactNode | null; + clipboardImages: UseClipboardImagesReturn; } export const UIStateContext = createContext(null); diff --git a/packages/cli/src/ui/hooks/useClipboardImages.ts b/packages/cli/src/ui/hooks/useClipboardImages.ts new file mode 
100644 index 00000000000..93efa824296 --- /dev/null +++ b/packages/cli/src/ui/hooks/useClipboardImages.ts @@ -0,0 +1,165 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { useState, useCallback, useRef } from 'react'; +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import type { PartUnion } from '@google/genai'; +import { debugLogger } from '@google/gemini-cli-core'; +import { IMAGE_EXTENSIONS } from '../utils/clipboardUtils.js'; + +/** + * Represents a clipboard image that has been pasted into the input. + */ +export interface ClipboardImage { + /** Sequential ID for this image within the current message */ + id: number; + /** Absolute path to the image file */ + path: string; + /** Display text shown in the input (e.g., "[Image #1]") */ + displayText: string; +} + +/** + * Return type for the useClipboardImages hook. + */ +export interface UseClipboardImagesReturn { + /** Array of registered clipboard images for the current message */ + images: ClipboardImage[]; + /** Register a new image and return its display text (e.g., "[Image #1]") */ + registerImage: (absolutePath: string) => string; + /** Clear all images (called after message submission) */ + clear: () => void; + /** Get image parts only for images whose [Image #N] tags are present in the text */ + getImagePartsForText: (text: string) => Promise; +} + +/** + * MIME types supported by Gemini API for image inputs. + * See: https://ai.google.dev/gemini-api/docs/image-understanding + */ +const MIME_TYPES: Record = { + '.png': 'image/png', + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.webp': 'image/webp', + '.heic': 'image/heic', + '.heif': 'image/heif', +}; + +function getMimeType(filePath: string): string | null { + const ext = path.extname(filePath).toLowerCase(); + return MIME_TYPES[ext] ?? null; +} + +/** + * Reads an image file and returns it as a base64-encoded PartUnion. 
+ * Returns null if the file cannot be read or has an unsupported format. + */ +async function readImageAsPart( + imagePath: string, + displayText: string, +): Promise { + const mimeType = getMimeType(imagePath); + if (!mimeType) { + const ext = path.extname(imagePath); + debugLogger.warn( + `Unsupported image format ${ext} for ${displayText}, skipping. Supported: ${IMAGE_EXTENSIONS.join(', ')}`, + ); + return null; + } + + try { + const fileContent = await fs.readFile(imagePath); + return { + inlineData: { + data: fileContent.toString('base64'), + mimeType, + }, + }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + debugLogger.warn( + `Failed to load clipboard image ${displayText} from ${imagePath}: ${message}`, + ); + return null; + } +} + +/** + * Hook to manage clipboard images pasted into the input. + * + * This hook provides a registry for tracking pasted images and converting them + * to base64-encoded parts for injection into the Gemini prompt. + * + * The image counter resets after each message submission. + */ +export function useClipboardImages(): UseClipboardImagesReturn { + const [images, setImages] = useState([]); + const nextIdRef = useRef(1); + + const registerImage = useCallback((absolutePath: string): string => { + // Generate ID atomically with state update to prevent race conditions + // when multiple images are registered rapidly (e.g., multi-file drag-and-drop) + const id = nextIdRef.current++; + const displayText = `[Image #${id}]`; + + setImages((prev) => { + // Check if this path is already registered to prevent duplicates + if (prev.some((img) => img.path === absolutePath)) { + return prev; + } + return [ + ...prev, + { + id, + path: absolutePath, + displayText, + }, + ]; + }); + + return displayText; + }, []); + + const clear = useCallback(() => { + setImages([]); + nextIdRef.current = 1; + }, []); + + /** + * Get image parts only for images whose [Image #N] tags are present in the text. 
+ * This prevents sending images the user has deleted from their prompt. + */ + const getImagePartsForText = useCallback( + async (text: string): Promise => { + const parts: PartUnion[] = []; + + for (const image of images) { + // Use String.includes for faster tag checking (no regex compilation) + if (!text.includes(`[Image #${image.id}]`)) { + // Tag was deleted - skip this image + continue; + } + + const part = await readImageAsPart(image.path, image.displayText); + if (part) { + parts.push(part); + } + } + + return parts; + }, + [images], + ); + + return { + images, + registerImage, + clear, + getImagePartsForText, + }; +} diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 306653bd2d6..3a7d4257f8c 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -39,7 +39,12 @@ import { EDIT_TOOL_NAMES, processRestorableToolCalls, } from '@google/gemini-cli-core'; -import { type Part, type PartListUnion, FinishReason } from '@google/genai'; +import { + type Part, + type PartListUnion, + type PartUnion, + FinishReason, +} from '@google/genai'; import type { HistoryItem, HistoryItemWithoutId, @@ -109,6 +114,10 @@ export const useGeminiStream = ( terminalWidth: number, terminalHeight: number, isShellFocused?: boolean, + clipboardImages?: { + getImagePartsForText: (text: string) => Promise; + clear: () => void; + }, ) => { const [initError, setInitError] = useState(null); const abortControllerRef = useRef(null); @@ -507,6 +516,69 @@ export const useGeminiStream = ( ); return { queryToSend: null, shouldProceed: false }; } + + // Inject clipboard images into the query + // Only include images whose [Image #N] tags are still present in the text + if (clipboardImages) { + // Extract query text to check which image tags are present + const queryText = + typeof localQueryToSendToGemini === 'string' + ? 
localQueryToSendToGemini + : Array.isArray(localQueryToSendToGemini) + ? localQueryToSendToGemini + .filter( + (p): p is { text: string } => + p !== null && + typeof p === 'object' && + 'text' in p && + typeof p.text === 'string', + ) + .map((p) => p.text) + .join(' ') + : ''; + + const imageParts = + await clipboardImages.getImagePartsForText(queryText); + if (imageParts.length > 0) { + onDebugMessage(`Injecting ${imageParts.length} clipboard image(s)`); + + // Strip [Image #N] references from the text since we're injecting the actual images + const stripImageReferences = (text: string): string => + text.replace(/\[Image #\d+\]\s*/g, '').trim(); + + if (typeof localQueryToSendToGemini === 'string') { + const cleanedText = stripImageReferences(localQueryToSendToGemini); + localQueryToSendToGemini = [ + ...imageParts, + ...(cleanedText ? [{ text: cleanedText }] : []), + ]; + } else if (Array.isArray(localQueryToSendToGemini)) { + // Clean text parts in the array + const cleanedParts = localQueryToSendToGemini + .map((part) => { + if (typeof part === 'string') { + const cleaned = stripImageReferences(part); + return cleaned ? cleaned : null; + } + if ( + part && + typeof part === 'object' && + 'text' in part && + typeof part.text === 'string' + ) { + const cleaned = stripImageReferences(part.text); + return cleaned ? 
{ ...part, text: cleaned } : null; + } + return part; + }) + .filter( + (part): part is NonNullable => part !== null, + ); + localQueryToSendToGemini = [...imageParts, ...cleanedParts]; + } + } + } + return { queryToSend: localQueryToSendToGemini, shouldProceed: true }; }, [ @@ -518,6 +590,7 @@ export const useGeminiStream = ( logger, shellModeActive, scheduleToolCalls, + clipboardImages, ], ); @@ -921,6 +994,11 @@ export const useGeminiStream = ( return; } + // Clear clipboard images after they've been injected into the query + if (clipboardImages && !options?.isContinuation) { + clipboardImages.clear(); + } + if (!options?.isContinuation) { if (typeof queryToSend === 'string') { // logging the text prompts only for now @@ -1049,6 +1127,7 @@ export const useGeminiStream = ( config, startNewPrompt, getPromptCount, + clipboardImages, ], ); diff --git a/packages/cli/src/ui/utils/clipboardUtils.test.ts b/packages/cli/src/ui/utils/clipboardUtils.test.ts index bff3d2a6ec7..80c5bafccde 100644 --- a/packages/cli/src/ui/utils/clipboardUtils.test.ts +++ b/packages/cli/src/ui/utils/clipboardUtils.test.ts @@ -11,7 +11,12 @@ import { cleanupOldClipboardImages, splitEscapedPaths, parsePastedPaths, + mayContainImagePaths, + categorizePathsByType, } from './clipboardUtils.js'; +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import * as os from 'node:os'; describe('clipboardUtils', () => { describe('clipboardHasImage', () => { @@ -237,4 +242,164 @@ describe('clipboardUtils', () => { expect(result).toBe('@\\\\server\\share\\file.txt '); }); }); + + describe('mayContainImagePaths', () => { + it('should return true for single image path', () => { + expect(mayContainImagePaths('/path/to/image.png')).toBe(true); + }); + + it('should return true for multiple image paths', () => { + expect(mayContainImagePaths('/img1.png /img2.jpg')).toBe(true); + }); + + it('should return false for non-image paths', () => { + 
expect(mayContainImagePaths('/path/to/file.txt')).toBe(false); + }); + + it('should return false for non-path text', () => { + expect(mayContainImagePaths('hello world')).toBe(false); + }); + + it('should return true for mixed paths with at least one image', () => { + expect(mayContainImagePaths('/file.txt /image.png')).toBe(true); + }); + + it('should handle paths with escaped spaces', () => { + expect(mayContainImagePaths('/my\\ image.png')).toBe(true); + }); + + it('should handle Windows paths', () => { + expect(mayContainImagePaths('C:\\Users\\image.png')).toBe(true); + }); + + it('should handle tilde paths', () => { + expect(mayContainImagePaths('~/images/photo.jpg')).toBe(true); + }); + + it('should handle relative paths', () => { + expect(mayContainImagePaths('./image.png')).toBe(true); + }); + + it('should be case insensitive for extensions', () => { + expect(mayContainImagePaths('/image.PNG')).toBe(true); + expect(mayContainImagePaths('/image.Jpg')).toBe(true); + }); + + it('should handle all supported image extensions', () => { + expect(mayContainImagePaths('/a.png')).toBe(true); + expect(mayContainImagePaths('/a.jpg')).toBe(true); + expect(mayContainImagePaths('/a.jpeg')).toBe(true); + expect(mayContainImagePaths('/a.webp')).toBe(true); + expect(mayContainImagePaths('/a.heic')).toBe(true); + expect(mayContainImagePaths('/a.heif')).toBe(true); + }); + + it('should return false for unsupported image formats', () => { + expect(mayContainImagePaths('/a.gif')).toBe(false); + expect(mayContainImagePaths('/a.bmp')).toBe(false); + expect(mayContainImagePaths('/a.tiff')).toBe(false); + }); + }); + + describe('categorizePathsByType', () => { + let tempDir: string; + + // Create temp files for testing + const setupTempFiles = async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'clipboard-test-')); + await fs.writeFile(path.join(tempDir, 'image.png'), 'fake png'); + await fs.writeFile(path.join(tempDir, 'image.jpg'), 'fake jpg'); + await 
fs.writeFile(path.join(tempDir, 'document.txt'), 'text content'); + await fs.writeFile(path.join(tempDir, 'script.js'), 'js content'); + return tempDir; + }; + + const cleanupTempFiles = async () => { + if (tempDir) { + await fs.rm(tempDir, { recursive: true, force: true }); + } + }; + + it('should return empty result for non-path text', async () => { + const result = await categorizePathsByType('hello world'); + expect(result.imagePaths).toEqual([]); + expect(result.nonImagePaths).toEqual([]); + expect(result.invalidSegments).toEqual(['hello', 'world']); + }); + + it('should categorize non-existent paths as invalid', async () => { + const result = await categorizePathsByType('/nonexistent/image.png'); + expect(result.imagePaths).toEqual([]); + expect(result.nonImagePaths).toEqual([]); + expect(result.invalidSegments).toEqual(['/nonexistent/image.png']); + }); + + it('should categorize existing images correctly', async () => { + await setupTempFiles(); + try { + const imagePath = path.join(tempDir, 'image.png'); + const result = await categorizePathsByType(imagePath); + expect(result.imagePaths).toEqual([imagePath]); + expect(result.nonImagePaths).toEqual([]); + expect(result.invalidSegments).toEqual([]); + } finally { + await cleanupTempFiles(); + } + }); + + it('should categorize existing non-images correctly', async () => { + await setupTempFiles(); + try { + const textPath = path.join(tempDir, 'document.txt'); + const result = await categorizePathsByType(textPath); + expect(result.imagePaths).toEqual([]); + expect(result.nonImagePaths).toEqual([textPath]); + expect(result.invalidSegments).toEqual([]); + } finally { + await cleanupTempFiles(); + } + }); + + it('should handle mixed paths correctly', async () => { + await setupTempFiles(); + try { + const imagePath = path.join(tempDir, 'image.png'); + const textPath = path.join(tempDir, 'document.txt'); + const nonexistent = '/nonexistent/file.xyz'; + const input = `${imagePath} ${textPath} ${nonexistent}`; + + 
const result = await categorizePathsByType(input); + expect(result.imagePaths).toEqual([imagePath]); + expect(result.nonImagePaths).toEqual([textPath]); + expect(result.invalidSegments).toEqual([nonexistent]); + } finally { + await cleanupTempFiles(); + } + }); + + it('should handle multiple images', async () => { + await setupTempFiles(); + try { + const png = path.join(tempDir, 'image.png'); + const jpg = path.join(tempDir, 'image.jpg'); + const input = `${png} ${jpg}`; + + const result = await categorizePathsByType(input); + expect(result.imagePaths).toContain(png); + expect(result.imagePaths).toContain(jpg); + expect(result.imagePaths.length).toBe(2); + expect(result.nonImagePaths).toEqual([]); + expect(result.invalidSegments).toEqual([]); + } finally { + await cleanupTempFiles(); + } + }); + + it('should return empty arrays for empty string', async () => { + const result = await categorizePathsByType(''); + expect(result.imagePaths).toEqual([]); + expect(result.nonImagePaths).toEqual([]); + expect(result.invalidSegments).toEqual([]); + }); + }); }); diff --git a/packages/cli/src/ui/utils/clipboardUtils.ts b/packages/cli/src/ui/utils/clipboardUtils.ts index 91a657aca02..24f50692d78 100644 --- a/packages/cli/src/ui/utils/clipboardUtils.ts +++ b/packages/cli/src/ui/utils/clipboardUtils.ts @@ -244,3 +244,80 @@ export function parsePastedPaths( return anyValidPath ? processedPaths.join(' ') + ' ' : null; } + +/** + * Quick synchronous check if text could contain image file paths. + * Used as a fast heuristic before async validation. + */ +export function mayContainImagePaths(text: string): boolean { + if (!PATH_PREFIX_PATTERN.test(text)) { + return false; + } + const lowerText = text.toLowerCase(); + return IMAGE_EXTENSIONS.some((ext) => lowerText.includes(ext)); +} + +/** + * Result of categorizing pasted/dropped paths. 
+ */ +export interface CategorizedPaths { + /** Absolute paths to existing image files */ + imagePaths: string[]; + /** Absolute paths to existing non-image files */ + nonImagePaths: string[]; + /** Segments that don't exist or aren't valid paths */ + invalidSegments: string[]; +} + +/** + * Categorizes pasted/dropped paths into images, non-images, and invalid. + * Validates that files exist on disk before categorizing. + * + * @param text The pasted text (potentially space-separated paths) + * @returns Categorized paths by type + */ +export async function categorizePathsByType( + text: string, +): Promise { + const result: CategorizedPaths = { + imagePaths: [], + nonImagePaths: [], + invalidSegments: [], + }; + + const segments = splitEscapedPaths(text); + if (segments.length === 0) { + return result; + } + + const validationResults = await Promise.all( + segments.map(async (segment) => { + if (!PATH_PREFIX_PATTERN.test(segment)) { + return { segment, type: 'invalid' as const, unescaped: segment }; + } + const unescaped = unescapePath(segment); + try { + await fs.access(unescaped); + const ext = path.extname(unescaped).toLowerCase(); + const type: 'image' | 'non-image' = IMAGE_EXTENSIONS.includes(ext) + ? 
'image' + : 'non-image'; + return { segment, type, unescaped }; + } catch { + return { segment, type: 'invalid' as const, unescaped }; + } + }), + ); + + for (const { type, unescaped, segment } of validationResults) { + if (type === 'image') { + result.imagePaths.push(unescaped); + } else if (type === 'non-image') { + result.nonImagePaths.push(unescaped); + } else { + result.invalidSegments.push(segment); + } + } + + return result; +} diff --git a/packages/cli/src/ui/utils/highlight.test.ts b/packages/cli/src/ui/utils/highlight.test.ts index 8d4c5ce620f..f09a66790c1 100644 --- a/packages/cli/src/ui/utils/highlight.test.ts +++ b/packages/cli/src/ui/utils/highlight.test.ts @@ -133,4 +133,44 @@ describe('parseInputForHighlighting', () => { { text: '@/my\\ path/file.txt', type: 'file' }, ]); }); + + it('should highlight image placeholders', () => { + const text = 'Check this [Image #1] please'; + expect(parseInputForHighlighting(text, 0)).toEqual([ + { text: 'Check this ', type: 'default' }, + { text: '[Image #1]', type: 'image' }, + { text: ' please', type: 'default' }, + ]); + }); + + it('should highlight multiple image placeholders', () => { + const text = '[Image #1] and [Image #2]'; + expect(parseInputForHighlighting(text, 0)).toEqual([ + { text: '[Image #1]', type: 'image' }, + { text: ' and ', type: 'default' }, + { text: '[Image #2]', type: 'image' }, + ]); + }); + + it('should highlight image placeholders with double digits', () => { + const text = 'See [Image #12] for details'; + expect(parseInputForHighlighting(text, 0)).toEqual([ + { text: 'See ', type: 'default' }, + { text: '[Image #12]', type: 'image' }, + { text: ' for details', type: 'default' }, + ]); + }); + + it('should highlight mixed files and images', () => { + const text = '@file.txt [Image #1] @another.jpg [Image #2]'; + expect(parseInputForHighlighting(text, 0)).toEqual([ + { text: '@file.txt', type: 'file' }, + { text: ' ', type: 'default' }, + { text: '[Image #1]', type: 'image' }, + { text: 
' ', type: 'default' }, + { text: '@another.jpg', type: 'file' }, + { text: ' ', type: 'default' }, + { text: '[Image #2]', type: 'image' }, + ]); + }); }); diff --git a/packages/cli/src/ui/utils/highlight.ts b/packages/cli/src/ui/utils/highlight.ts index 19866b330a8..aab5627e06d 100644 --- a/packages/cli/src/ui/utils/highlight.ts +++ b/packages/cli/src/ui/utils/highlight.ts @@ -8,14 +8,16 @@ import { cpLen, cpSlice } from './textUtils.js'; export type HighlightToken = { text: string; - type: 'default' | 'command' | 'file'; + type: 'default' | 'command' | 'file' | 'image'; }; -// Matches slash commands (e.g., /help) and @ references (files or MCP resource URIs). +// Matches slash commands (e.g., /help), @ references (files or MCP resource URIs), +// and image placeholders (e.g., [Image #1]). // The @ pattern uses a negated character class to support URIs like `@file:///example.txt` // which contain colons. It matches any character except delimiters: comma, whitespace, // semicolon, common punctuation, and brackets. -const HIGHLIGHT_REGEX = /(^\/[a-zA-Z0-9_-]+|@(?:\\ |[^,\s;!?()[\]{}])+)/g; +const HIGHLIGHT_REGEX = + /(^\/[a-zA-Z0-9_-]+|@(?:\\ |[^,\s;!?()[\]{}])+|\[Image #\d+\])/g; export function parseInputForHighlighting( text: string, @@ -42,7 +44,14 @@ export function parseInputForHighlighting( } // Add the matched token - const type = fullMatch.startsWith('/') ? 'command' : 'file'; + let type: HighlightToken['type']; + if (fullMatch.startsWith('/')) { + type = 'command'; + } else if (fullMatch.startsWith('[Image')) { + type = 'image'; + } else { + type = 'file'; + } // Only highlight slash commands if the index is 0. 
if (type === 'command' && index !== 0) { tokens.push({ From 41cb95075a346f621014f9496432b88e7ce1cd8d Mon Sep 17 00:00:00 2001 From: Jack Wotherspoon Date: Sun, 14 Dec 2025 08:56:47 -0500 Subject: [PATCH 2/8] chore: remove race condition and add tests --- .../src/ui/hooks/useClipboardImages.test.ts | 135 ++++++++++++++++++ .../cli/src/ui/hooks/useClipboardImages.ts | 112 ++++++++++----- 2 files changed, 215 insertions(+), 32 deletions(-) create mode 100644 packages/cli/src/ui/hooks/useClipboardImages.test.ts diff --git a/packages/cli/src/ui/hooks/useClipboardImages.test.ts b/packages/cli/src/ui/hooks/useClipboardImages.test.ts new file mode 100644 index 00000000000..c67c21f1c73 --- /dev/null +++ b/packages/cli/src/ui/hooks/useClipboardImages.test.ts @@ -0,0 +1,135 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { act } from 'react'; +import { renderHook } from '../../test-utils/render.js'; +import { useClipboardImages } from './useClipboardImages.js'; + +// Mock the fs module to avoid actual file system operations +vi.mock('node:fs/promises', () => ({ + readFile: vi.fn().mockResolvedValue(Buffer.from('fake image data')), +})); + +describe('useClipboardImages', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('registerImage', () => { + it('should assign sequential IDs to different images', () => { + const { result } = renderHook(() => useClipboardImages()); + + let text1 = ''; + let text2 = ''; + let text3 = ''; + act(() => { + text1 = result.current.registerImage('/path/to/image1.png'); + text2 = result.current.registerImage('/path/to/image2.png'); + text3 = result.current.registerImage('/path/to/image3.png'); + }); + + expect(text1).toBe('[Image #1]'); + expect(text2).toBe('[Image #2]'); + expect(text3).toBe('[Image #3]'); + }); + + it('should be idempotent - same path returns same display text', () => { + const { result } = 
renderHook(() => useClipboardImages()); + + let text1 = ''; + let text2 = ''; + let text3 = ''; + act(() => { + text1 = result.current.registerImage('/path/to/image.png'); + text2 = result.current.registerImage('/path/to/image.png'); + text3 = result.current.registerImage('/path/to/image.png'); + }); + + expect(text1).toBe('[Image #1]'); + expect(text2).toBe('[Image #1]'); + expect(text3).toBe('[Image #1]'); + expect(result.current.images.length).toBe(1); + }); + + it('should handle rapid registrations correctly (simulating drag-drop)', () => { + const { result } = renderHook(() => useClipboardImages()); + + const paths = ['/a.png', '/b.png', '/c.png', '/d.png', '/e.png']; + const texts: string[] = []; + act(() => { + paths.forEach((p) => texts.push(result.current.registerImage(p))); + }); + + expect(texts).toEqual([ + '[Image #1]', + '[Image #2]', + '[Image #3]', + '[Image #4]', + '[Image #5]', + ]); + expect(result.current.images.length).toBe(5); + }); + }); + + describe('clear', () => { + it('should reset images array and ID counter', () => { + const { result } = renderHook(() => useClipboardImages()); + + act(() => { + result.current.registerImage('/path/to/image1.png'); + result.current.registerImage('/path/to/image2.png'); + }); + expect(result.current.images.length).toBe(2); + + act(() => { + result.current.clear(); + }); + expect(result.current.images.length).toBe(0); + + let newText = ''; + act(() => { + newText = result.current.registerImage('/path/to/new-image.png'); + }); + expect(newText).toBe('[Image #1]'); + }); + }); + + describe('getImagePartsForText', () => { + it('should only return images whose tags are present in text', async () => { + const { result } = renderHook(() => useClipboardImages()); + + act(() => { + result.current.registerImage('/path/to/image1.png'); + result.current.registerImage('/path/to/image2.png'); + result.current.registerImage('/path/to/image3.png'); + }); + + // Only mention Image #1 and #3 in text (user deleted #2) + 
const parts = await result.current.getImagePartsForText( + 'Hello [Image #1] and [Image #3]', + ); + + expect(parts.length).toBe(2); + }); + + it('should return empty array when user deletes all image tags', async () => { + const { result } = renderHook(() => useClipboardImages()); + + act(() => { + result.current.registerImage('/path/to/image1.png'); + result.current.registerImage('/path/to/image2.png'); + }); + + // User deleted all [Image #N] tags from their message + const parts = await result.current.getImagePartsForText( + 'Hello world, no images here', + ); + + expect(parts).toEqual([]); + }); + }); +}); diff --git a/packages/cli/src/ui/hooks/useClipboardImages.ts b/packages/cli/src/ui/hooks/useClipboardImages.ts index 93efa824296..666fe097a20 100644 --- a/packages/cli/src/ui/hooks/useClipboardImages.ts +++ b/packages/cli/src/ui/hooks/useClipboardImages.ts @@ -23,6 +23,28 @@ export interface ClipboardImage { displayText: string; } +/** + * Internal registry for tracking clipboard images. + * Uses a Map for O(1) path lookup to prevent race conditions. + */ +interface ImageRegistry { + /** Map from absolute path to ClipboardImage for O(1) duplicate detection */ + pathToImage: Map; + /** Ordered array of images for iteration */ + images: ClipboardImage[]; + /** Next sequential ID to assign */ + nextId: number; +} + +/** + * Creates an empty image registry. + */ +const createEmptyRegistry = (): ImageRegistry => ({ + pathToImage: new Map(), + images: [], + nextId: 1, +}); + /** * Return type for the useClipboardImages hook. */ @@ -96,39 +118,63 @@ async function readImageAsPart( * to base64-encoded parts for injection into the Gemini prompt. * * The image counter resets after each message submission. + * + * Uses a Map-based registry with synchronized ref/state to prevent race conditions + * when multiple images are registered rapidly (e.g., multi-file drag-and-drop). 
*/ export function useClipboardImages(): UseClipboardImagesReturn { - const [images, setImages] = useState([]); - const nextIdRef = useRef(1); - - const registerImage = useCallback((absolutePath: string): string => { - // Generate ID atomically with state update to prevent race conditions - // when multiple images are registered rapidly (e.g., multi-file drag-and-drop) - const id = nextIdRef.current++; - const displayText = `[Image #${id}]`; - - setImages((prev) => { - // Check if this path is already registered to prevent duplicates - if (prev.some((img) => img.path === absolutePath)) { - return prev; - } - return [ - ...prev, - { - id, - path: absolutePath, - displayText, - }, - ]; - }); - - return displayText; + const [registry, setRegistryState] = + useState(createEmptyRegistry); + const registryRef = useRef(registry); + + // Custom setter that syncs ref and state atomically. + // The ref is updated synchronously for immediate reads, + // while state update is queued for React re-renders. + const setRegistry = useCallback((newRegistry: ImageRegistry) => { + registryRef.current = newRegistry; + setRegistryState(newRegistry); }, []); + /** + * Register a new image and return its display text. + * This function is idempotent: registering the same path twice returns + * the same display text without creating a duplicate entry. 
+ */ + const registerImage = useCallback( + (absolutePath: string): string => { + // Read from ref for synchronous access to latest state + const current = registryRef.current; + + // O(1) check for existing registration - makes this idempotent + const existing = current.pathToImage.get(absolutePath); + if (existing) { + return existing.displayText; + } + + // Assign ID and create image atomically + const id = current.nextId; + const displayText = `[Image #${id}]`; + const newImage: ClipboardImage = { id, path: absolutePath, displayText }; + + // Immutable Map update + const newPathToImage = new Map(current.pathToImage); + newPathToImage.set(absolutePath, newImage); + + const newRegistry: ImageRegistry = { + pathToImage: newPathToImage, + images: [...current.images, newImage], + nextId: id + 1, + }; + + setRegistry(newRegistry); + return displayText; + }, + [setRegistry], + ); + const clear = useCallback(() => { - setImages([]); - nextIdRef.current = 1; - }, []); + setRegistry(createEmptyRegistry()); + }, [setRegistry]); /** * Get image parts only for images whose [Image #N] tags are present in the text. 
@@ -136,11 +182,13 @@ export function useClipboardImages(): UseClipboardImagesReturn { */ const getImagePartsForText = useCallback( async (text: string): Promise => { + // Use ref for synchronous access to current state + const current = registryRef.current; const parts: PartUnion[] = []; - for (const image of images) { + for (const image of current.images) { // Use String.includes for faster tag checking (no regex compilation) - if (!text.includes(`[Image #${image.id}]`)) { + if (!text.includes(image.displayText)) { // Tag was deleted - skip this image continue; } @@ -153,11 +201,11 @@ export function useClipboardImages(): UseClipboardImagesReturn { return parts; }, - [images], + [], // No dependencies - reads from ref for consistent access ); return { - images, + images: registry.images, registerImage, clear, getImagePartsForText, From f68d4e5c33635925ccda3710d2dd8c2f6224faf2 Mon Sep 17 00:00:00 2001 From: Jack Wotherspoon Date: Sun, 14 Dec 2025 09:02:47 -0500 Subject: [PATCH 3/8] chore: enforce 20MB size limit --- .../src/ui/hooks/useClipboardImages.test.ts | 29 +++++++++++++++++++ .../cli/src/ui/hooks/useClipboardImages.ts | 19 +++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/packages/cli/src/ui/hooks/useClipboardImages.test.ts b/packages/cli/src/ui/hooks/useClipboardImages.test.ts index c67c21f1c73..78a116bf40e 100644 --- a/packages/cli/src/ui/hooks/useClipboardImages.test.ts +++ b/packages/cli/src/ui/hooks/useClipboardImages.test.ts @@ -12,6 +12,7 @@ import { useClipboardImages } from './useClipboardImages.js'; // Mock the fs module to avoid actual file system operations vi.mock('node:fs/promises', () => ({ readFile: vi.fn().mockResolvedValue(Buffer.from('fake image data')), + stat: vi.fn().mockResolvedValue({ size: 1024 }), // Default: 1KB (under limit) })); describe('useClipboardImages', () => { @@ -131,5 +132,33 @@ describe('useClipboardImages', () => { expect(parts).toEqual([]); }); + + it('should skip images exceeding 20MB size
limit', async () => { + const fs = await import('node:fs/promises'); + const statMock = vi.mocked(fs.stat); + + // First image: 25MB (over limit), Second image: 1KB (under limit) + statMock + .mockResolvedValueOnce({ size: 25 * 1024 * 1024 } as Awaited< + ReturnType + >) + .mockResolvedValueOnce({ size: 1024 } as Awaited< + ReturnType + >); + + const { result } = renderHook(() => useClipboardImages()); + + act(() => { + result.current.registerImage('/path/to/huge-image.png'); + result.current.registerImage('/path/to/small-image.png'); + }); + + const parts = await result.current.getImagePartsForText( + '[Image #1] [Image #2]', + ); + + // Only the small image should be included + expect(parts.length).toBe(1); + }); }); }); diff --git a/packages/cli/src/ui/hooks/useClipboardImages.ts b/packages/cli/src/ui/hooks/useClipboardImages.ts index 666fe097a20..e3c20a63417 100644 --- a/packages/cli/src/ui/hooks/useClipboardImages.ts +++ b/packages/cli/src/ui/hooks/useClipboardImages.ts @@ -59,6 +59,12 @@ export interface UseClipboardImagesReturn { getImagePartsForText: (text: string) => Promise; } +/** + * Maximum file size for inline image data (20MB). + * See: https://ai.google.dev/gemini-api/docs/image-understanding + */ +const MAX_IMAGE_SIZE_BYTES = 20 * 1024 * 1024; + /** * MIME types supported by Gemini API for image inputs. * See: https://ai.google.dev/gemini-api/docs/image-understanding @@ -79,7 +85,8 @@ function getMimeType(filePath: string): string | null { /** * Reads an image file and returns it as a base64-encoded PartUnion. - * Returns null if the file cannot be read or has an unsupported format. + * Returns null if the file cannot be read, has an unsupported format, + * or exceeds the 20MB size limit. 
*/ async function readImageAsPart( imagePath: string, @@ -95,6 +102,16 @@ async function readImageAsPart( } try { + // Check file size before reading to avoid loading huge files into memory + const stats = await fs.stat(imagePath); + if (stats.size > MAX_IMAGE_SIZE_BYTES) { + const sizeMB = (stats.size / (1024 * 1024)).toFixed(1); + debugLogger.warn( + `${displayText} exceeds 20MB limit (${sizeMB}MB), skipping. Consider using a smaller image.`, + ); + return null; + } + const fileContent = await fs.readFile(imagePath); return { inlineData: { From 835acd6b23fa53e47fec7fe202163ceed1f1a199 Mon Sep 17 00:00:00 2001 From: Jack Wotherspoon Date: Sun, 14 Dec 2025 15:13:50 -0500 Subject: [PATCH 4/8] chore: add warning for oversized images --- packages/cli/src/ui/AppContainer.tsx | 13 +++- .../cli/src/ui/components/InputPrompt.tsx | 77 ++++++++++++++----- .../src/ui/hooks/useClipboardImages.test.ts | 55 +++++++++++++ .../cli/src/ui/hooks/useClipboardImages.ts | 65 ++++++++++++++-- packages/cli/src/utils/events.ts | 2 + 5 files changed, 185 insertions(+), 27 deletions(-) diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 43c6b4b2e28..ee7c27128cf 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -130,6 +130,7 @@ import { enableBracketedPaste } from './utils/bracketedPaste.js'; import { useBanner } from './hooks/useBanner.js'; const WARNING_PROMPT_DURATION_MS = 1000; +const IMAGE_WARNING_DURATION_MS = 3000; const QUEUE_ERROR_DISPLAY_DURATION_MS = 3000; function isToolExecuting(pendingHistoryItems: HistoryItemWithoutId[]) { @@ -1041,14 +1042,17 @@ Logging in with Google... Restarting Gemini CLI to continue. 
useEffect(() => { let timeoutId: NodeJS.Timeout; - const handleWarning = (message: string) => { + const handleWarning = ( + message: string, + durationMs = WARNING_PROMPT_DURATION_MS, + ) => { setWarningMessage(message); if (timeoutId) { clearTimeout(timeoutId); } timeoutId = setTimeout(() => { setWarningMessage(null); - }, WARNING_PROMPT_DURATION_MS); + }, durationMs); }; const handleSelectionWarning = () => { @@ -1057,11 +1061,16 @@ Logging in with Google... Restarting Gemini CLI to continue. const handlePasteTimeout = () => { handleWarning('Paste Timed out. Possibly due to slow connection.'); }; + const handleImageWarning = (message: string) => { + handleWarning(message, IMAGE_WARNING_DURATION_MS); + }; appEvents.on(AppEvent.SelectionWarning, handleSelectionWarning); appEvents.on(AppEvent.PasteTimeout, handlePasteTimeout); + appEvents.on(AppEvent.ImageWarning, handleImageWarning); return () => { appEvents.off(AppEvent.SelectionWarning, handleSelectionWarning); appEvents.off(AppEvent.PasteTimeout, handlePasteTimeout); + appEvents.off(AppEvent.ImageWarning, handleImageWarning); if (timeoutId) { clearTimeout(timeoutId); } diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx index 58023c1568b..ed1729c3566 100644 --- a/packages/cli/src/ui/components/InputPrompt.tsx +++ b/packages/cli/src/ui/components/InputPrompt.tsx @@ -38,6 +38,7 @@ import { categorizePathsByType, } from '../utils/clipboardUtils.js'; import type { UseClipboardImagesReturn } from '../hooks/useClipboardImages.js'; +import { appEvents, AppEvent } from '../../utils/events.js'; import { isAutoExecutableCommand, isSlashCommand, @@ -332,6 +333,15 @@ export const InputPrompt: React.FC = ({ // If clipboardImages is not provided, fall back to the old @path behavior let insertText: string; if (clipboardImages) { + // Validate image before registration + const validation = await clipboardImages.validateImage(imagePath); + if (!validation.valid) { + 
appEvents.emit( + AppEvent.ImageWarning, + validation.error ?? 'Invalid image', + ); + return; + } insertText = clipboardImages.registerImage(imagePath); } else { const relativePath = path.relative( @@ -435,50 +445,79 @@ export const InputPrompt: React.FC = ({ key.sequence && mayContainImagePaths(key.sequence) ) { - // Only go async for potential image paths to verify file existence + // Capture state at paste time to handle the async operation correctly const sequence = key.sequence; + const pasteOffset = buffer.getOffset(); + const currentText = buffer.text; + + // Only go async for potential image paths to verify file existence void (async () => { try { const { imagePaths, nonImagePaths } = await categorizePathsByType(sequence); if (imagePaths.length > 0 || nonImagePaths.length > 0) { - // Register each image and collect placeholders - const placeholders = imagePaths.map((p) => - clipboardImages.registerImage(p), - ); + // Validate and register each image, collecting placeholders + const placeholders: string[] = []; + const skippedImages: string[] = []; + + for (const imagePath of imagePaths) { + const validation = + await clipboardImages.validateImage(imagePath); + if (validation.valid) { + placeholders.push(clipboardImages.registerImage(imagePath)); + } else { + skippedImages.push(validation.error ?? 
'Invalid image'); + } + } + + // Show warnings for skipped images + for (const error of skippedImages) { + appEvents.emit(AppEvent.ImageWarning, error); + } // Non-image files use @path syntax for file references const atPrefixedPaths = nonImagePaths.map((p) => `@${p}`); // Build insertion text: image placeholders + @path references const insertParts = [...placeholders, ...atPrefixedPaths]; - const insertText = insertParts.join(' '); - // Insert at cursor position with proper spacing - const offset = buffer.getOffset(); - const currentText = buffer.text; - let textToInsert = insertText; + // If all images were invalid but we have non-image paths, still insert those + if (insertParts.length === 0) { + // All paths were invalid images with no non-image files + return; + } + + let insertText = insertParts.join(' '); - const charBefore = offset > 0 ? currentText[offset - 1] : ''; + // Add spacing around the insert text based on context at paste time + const charBefore = + pasteOffset > 0 ? currentText[pasteOffset - 1] : ''; const charAfter = - offset < currentText.length ? currentText[offset] : ''; + pasteOffset < currentText.length + ? 
currentText[pasteOffset] + : ''; if (charBefore && charBefore !== ' ' && charBefore !== '\n') { - textToInsert = ' ' + textToInsert; + insertText = ' ' + insertText; } if (!charAfter || (charAfter !== ' ' && charAfter !== '\n')) { - textToInsert = textToInsert + ' '; + insertText = insertText + ' '; } - buffer.replaceRangeByOffset(offset, offset, textToInsert); + // Insert at the original paste position + buffer.replaceRangeByOffset( + pasteOffset, + pasteOffset, + insertText, + ); } else { - // No valid paths found, treat as normal paste - buffer.handleInput(key); + // No valid paths found, insert as normal text + buffer.replaceRangeByOffset(pasteOffset, pasteOffset, sequence); } } catch { - // On error, fall back to normal paste behavior - buffer.handleInput(key); + // On error, insert as normal text + buffer.replaceRangeByOffset(pasteOffset, pasteOffset, sequence); } })(); return; diff --git a/packages/cli/src/ui/hooks/useClipboardImages.test.ts b/packages/cli/src/ui/hooks/useClipboardImages.test.ts index 78a116bf40e..324bbd3abb8 100644 --- a/packages/cli/src/ui/hooks/useClipboardImages.test.ts +++ b/packages/cli/src/ui/hooks/useClipboardImages.test.ts @@ -161,4 +161,59 @@ describe('useClipboardImages', () => { expect(parts.length).toBe(1); }); }); + + describe('validateImage', () => { + it('should return valid for supported image under size limit', async () => { + const fs = await import('node:fs/promises'); + const statMock = vi.mocked(fs.stat); + statMock.mockResolvedValueOnce({ size: 1024 } as Awaited< + ReturnType + >); + + const { result } = renderHook(() => useClipboardImages()); + const validation = + await result.current.validateImage('/path/to/image.png'); + + expect(validation.valid).toBe(true); + expect(validation.error).toBeUndefined(); + }); + + it('should return error for unsupported image format', async () => { + const { result } = renderHook(() => useClipboardImages()); + const validation = + await 
result.current.validateImage('/path/to/image.gif'); + + expect(validation.valid).toBe(false); + expect(validation.error).toContain('Unsupported image format'); + }); + + it('should return error for image exceeding 20MB', async () => { + const fs = await import('node:fs/promises'); + const statMock = vi.mocked(fs.stat); + statMock.mockResolvedValueOnce({ size: 25 * 1024 * 1024 } as Awaited< + ReturnType + >); + + const { result } = renderHook(() => useClipboardImages()); + const validation = + await result.current.validateImage('/path/to/huge.png'); + + expect(validation.valid).toBe(false); + expect(validation.error).toContain('exceeds 20MB limit'); + }); + + it('should return error when file cannot be read', async () => { + const fs = await import('node:fs/promises'); + const statMock = vi.mocked(fs.stat); + statMock.mockRejectedValueOnce(new Error('ENOENT: no such file')); + + const { result } = renderHook(() => useClipboardImages()); + const validation = await result.current.validateImage( + '/path/to/missing.png', + ); + + expect(validation.valid).toBe(false); + expect(validation.error).toContain('Cannot read image'); + }); + }); }); diff --git a/packages/cli/src/ui/hooks/useClipboardImages.ts b/packages/cli/src/ui/hooks/useClipboardImages.ts index e3c20a63417..cb3800f24da 100644 --- a/packages/cli/src/ui/hooks/useClipboardImages.ts +++ b/packages/cli/src/ui/hooks/useClipboardImages.ts @@ -10,6 +10,7 @@ import * as path from 'node:path'; import type { PartUnion } from '@google/genai'; import { debugLogger } from '@google/gemini-cli-core'; import { IMAGE_EXTENSIONS } from '../utils/clipboardUtils.js'; +import { appEvents, AppEvent } from '../../utils/events.js'; /** * Represents a clipboard image that has been pasted into the input. @@ -45,12 +46,24 @@ const createEmptyRegistry = (): ImageRegistry => ({ nextId: 1, }); +/** + * Result of image validation. 
+ */ +export interface ImageValidationResult { + /** Whether the image is valid and can be registered */ + valid: boolean; + /** Error message if validation failed */ + error?: string; +} + /** * Return type for the useClipboardImages hook. */ export interface UseClipboardImagesReturn { /** Array of registered clipboard images for the current message */ images: ClipboardImage[]; + /** Validate an image before registration. Returns error message if invalid. */ + validateImage: (absolutePath: string) => Promise; /** Register a new image and return its display text (e.g., "[Image #1]") */ registerImage: (absolutePath: string) => string; /** Clear all images (called after message submission) */ @@ -95,9 +108,9 @@ async function readImageAsPart( const mimeType = getMimeType(imagePath); if (!mimeType) { const ext = path.extname(imagePath); - debugLogger.warn( - `Unsupported image format ${ext} for ${displayText}, skipping. Supported: ${IMAGE_EXTENSIONS.join(', ')}`, - ); + const message = `Unsupported image format ${ext} for ${displayText}`; + debugLogger.warn(`${message}. Supported: ${IMAGE_EXTENSIONS.join(', ')}`); + appEvents.emit(AppEvent.ImageWarning, message); return null; } @@ -106,9 +119,9 @@ async function readImageAsPart( const stats = await fs.stat(imagePath); if (stats.size > MAX_IMAGE_SIZE_BYTES) { const sizeMB = (stats.size / (1024 * 1024)).toFixed(1); - debugLogger.warn( - `${displayText} exceeds 20MB limit (${sizeMB}MB), skipping. Consider using a smaller image.`, - ); + const message = `${displayText} exceeds 20MB limit (${sizeMB}MB)`; + debugLogger.warn(`${message}. Consider using a smaller image.`); + appEvents.emit(AppEvent.ImageWarning, message); return null; } @@ -152,6 +165,45 @@ export function useClipboardImages(): UseClipboardImagesReturn { setRegistryState(newRegistry); }, []); + /** + * Validate an image before registration. + * Checks file size and MIME type support. 
+ */ + const validateImage = useCallback( + async (absolutePath: string): Promise => { + // Check MIME type + const mimeType = getMimeType(absolutePath); + if (!mimeType) { + const ext = path.extname(absolutePath); + return { + valid: false, + error: `Unsupported image format ${ext}`, + }; + } + + // Check file size + try { + const stats = await fs.stat(absolutePath); + if (stats.size > MAX_IMAGE_SIZE_BYTES) { + const sizeMB = (stats.size / (1024 * 1024)).toFixed(1); + return { + valid: false, + error: `Image exceeds 20MB limit (${sizeMB}MB)`, + }; + } + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { + valid: false, + error: `Cannot read image: ${message}`, + }; + } + + return { valid: true }; + }, + [], + ); + /** * Register a new image and return its display text. * This function is idempotent: registering the same path twice returns @@ -223,6 +275,7 @@ export function useClipboardImages(): UseClipboardImagesReturn { return { images: registry.images, + validateImage, registerImage, clear, getImagePartsForText, diff --git a/packages/cli/src/utils/events.ts b/packages/cli/src/utils/events.ts index 4e7d1270289..63cf54bcc18 100644 --- a/packages/cli/src/utils/events.ts +++ b/packages/cli/src/utils/events.ts @@ -14,6 +14,7 @@ export enum AppEvent { McpClientUpdate = 'mcp-client-update', SelectionWarning = 'selection-warning', PasteTimeout = 'paste-timeout', + ImageWarning = 'image-warning', } export interface AppEvents extends ExtensionEvents { @@ -23,6 +24,7 @@ export interface AppEvents extends ExtensionEvents { [AppEvent.McpClientUpdate]: Array | never>; [AppEvent.SelectionWarning]: never[]; [AppEvent.PasteTimeout]: never[]; + [AppEvent.ImageWarning]: string[]; } export const appEvents = new EventEmitter(); From f6780b6bd4b24c254258fd34973dc76fd9a83db9 Mon Sep 17 00:00:00 2001 From: Jack Wotherspoon Date: Sun, 14 Dec 2025 15:55:47 -0500 Subject: [PATCH 5/8] chore: add processing image warning --- 
packages/cli/src/ui/AppContainer.tsx | 9 +++++++++ packages/cli/src/ui/components/InputPrompt.tsx | 7 +++++++ packages/cli/src/utils/events.ts | 2 ++ 3 files changed, 18 insertions(+) diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index ee7c27128cf..edeb7c6790c 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -1064,13 +1064,22 @@ Logging in with Google... Restarting Gemini CLI to continue. const handleImageWarning = (message: string) => { handleWarning(message, IMAGE_WARNING_DURATION_MS); }; + const handleImageProcessing = (message: string) => { + if (message) { + setWarningMessage(message); + } else { + setWarningMessage(null); + } + }; appEvents.on(AppEvent.SelectionWarning, handleSelectionWarning); appEvents.on(AppEvent.PasteTimeout, handlePasteTimeout); appEvents.on(AppEvent.ImageWarning, handleImageWarning); + appEvents.on(AppEvent.ImageProcessing, handleImageProcessing); return () => { appEvents.off(AppEvent.SelectionWarning, handleSelectionWarning); appEvents.off(AppEvent.PasteTimeout, handlePasteTimeout); appEvents.off(AppEvent.ImageWarning, handleImageWarning); + appEvents.off(AppEvent.ImageProcessing, handleImageProcessing); if (timeoutId) { clearTimeout(timeoutId); } diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx index ed1729c3566..87100971fe3 100644 --- a/packages/cli/src/ui/components/InputPrompt.tsx +++ b/packages/cli/src/ui/components/InputPrompt.tsx @@ -322,7 +322,14 @@ export const InputPrompt: React.FC = ({ const handleClipboardPaste = useCallback(async () => { try { if (await clipboardHasImage()) { + // Show processing indicator immediately + appEvents.emit(AppEvent.ImageProcessing, 'Processing image...'); + const imagePath = await saveClipboardImage(config.getTargetDir()); + + // Clear processing indicator + appEvents.emit(AppEvent.ImageProcessing, ''); + if (imagePath) { // Clean up old images 
cleanupOldClipboardImages(config.getTargetDir()).catch(() => { diff --git a/packages/cli/src/utils/events.ts b/packages/cli/src/utils/events.ts index 63cf54bcc18..eec176f764f 100644 --- a/packages/cli/src/utils/events.ts +++ b/packages/cli/src/utils/events.ts @@ -15,6 +15,7 @@ export enum AppEvent { SelectionWarning = 'selection-warning', PasteTimeout = 'paste-timeout', ImageWarning = 'image-warning', + ImageProcessing = 'image-processing', } export interface AppEvents extends ExtensionEvents { @@ -25,6 +26,7 @@ export interface AppEvents extends ExtensionEvents { [AppEvent.SelectionWarning]: never[]; [AppEvent.PasteTimeout]: never[]; [AppEvent.ImageWarning]: string[]; + [AppEvent.ImageProcessing]: string[]; } export const appEvents = new EventEmitter(); From 4cb91d3ba86c90a48611fd2575f52b521d52e358 Mon Sep 17 00:00:00 2001 From: Jack Wotherspoon Date: Tue, 16 Dec 2025 20:13:37 -0800 Subject: [PATCH 6/8] chore: review comments --- .../cli/src/ui/components/InputPrompt.tsx | 4 +-- .../src/ui/hooks/useClipboardImages.test.ts | 6 ++-- .../cli/src/ui/hooks/useClipboardImages.ts | 36 +++++++++---------- packages/cli/src/ui/hooks/useGeminiStream.ts | 19 +++++++--- packages/cli/src/ui/utils/clipboardUtils.ts | 24 ++++++++----- 5 files changed, 52 insertions(+), 37 deletions(-) diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx index 87100971fe3..6b4494cd036 100644 --- a/packages/cli/src/ui/components/InputPrompt.tsx +++ b/packages/cli/src/ui/components/InputPrompt.tsx @@ -24,7 +24,7 @@ import { useKeypress } from '../hooks/useKeypress.js'; import { keyMatchers, Command } from '../keyMatchers.js'; import type { CommandContext, SlashCommand } from '../commands/types.js'; import type { Config } from '@google/gemini-cli-core'; -import { ApprovalMode } from '@google/gemini-cli-core'; +import { ApprovalMode, debugLogger } from '@google/gemini-cli-core'; import { parseInputForHighlighting, buildSegmentsForVisualSlice, @@ 
-384,7 +384,7 @@ export const InputPrompt: React.FC = ({ const offset = buffer.getOffset(); buffer.replaceRangeByOffset(offset, offset, textToInsert); } catch (error) { - console.error('Error handling clipboard image:', error); + debugLogger.error('Error handling clipboard image:', error); } }, [buffer, config, clipboardImages]); diff --git a/packages/cli/src/ui/hooks/useClipboardImages.test.ts b/packages/cli/src/ui/hooks/useClipboardImages.test.ts index 324bbd3abb8..a5ac2052a69 100644 --- a/packages/cli/src/ui/hooks/useClipboardImages.test.ts +++ b/packages/cli/src/ui/hooks/useClipboardImages.test.ts @@ -110,7 +110,7 @@ describe('useClipboardImages', () => { }); // Only mention Image #1 and #3 in text (user deleted #2) - const parts = await result.current.getImagePartsForText( + const { parts } = await result.current.getImagePartsForText( 'Hello [Image #1] and [Image #3]', ); @@ -126,7 +126,7 @@ describe('useClipboardImages', () => { }); // User deleted all [Image #N] tags from their message - const parts = await result.current.getImagePartsForText( + const { parts } = await result.current.getImagePartsForText( 'Hello world, no images here', ); @@ -153,7 +153,7 @@ describe('useClipboardImages', () => { result.current.registerImage('/path/to/small-image.png'); }); - const parts = await result.current.getImagePartsForText( + const { parts } = await result.current.getImagePartsForText( '[Image #1] [Image #2]', ); diff --git a/packages/cli/src/ui/hooks/useClipboardImages.ts b/packages/cli/src/ui/hooks/useClipboardImages.ts index cb3800f24da..5af9435e309 100644 --- a/packages/cli/src/ui/hooks/useClipboardImages.ts +++ b/packages/cli/src/ui/hooks/useClipboardImages.ts @@ -9,7 +9,7 @@ import * as fs from 'node:fs/promises'; import * as path from 'node:path'; import type { PartUnion } from '@google/genai'; import { debugLogger } from '@google/gemini-cli-core'; -import { IMAGE_EXTENSIONS } from '../utils/clipboardUtils.js'; +import { IMAGE_EXTENSIONS, IMAGE_FORMATS } from 
'../utils/clipboardUtils.js'; import { appEvents, AppEvent } from '../../utils/events.js'; /** @@ -56,6 +56,16 @@ export interface ImageValidationResult { error?: string; } +/** + * Result of getImagePartsForText, containing both image data and matched placeholders. + */ +export interface ImagePartsResult { + /** The image parts to send to the API */ + parts: PartUnion[]; + /** The display texts (e.g., "[Image #1]") that were matched and should be stripped */ + matchedDisplayTexts: string[]; +} + /** * Return type for the useClipboardImages hook. */ @@ -69,7 +79,7 @@ export interface UseClipboardImagesReturn { /** Clear all images (called after message submission) */ clear: () => void; /** Get image parts only for images whose [Image #N] tags are present in the text */ - getImagePartsForText: (text: string) => Promise; + getImagePartsForText: (text: string) => Promise; } /** @@ -78,22 +88,9 @@ export interface UseClipboardImagesReturn { */ const MAX_IMAGE_SIZE_BYTES = 20 * 1024 * 1024; -/** - * MIME types supported by Gemini API for image inputs. - * See: https://ai.google.dev/gemini-api/docs/image-understanding - */ -const MIME_TYPES: Record = { - '.png': 'image/png', - '.jpg': 'image/jpeg', - '.jpeg': 'image/jpeg', - '.webp': 'image/webp', - '.heic': 'image/heic', - '.heif': 'image/heif', -}; - function getMimeType(filePath: string): string | null { const ext = path.extname(filePath).toLowerCase(); - return MIME_TYPES[ext] ?? null; + return IMAGE_FORMATS[ext] ?? null; } /** @@ -248,12 +245,14 @@ export function useClipboardImages(): UseClipboardImagesReturn { /** * Get image parts only for images whose [Image #N] tags are present in the text. * This prevents sending images the user has deleted from their prompt. + * Returns both the image parts and the matched display texts for stripping. 
*/ const getImagePartsForText = useCallback( - async (text: string): Promise => { + async (text: string): Promise => { // Use ref for synchronous access to current state const current = registryRef.current; const parts: PartUnion[] = []; + const matchedDisplayTexts: string[] = []; for (const image of current.images) { // Use String.includes for faster tag checking (no regex compilation) @@ -265,10 +264,11 @@ export function useClipboardImages(): UseClipboardImagesReturn { const part = await readImageAsPart(image.path, image.displayText); if (part) { parts.push(part); + matchedDisplayTexts.push(image.displayText); } } - return parts; + return { parts, matchedDisplayTexts }; }, [], // No dependencies - reads from ref for consistent access ); diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 3a7d4257f8c..a36e036d9b8 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -115,7 +115,10 @@ export const useGeminiStream = ( terminalHeight: number, isShellFocused?: boolean, clipboardImages?: { - getImagePartsForText: (text: string) => Promise; + getImagePartsForText: (text: string) => Promise<{ + parts: PartUnion[]; + matchedDisplayTexts: string[]; + }>; clear: () => void; }, ) => { @@ -537,14 +540,20 @@ export const useGeminiStream = ( .join(' ') : ''; - const imageParts = + const { parts: imageParts, matchedDisplayTexts } = await clipboardImages.getImagePartsForText(queryText); if (imageParts.length > 0) { onDebugMessage(`Injecting ${imageParts.length} clipboard image(s)`); - // Strip [Image #N] references from the text since we're injecting the actual images - const stripImageReferences = (text: string): string => - text.replace(/\[Image #\d+\]\s*/g, '').trim(); + // Strip only the placeholders that correspond to actual registered images + // This preserves any user-typed [Image #N] text that doesn't match a registered image + const stripImageReferences 
= (text: string): string => { + let result = text; + for (const displayText of matchedDisplayTexts) { + result = result.replaceAll(displayText, ''); + } + return result.replace(/\s{2,}/g, ' ').trim(); + }; if (typeof localQueryToSendToGemini === 'string') { const cleanedText = stripImageReferences(localQueryToSendToGemini); diff --git a/packages/cli/src/ui/utils/clipboardUtils.ts b/packages/cli/src/ui/utils/clipboardUtils.ts index 24f50692d78..ff8a96e2e1b 100644 --- a/packages/cli/src/ui/utils/clipboardUtils.ts +++ b/packages/cli/src/ui/utils/clipboardUtils.ts @@ -14,17 +14,23 @@ import { } from '@google/gemini-cli-core'; /** - * Supported image file extensions based on Gemini API. + * Supported image formats based on Gemini API. + * Maps file extensions to MIME types. * See: https://ai.google.dev/gemini-api/docs/image-understanding */ -export const IMAGE_EXTENSIONS = [ - '.png', - '.jpg', - '.jpeg', - '.webp', - '.heic', - '.heif', -]; +export const IMAGE_FORMATS: Record = { + '.png': 'image/png', + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.webp': 'image/webp', + '.heic': 'image/heic', + '.heif': 'image/heif', +}; + +/** + * Supported image file extensions derived from IMAGE_FORMATS. 
+ */ +export const IMAGE_EXTENSIONS = Object.keys(IMAGE_FORMATS); /** Matches strings that start with a path prefix (/, ~, ., Windows drive letter, or UNC path) */ const PATH_PREFIX_PATTERN = /^([/~.]|[a-zA-Z]:|\\\\)/; From eadc6ae20dcba02b036aff51bf8ce4e6b802fa3a Mon Sep 17 00:00:00 2001 From: Jack Wotherspoon Date: Tue, 16 Dec 2025 20:46:04 -0800 Subject: [PATCH 7/8] chore: more review comments --- packages/cli/src/ui/hooks/useGeminiStream.ts | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index a36e036d9b8..0f003d2d889 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -1003,11 +1003,6 @@ export const useGeminiStream = ( return; } - // Clear clipboard images after they've been injected into the query - if (clipboardImages && !options?.isContinuation) { - clipboardImages.clear(); - } - if (!options?.isContinuation) { if (typeof queryToSend === 'string') { // logging the text prompts only for now @@ -1053,6 +1048,12 @@ export const useGeminiStream = ( addItem(pendingHistoryItemRef.current, userMessageTimestamp); setPendingHistoryItem(null); } + + // Clear clipboard images after successful send + if (clipboardImages && !options?.isContinuation) { + clipboardImages.clear(); + } + if (loopDetectedRef.current) { loopDetectedRef.current = false; // Show the confirmation dialog to choose whether to disable loop detection From 7b05602544b368275049a6947dd6c58b720203ac Mon Sep 17 00:00:00 2001 From: Jack Wotherspoon Date: Mon, 22 Dec 2025 12:23:06 -0500 Subject: [PATCH 8/8] chore: improve parallelization --- .../cli/src/ui/components/InputPrompt.tsx | 14 +++++++--- .../cli/src/ui/hooks/useClipboardImages.ts | 27 ++++++++++++------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx 
index 635ab833a9d..ff5448af569 100644 --- a/packages/cli/src/ui/components/InputPrompt.tsx +++ b/packages/cli/src/ui/components/InputPrompt.tsx @@ -464,13 +464,19 @@ export const InputPrompt: React.FC = ({ await categorizePathsByType(sequence); if (imagePaths.length > 0 || nonImagePaths.length > 0) { - // Validate and register each image, collecting placeholders + // Validate all images in parallel + const validationResults = await Promise.all( + imagePaths.map(async (imagePath) => ({ + imagePath, + validation: await clipboardImages.validateImage(imagePath), + })), + ); + + // Register valid images and collect errors const placeholders: string[] = []; const skippedImages: string[] = []; - for (const imagePath of imagePaths) { - const validation = - await clipboardImages.validateImage(imagePath); + for (const { imagePath, validation } of validationResults) { if (validation.valid) { placeholders.push(clipboardImages.registerImage(imagePath)); } else { diff --git a/packages/cli/src/ui/hooks/useClipboardImages.ts b/packages/cli/src/ui/hooks/useClipboardImages.ts index 5af9435e309..d42cbe0cf71 100644 --- a/packages/cli/src/ui/hooks/useClipboardImages.ts +++ b/packages/cli/src/ui/hooks/useClipboardImages.ts @@ -251,20 +251,27 @@ export function useClipboardImages(): UseClipboardImagesReturn { async (text: string): Promise => { // Use ref for synchronous access to current state const current = registryRef.current; - const parts: PartUnion[] = []; - const matchedDisplayTexts: string[] = []; - for (const image of current.images) { - // Use String.includes for faster tag checking (no regex compilation) - if (!text.includes(image.displayText)) { - // Tag was deleted - skip this image - continue; - } + // Filter to only images whose tags are still present in the text + const imagesToProcess = current.images.filter((image) => + text.includes(image.displayText), + ); - const part = await readImageAsPart(image.path, image.displayText); + // Process all images in parallel + const 
results = await Promise.all( + imagesToProcess.map(async (image) => { + const part = await readImageAsPart(image.path, image.displayText); + return { part, displayText: image.displayText }; + }), + ); + + // Collect successful results + const parts: PartUnion[] = []; + const matchedDisplayTexts: string[] = []; + for (const { part, displayText } of results) { if (part) { parts.push(part); - matchedDisplayTexts.push(image.displayText); + matchedDisplayTexts.push(displayText); } }