diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 4986a226174..bc9a534214b 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -105,6 +105,7 @@ import { registerCleanup, runExitCleanup } from '../utils/cleanup.js'; import { RELAUNCH_EXIT_CODE } from '../utils/processUtils.js'; import type { SessionInfo } from '../utils/sessionUtils.js'; import { useMessageQueue } from './hooks/useMessageQueue.js'; +import { useClipboardImages } from './hooks/useClipboardImages.js'; import { useAutoAcceptIndicator } from './hooks/useAutoAcceptIndicator.js'; import { useSessionStats } from './contexts/SessionContext.js'; import { useGitBranchName } from './hooks/useGitBranchName.js'; @@ -127,6 +128,7 @@ import { enableBracketedPaste } from './utils/bracketedPaste.js'; import { useBanner } from './hooks/useBanner.js'; const WARNING_PROMPT_DURATION_MS = 1000; +const IMAGE_WARNING_DURATION_MS = 3000; const QUEUE_ERROR_DISPLAY_DURATION_MS = 3000; function isToolExecuting(pendingHistoryItems: HistoryItemWithoutId[]) { @@ -343,6 +345,9 @@ export const AppContainer = (props: AppContainerProps) => { const { consoleMessages, clearConsoleMessages: clearConsoleMessagesState } = useConsoleMessages(); + // Clipboard images for pasted images in the input + const clipboardImages = useClipboardImages(); + const mainAreaWidth = calculateMainAreaWidth(terminalWidth, settings); // Derive widths for InputPrompt using shared helper const { inputWidth, suggestionsWidth } = useMemo(() => { @@ -776,6 +781,7 @@ Logging in with Google... Restarting Gemini CLI to continue. terminalWidth, terminalHeight, embeddedShellFocused, + clipboardImages, ); // Auto-accept indicator @@ -1020,14 +1026,17 @@ Logging in with Google... Restarting Gemini CLI to continue. 
useEffect(() => { let timeoutId: NodeJS.Timeout; - const handleWarning = (message: string) => { + const handleWarning = ( + message: string, + durationMs = WARNING_PROMPT_DURATION_MS, + ) => { setWarningMessage(message); if (timeoutId) { clearTimeout(timeoutId); } timeoutId = setTimeout(() => { setWarningMessage(null); - }, WARNING_PROMPT_DURATION_MS); + }, durationMs); }; const handleSelectionWarning = () => { @@ -1036,11 +1045,25 @@ Logging in with Google... Restarting Gemini CLI to continue. const handlePasteTimeout = () => { handleWarning('Paste Timed out. Possibly due to slow connection.'); }; + const handleImageWarning = (message: string) => { + handleWarning(message, IMAGE_WARNING_DURATION_MS); + }; + const handleImageProcessing = (message: string) => { + if (message) { + setWarningMessage(message); + } else { + setWarningMessage(null); + } + }; appEvents.on(AppEvent.SelectionWarning, handleSelectionWarning); appEvents.on(AppEvent.PasteTimeout, handlePasteTimeout); + appEvents.on(AppEvent.ImageWarning, handleImageWarning); + appEvents.on(AppEvent.ImageProcessing, handleImageProcessing); return () => { appEvents.off(AppEvent.SelectionWarning, handleSelectionWarning); appEvents.off(AppEvent.PasteTimeout, handlePasteTimeout); + appEvents.off(AppEvent.ImageWarning, handleImageWarning); + appEvents.off(AppEvent.ImageProcessing, handleImageProcessing); if (timeoutId) { clearTimeout(timeoutId); } @@ -1527,6 +1550,7 @@ Logging in with Google... Restarting Gemini CLI to continue. warningMessage, bannerData, bannerVisible, + clipboardImages, terminalBackgroundColor: config.getTerminalBackground(), }), [ @@ -1619,6 +1643,7 @@ Logging in with Google... Restarting Gemini CLI to continue. 
warningMessage, bannerData, bannerVisible, + clipboardImages, config, ], ); diff --git a/packages/cli/src/ui/components/Composer.tsx b/packages/cli/src/ui/components/Composer.tsx index 15a2b455990..9f9b8b3e488 100644 --- a/packages/cli/src/ui/components/Composer.tsx +++ b/packages/cli/src/ui/components/Composer.tsx @@ -181,6 +181,7 @@ export const Composer = () => { streamingState={uiState.streamingState} suggestionsPosition={suggestionsPosition} onSuggestionsVisibilityChange={setSuggestionsVisible} + clipboardImages={uiState.clipboardImages} /> )} diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx index 5aeb0078ab1..a9ead6dbd89 100644 --- a/packages/cli/src/ui/components/InputPrompt.tsx +++ b/packages/cli/src/ui/components/InputPrompt.tsx @@ -24,7 +24,7 @@ import { useKeypress } from '../hooks/useKeypress.js'; import { keyMatchers, Command } from '../keyMatchers.js'; import type { CommandContext, SlashCommand } from '../commands/types.js'; import type { Config } from '@google/gemini-cli-core'; -import { ApprovalMode } from '@google/gemini-cli-core'; +import { ApprovalMode, debugLogger } from '@google/gemini-cli-core'; import { parseInputForHighlighting, parseSegmentsFromTokens, @@ -34,7 +34,11 @@ import { clipboardHasImage, saveClipboardImage, cleanupOldClipboardImages, + mayContainImagePaths, + categorizePathsByType, } from '../utils/clipboardUtils.js'; +import type { UseClipboardImagesReturn } from '../hooks/useClipboardImages.js'; +import { appEvents, AppEvent } from '../../utils/events.js'; import { isAutoExecutableCommand, isSlashCommand, @@ -86,6 +90,7 @@ export interface InputPromptProps { popAllMessages?: () => string | undefined; suggestionsPosition?: 'above' | 'below'; setBannerVisible: (visible: boolean) => void; + clipboardImages?: UseClipboardImagesReturn; } // The input content, input container, and input suggestions list may have different widths @@ -128,6 +133,7 @@ export const InputPrompt: 
React.FC = ({ popAllMessages, suggestionsPosition = 'below', setBannerVisible, + clipboardImages, }) => { const kittyProtocol = useKittyKeyboardProtocol(); const isShellFocused = useShellFocusState(); @@ -316,22 +322,46 @@ export const InputPrompt: React.FC = ({ const handleClipboardPaste = useCallback(async () => { try { if (await clipboardHasImage()) { + // Show processing indicator immediately + appEvents.emit(AppEvent.ImageProcessing, 'Processing image...'); + const imagePath = await saveClipboardImage(config.getTargetDir()); + + // Clear processing indicator + appEvents.emit(AppEvent.ImageProcessing, ''); + if (imagePath) { // Clean up old images cleanupOldClipboardImages(config.getTargetDir()).catch(() => { // Ignore cleanup errors }); - // Get relative path from current directory - const relativePath = path.relative(config.getTargetDir(), imagePath); + // Register image and get display text (e.g., "[Image #1]") + // If clipboardImages is not provided, fall back to the old @path behavior + let insertText: string; + if (clipboardImages) { + // Validate image before registration + const validation = await clipboardImages.validateImage(imagePath); + if (!validation.valid) { + appEvents.emit( + AppEvent.ImageWarning, + validation.error ?? 'Invalid image', + ); + return; + } + insertText = clipboardImages.registerImage(imagePath); + } else { + const relativePath = path.relative( + config.getTargetDir(), + imagePath, + ); + insertText = `@${relativePath}`; + } - // Insert @path reference at cursor position - const insertText = `@${relativePath}`; const currentText = buffer.text; const offset = buffer.getOffset(); - // Add spaces around the path if needed + // Add spaces around the display text if needed let textToInsert = insertText; const charBefore = offset > 0 ? 
currentText[offset - 1] : ''; const charAfter = @@ -354,9 +384,9 @@ export const InputPrompt: React.FC = ({ const offset = buffer.getOffset(); buffer.replaceRangeByOffset(offset, offset, textToInsert); } catch (error) { - console.error('Error handling clipboard image:', error); + debugLogger.error('Error handling clipboard image:', error); } - }, [buffer, config]); + }, [buffer, config, clipboardImages]); useMouseClick( innerBoxRef, @@ -414,6 +444,98 @@ export const InputPrompt: React.FC = ({ pasteTimeoutRef.current = null; }, 40); } + + // Check if pasted content could be image file path(s) (drag and drop) + // Use synchronous check first to avoid async handling for normal text + if ( + clipboardImages && + key.sequence && + mayContainImagePaths(key.sequence) + ) { + // Capture state at paste time to handle the async operation correctly + const sequence = key.sequence; + const pasteOffset = buffer.getOffset(); + const currentText = buffer.text; + + // Only go async for potential image paths to verify file existence + void (async () => { + try { + const { imagePaths, nonImagePaths } = + await categorizePathsByType(sequence); + + if (imagePaths.length > 0 || nonImagePaths.length > 0) { + // Validate all images in parallel + const validationResults = await Promise.all( + imagePaths.map(async (imagePath) => ({ + imagePath, + validation: await clipboardImages.validateImage(imagePath), + })), + ); + + // Register valid images and collect errors + const placeholders: string[] = []; + const skippedImages: string[] = []; + + for (const { imagePath, validation } of validationResults) { + if (validation.valid) { + placeholders.push(clipboardImages.registerImage(imagePath)); + } else { + skippedImages.push(validation.error ?? 
'Invalid image'); + } + } + + // Show warnings for skipped images + for (const error of skippedImages) { + appEvents.emit(AppEvent.ImageWarning, error); + } + + // Non-image files use @path syntax for file references + const atPrefixedPaths = nonImagePaths.map((p) => `@${p}`); + + // Build insertion text: image placeholders + @path references + const insertParts = [...placeholders, ...atPrefixedPaths]; + + // If all images were invalid but we have non-image paths, still insert those + if (insertParts.length === 0) { + // All paths were invalid images with no non-image files + return; + } + + let insertText = insertParts.join(' '); + + // Add spacing around the insert text based on context at paste time + const charBefore = + pasteOffset > 0 ? currentText[pasteOffset - 1] : ''; + const charAfter = + pasteOffset < currentText.length + ? currentText[pasteOffset] + : ''; + + if (charBefore && charBefore !== ' ' && charBefore !== '\n') { + insertText = ' ' + insertText; + } + if (!charAfter || (charAfter !== ' ' && charAfter !== '\n')) { + insertText = insertText + ' '; + } + + // Insert at the original paste position + buffer.replaceRangeByOffset( + pasteOffset, + pasteOffset, + insertText, + ); + } else { + // No valid paths found, insert as normal text + buffer.replaceRangeByOffset(pasteOffset, pasteOffset, sequence); + } + } catch { + // On error, insert as normal text + buffer.replaceRangeByOffset(pasteOffset, pasteOffset, sequence); + } + })(); + return; + } + // Ensure we never accidentally interpret paste as regular input. buffer.handleInput(key); return; @@ -863,6 +985,7 @@ export const InputPrompt: React.FC = ({ kittyProtocol.enabled, tryLoadQueuedMessages, setBannerVisible, + clipboardImages, ], ); @@ -1163,7 +1286,9 @@ export const InputPrompt: React.FC = ({ } const color = - seg.type === 'command' || seg.type === 'file' + seg.type === 'command' || + seg.type === 'file' || + seg.type === 'image' ? 
theme.text.accent : theme.text.primary; diff --git a/packages/cli/src/ui/contexts/UIStateContext.tsx b/packages/cli/src/ui/contexts/UIStateContext.tsx index c0f0eb0c2e7..eaff2b01a95 100644 --- a/packages/cli/src/ui/contexts/UIStateContext.tsx +++ b/packages/cli/src/ui/contexts/UIStateContext.tsx @@ -28,6 +28,7 @@ import type { DOMElement } from 'ink'; import type { SessionStatsState } from '../contexts/SessionContext.js'; import type { ExtensionUpdateState } from '../state/extensions.js'; import type { UpdateObject } from '../utils/updateCheck.js'; +import type { UseClipboardImagesReturn } from '../hooks/useClipboardImages.js'; export interface ProQuotaDialogRequest { failedModel: string; @@ -137,6 +138,7 @@ export interface UIState { }; bannerVisible: boolean; customDialog: React.ReactNode | null; + clipboardImages: UseClipboardImagesReturn; terminalBackgroundColor: TerminalBackgroundColor; } diff --git a/packages/cli/src/ui/hooks/useClipboardImages.test.ts b/packages/cli/src/ui/hooks/useClipboardImages.test.ts new file mode 100644 index 00000000000..a5ac2052a69 --- /dev/null +++ b/packages/cli/src/ui/hooks/useClipboardImages.test.ts @@ -0,0 +1,219 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { act } from 'react'; +import { renderHook } from '../../test-utils/render.js'; +import { useClipboardImages } from './useClipboardImages.js'; + +// Mock the fs module to avoid actual file system operations +vi.mock('node:fs/promises', () => ({ + readFile: vi.fn().mockResolvedValue(Buffer.from('fake image data')), + stat: vi.fn().mockResolvedValue({ size: 1024 }), // Default: 1KB (under limit) +})); + +describe('useClipboardImages', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('registerImage', () => { + it('should assign sequential IDs to different images', () => { + const { result } = renderHook(() => useClipboardImages()); + + 
let text1 = ''; + let text2 = ''; + let text3 = ''; + act(() => { + text1 = result.current.registerImage('/path/to/image1.png'); + text2 = result.current.registerImage('/path/to/image2.png'); + text3 = result.current.registerImage('/path/to/image3.png'); + }); + + expect(text1).toBe('[Image #1]'); + expect(text2).toBe('[Image #2]'); + expect(text3).toBe('[Image #3]'); + }); + + it('should be idempotent - same path returns same display text', () => { + const { result } = renderHook(() => useClipboardImages()); + + let text1 = ''; + let text2 = ''; + let text3 = ''; + act(() => { + text1 = result.current.registerImage('/path/to/image.png'); + text2 = result.current.registerImage('/path/to/image.png'); + text3 = result.current.registerImage('/path/to/image.png'); + }); + + expect(text1).toBe('[Image #1]'); + expect(text2).toBe('[Image #1]'); + expect(text3).toBe('[Image #1]'); + expect(result.current.images.length).toBe(1); + }); + + it('should handle rapid registrations correctly (simulating drag-drop)', () => { + const { result } = renderHook(() => useClipboardImages()); + + const paths = ['/a.png', '/b.png', '/c.png', '/d.png', '/e.png']; + const texts: string[] = []; + act(() => { + paths.forEach((p) => texts.push(result.current.registerImage(p))); + }); + + expect(texts).toEqual([ + '[Image #1]', + '[Image #2]', + '[Image #3]', + '[Image #4]', + '[Image #5]', + ]); + expect(result.current.images.length).toBe(5); + }); + }); + + describe('clear', () => { + it('should reset images array and ID counter', () => { + const { result } = renderHook(() => useClipboardImages()); + + act(() => { + result.current.registerImage('/path/to/image1.png'); + result.current.registerImage('/path/to/image2.png'); + }); + expect(result.current.images.length).toBe(2); + + act(() => { + result.current.clear(); + }); + expect(result.current.images.length).toBe(0); + + let newText = ''; + act(() => { + newText = result.current.registerImage('/path/to/new-image.png'); + }); + 
expect(newText).toBe('[Image #1]'); + }); + }); + + describe('getImagePartsForText', () => { + it('should only return images whose tags are present in text', async () => { + const { result } = renderHook(() => useClipboardImages()); + + act(() => { + result.current.registerImage('/path/to/image1.png'); + result.current.registerImage('/path/to/image2.png'); + result.current.registerImage('/path/to/image3.png'); + }); + + // Only mention Image #1 and #3 in text (user deleted #2) + const { parts } = await result.current.getImagePartsForText( + 'Hello [Image #1] and [Image #3]', + ); + + expect(parts.length).toBe(2); + }); + + it('should return empty array when user deletes all image tags', async () => { + const { result } = renderHook(() => useClipboardImages()); + + act(() => { + result.current.registerImage('/path/to/image1.png'); + result.current.registerImage('/path/to/image2.png'); + }); + + // User deleted all [Image #N] tags from their message + const { parts } = await result.current.getImagePartsForText( + 'Hello world, no images here', + ); + + expect(parts).toEqual([]); + }); + + it('should skip images exceeding 20MB size limit', async () => { + const fs = await import('node:fs/promises'); + const statMock = vi.mocked(fs.stat); + + // First image: 25MB (over limit), Second image: 1KB (under limit) + statMock + .mockResolvedValueOnce({ size: 25 * 1024 * 1024 } as Awaited< + ReturnType + >) + .mockResolvedValueOnce({ size: 1024 } as Awaited< + ReturnType + >); + + const { result } = renderHook(() => useClipboardImages()); + + act(() => { + result.current.registerImage('/path/to/huge-image.png'); + result.current.registerImage('/path/to/small-image.png'); + }); + + const { parts } = await result.current.getImagePartsForText( + '[Image #1] [Image #2]', + ); + + // Only the small image should be included + expect(parts.length).toBe(1); + }); + }); + + describe('validateImage', () => { + it('should return valid for supported image under size limit', async () => { 
+ const fs = await import('node:fs/promises'); + const statMock = vi.mocked(fs.stat); + statMock.mockResolvedValueOnce({ size: 1024 } as Awaited< + ReturnType + >); + + const { result } = renderHook(() => useClipboardImages()); + const validation = + await result.current.validateImage('/path/to/image.png'); + + expect(validation.valid).toBe(true); + expect(validation.error).toBeUndefined(); + }); + + it('should return error for unsupported image format', async () => { + const { result } = renderHook(() => useClipboardImages()); + const validation = + await result.current.validateImage('/path/to/image.gif'); + + expect(validation.valid).toBe(false); + expect(validation.error).toContain('Unsupported image format'); + }); + + it('should return error for image exceeding 20MB', async () => { + const fs = await import('node:fs/promises'); + const statMock = vi.mocked(fs.stat); + statMock.mockResolvedValueOnce({ size: 25 * 1024 * 1024 } as Awaited< + ReturnType + >); + + const { result } = renderHook(() => useClipboardImages()); + const validation = + await result.current.validateImage('/path/to/huge.png'); + + expect(validation.valid).toBe(false); + expect(validation.error).toContain('exceeds 20MB limit'); + }); + + it('should return error when file cannot be read', async () => { + const fs = await import('node:fs/promises'); + const statMock = vi.mocked(fs.stat); + statMock.mockRejectedValueOnce(new Error('ENOENT: no such file')); + + const { result } = renderHook(() => useClipboardImages()); + const validation = await result.current.validateImage( + '/path/to/missing.png', + ); + + expect(validation.valid).toBe(false); + expect(validation.error).toContain('Cannot read image'); + }); + }); +}); diff --git a/packages/cli/src/ui/hooks/useClipboardImages.ts b/packages/cli/src/ui/hooks/useClipboardImages.ts new file mode 100644 index 00000000000..d42cbe0cf71 --- /dev/null +++ b/packages/cli/src/ui/hooks/useClipboardImages.ts @@ -0,0 +1,290 @@ +/** + * @license + * Copyright 
2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { useState, useCallback, useRef } from 'react'; +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import type { PartUnion } from '@google/genai'; +import { debugLogger } from '@google/gemini-cli-core'; +import { IMAGE_EXTENSIONS, IMAGE_FORMATS } from '../utils/clipboardUtils.js'; +import { appEvents, AppEvent } from '../../utils/events.js'; + +/** + * Represents a clipboard image that has been pasted into the input. + */ +export interface ClipboardImage { + /** Sequential ID for this image within the current message */ + id: number; + /** Absolute path to the image file */ + path: string; + /** Display text shown in the input (e.g., "[Image #1]") */ + displayText: string; +} + +/** + * Internal registry for tracking clipboard images. + * Uses a Map for O(1) path lookup to prevent race conditions. + */ +interface ImageRegistry { + /** Map from absolute path to ClipboardImage for O(1) duplicate detection */ + pathToImage: Map; + /** Ordered array of images for iteration */ + images: ClipboardImage[]; + /** Next sequential ID to assign */ + nextId: number; +} + +/** + * Creates an empty image registry. + */ +const createEmptyRegistry = (): ImageRegistry => ({ + pathToImage: new Map(), + images: [], + nextId: 1, +}); + +/** + * Result of image validation. + */ +export interface ImageValidationResult { + /** Whether the image is valid and can be registered */ + valid: boolean; + /** Error message if validation failed */ + error?: string; +} + +/** + * Result of getImagePartsForText, containing both image data and matched placeholders. + */ +export interface ImagePartsResult { + /** The image parts to send to the API */ + parts: PartUnion[]; + /** The display texts (e.g., "[Image #1]") that were matched and should be stripped */ + matchedDisplayTexts: string[]; +} + +/** + * Return type for the useClipboardImages hook. 
+ */ +export interface UseClipboardImagesReturn { + /** Array of registered clipboard images for the current message */ + images: ClipboardImage[]; + /** Validate an image before registration. Returns error message if invalid. */ + validateImage: (absolutePath: string) => Promise; + /** Register a new image and return its display text (e.g., "[Image #1]") */ + registerImage: (absolutePath: string) => string; + /** Clear all images (called after message submission) */ + clear: () => void; + /** Get image parts only for images whose [Image #N] tags are present in the text */ + getImagePartsForText: (text: string) => Promise; +} + +/** + * Maximum file size for inline image data (20MB). + * See: https://ai.google.dev/gemini-api/docs/image-understanding + */ +const MAX_IMAGE_SIZE_BYTES = 20 * 1024 * 1024; + +function getMimeType(filePath: string): string | null { + const ext = path.extname(filePath).toLowerCase(); + return IMAGE_FORMATS[ext] ?? null; +} + +/** + * Reads an image file and returns it as a base64-encoded PartUnion. + * Returns null if the file cannot be read, has an unsupported format, + * or exceeds the 20MB size limit. + */ +async function readImageAsPart( + imagePath: string, + displayText: string, +): Promise { + const mimeType = getMimeType(imagePath); + if (!mimeType) { + const ext = path.extname(imagePath); + const message = `Unsupported image format ${ext} for ${displayText}`; + debugLogger.warn(`${message}. Supported: ${IMAGE_EXTENSIONS.join(', ')}`); + appEvents.emit(AppEvent.ImageWarning, message); + return null; + } + + try { + // Check file size before reading to avoid loading huge files into memory + const stats = await fs.stat(imagePath); + if (stats.size > MAX_IMAGE_SIZE_BYTES) { + const sizeMB = (stats.size / (1024 * 1024)).toFixed(1); + const message = `${displayText} exceeds 20MB limit (${sizeMB}MB)`; + debugLogger.warn(`${message}. 
Consider using a smaller image.`); + appEvents.emit(AppEvent.ImageWarning, message); + return null; + } + + const fileContent = await fs.readFile(imagePath); + return { + inlineData: { + data: fileContent.toString('base64'), + mimeType, + }, + }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + debugLogger.warn( + `Failed to load clipboard image ${displayText} from ${imagePath}: ${message}`, + ); + return null; + } +} + +/** + * Hook to manage clipboard images pasted into the input. + * + * This hook provides a registry for tracking pasted images and converting them + * to base64-encoded parts for injection into the Gemini prompt. + * + * The image counter resets after each message submission. + * + * Uses a Map-based registry with synchronized ref/state to prevent race conditions + * when multiple images are registered rapidly (e.g., multi-file drag-and-drop). + */ +export function useClipboardImages(): UseClipboardImagesReturn { + const [registry, setRegistryState] = + useState(createEmptyRegistry); + const registryRef = useRef(registry); + + // Custom setter that syncs ref and state atomically. + // The ref is updated synchronously for immediate reads, + // while state update is queued for React re-renders. + const setRegistry = useCallback((newRegistry: ImageRegistry) => { + registryRef.current = newRegistry; + setRegistryState(newRegistry); + }, []); + + /** + * Validate an image before registration. + * Checks file size and MIME type support. 
+ */ + const validateImage = useCallback( + async (absolutePath: string): Promise => { + // Check MIME type + const mimeType = getMimeType(absolutePath); + if (!mimeType) { + const ext = path.extname(absolutePath); + return { + valid: false, + error: `Unsupported image format ${ext}`, + }; + } + + // Check file size + try { + const stats = await fs.stat(absolutePath); + if (stats.size > MAX_IMAGE_SIZE_BYTES) { + const sizeMB = (stats.size / (1024 * 1024)).toFixed(1); + return { + valid: false, + error: `Image exceeds 20MB limit (${sizeMB}MB)`, + }; + } + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { + valid: false, + error: `Cannot read image: ${message}`, + }; + } + + return { valid: true }; + }, + [], + ); + + /** + * Register a new image and return its display text. + * This function is idempotent: registering the same path twice returns + * the same display text without creating a duplicate entry. + */ + const registerImage = useCallback( + (absolutePath: string): string => { + // Read from ref for synchronous access to latest state + const current = registryRef.current; + + // O(1) check for existing registration - makes this idempotent + const existing = current.pathToImage.get(absolutePath); + if (existing) { + return existing.displayText; + } + + // Assign ID and create image atomically + const id = current.nextId; + const displayText = `[Image #${id}]`; + const newImage: ClipboardImage = { id, path: absolutePath, displayText }; + + // Immutable Map update + const newPathToImage = new Map(current.pathToImage); + newPathToImage.set(absolutePath, newImage); + + const newRegistry: ImageRegistry = { + pathToImage: newPathToImage, + images: [...current.images, newImage], + nextId: id + 1, + }; + + setRegistry(newRegistry); + return displayText; + }, + [setRegistry], + ); + + const clear = useCallback(() => { + setRegistry(createEmptyRegistry()); + }, [setRegistry]); + + /** + * Get image parts only for 
images whose [Image #N] tags are present in the text. + * This prevents sending images the user has deleted from their prompt. + * Returns both the image parts and the matched display texts for stripping. + */ + const getImagePartsForText = useCallback( + async (text: string): Promise => { + // Use ref for synchronous access to current state + const current = registryRef.current; + + // Filter to only images whose tags are still present in the text + const imagesToProcess = current.images.filter((image) => + text.includes(image.displayText), + ); + + // Process all images in parallel + const results = await Promise.all( + imagesToProcess.map(async (image) => { + const part = await readImageAsPart(image.path, image.displayText); + return { part, displayText: image.displayText }; + }), + ); + + // Collect successful results + const parts: PartUnion[] = []; + const matchedDisplayTexts: string[] = []; + for (const { part, displayText } of results) { + if (part) { + parts.push(part); + matchedDisplayTexts.push(displayText); + } + } + + return { parts, matchedDisplayTexts }; + }, + [], // No dependencies - reads from ref for consistent access + ); + + return { + images: registry.images, + validateImage, + registerImage, + clear, + getImagePartsForText, + }; +} diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 3ab2cbce5f6..4ecf5c66d45 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -40,7 +40,12 @@ import { processRestorableToolCalls, recordToolCallInteractions, } from '@google/gemini-cli-core'; -import { type Part, type PartListUnion, FinishReason } from '@google/genai'; +import { + type Part, + type PartListUnion, + type PartUnion, + FinishReason, +} from '@google/genai'; import type { HistoryItem, HistoryItemWithoutId, @@ -110,6 +115,13 @@ export const useGeminiStream = ( terminalWidth: number, terminalHeight: number, isShellFocused?: boolean, + 
clipboardImages?: { + getImagePartsForText: (text: string) => Promise<{ + parts: PartUnion[]; + matchedDisplayTexts: string[]; + }>; + clear: () => void; + }, ) => { const [initError, setInitError] = useState(null); const abortControllerRef = useRef(null); @@ -512,6 +524,75 @@ export const useGeminiStream = ( ); return { queryToSend: null, shouldProceed: false }; } + + // Inject clipboard images into the query + // Only include images whose [Image #N] tags are still present in the text + if (clipboardImages) { + // Extract query text to check which image tags are present + const queryText = + typeof localQueryToSendToGemini === 'string' + ? localQueryToSendToGemini + : Array.isArray(localQueryToSendToGemini) + ? localQueryToSendToGemini + .filter( + (p): p is { text: string } => + p !== null && + typeof p === 'object' && + 'text' in p && + typeof p.text === 'string', + ) + .map((p) => p.text) + .join(' ') + : ''; + + const { parts: imageParts, matchedDisplayTexts } = + await clipboardImages.getImagePartsForText(queryText); + if (imageParts.length > 0) { + onDebugMessage(`Injecting ${imageParts.length} clipboard image(s)`); + + // Strip only the placeholders that correspond to actual registered images + // This preserves any user-typed [Image #N] text that doesn't match a registered image + const stripImageReferences = (text: string): string => { + let result = text; + for (const displayText of matchedDisplayTexts) { + result = result.replaceAll(displayText, ''); + } + return result.replace(/\s{2,}/g, ' ').trim(); + }; + + if (typeof localQueryToSendToGemini === 'string') { + const cleanedText = stripImageReferences(localQueryToSendToGemini); + localQueryToSendToGemini = [ + ...imageParts, + ...(cleanedText ? 
[{ text: cleanedText }] : []), + ]; + } else if (Array.isArray(localQueryToSendToGemini)) { + // Clean text parts in the array + const cleanedParts = localQueryToSendToGemini + .map((part) => { + if (typeof part === 'string') { + const cleaned = stripImageReferences(part); + return cleaned ? cleaned : null; + } + if ( + part && + typeof part === 'object' && + 'text' in part && + typeof part.text === 'string' + ) { + const cleaned = stripImageReferences(part.text); + return cleaned ? { ...part, text: cleaned } : null; + } + return part; + }) + .filter( + (part): part is NonNullable => part !== null, + ); + localQueryToSendToGemini = [...imageParts, ...cleanedParts]; + } + } + } + return { queryToSend: localQueryToSendToGemini, shouldProceed: true }; }, [ @@ -523,6 +604,7 @@ export const useGeminiStream = ( logger, shellModeActive, scheduleToolCalls, + clipboardImages, ], ); @@ -971,6 +1053,12 @@ export const useGeminiStream = ( addItem(pendingHistoryItemRef.current, userMessageTimestamp); setPendingHistoryItem(null); } + + // Clear clipboard images after successful send + if (clipboardImages && !options?.isContinuation) { + clipboardImages.clear(); + } + if (loopDetectedRef.current) { loopDetectedRef.current = false; // Show the confirmation dialog to choose whether to disable loop detection @@ -1054,6 +1142,7 @@ export const useGeminiStream = ( config, startNewPrompt, getPromptCount, + clipboardImages, ], ); diff --git a/packages/cli/src/ui/utils/clipboardUtils.test.ts b/packages/cli/src/ui/utils/clipboardUtils.test.ts index 101a5085f7a..79bb13d7681 100644 --- a/packages/cli/src/ui/utils/clipboardUtils.test.ts +++ b/packages/cli/src/ui/utils/clipboardUtils.test.ts @@ -11,7 +11,12 @@ import { cleanupOldClipboardImages, splitEscapedPaths, parsePastedPaths, + mayContainImagePaths, + categorizePathsByType, } from './clipboardUtils.js'; +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import * as os from 'node:os'; describe('clipboardUtils', 
() => { describe('clipboardHasImage', () => { @@ -237,4 +242,164 @@ describe('clipboardUtils', () => { expect(result).toBe('@\\\\server\\share\\file.txt '); }); }); + + describe('mayContainImagePaths', () => { + it('should return true for single image path', () => { + expect(mayContainImagePaths('/path/to/image.png')).toBe(true); + }); + + it('should return true for multiple image paths', () => { + expect(mayContainImagePaths('/img1.png /img2.jpg')).toBe(true); + }); + + it('should return false for non-image paths', () => { + expect(mayContainImagePaths('/path/to/file.txt')).toBe(false); + }); + + it('should return false for non-path text', () => { + expect(mayContainImagePaths('hello world')).toBe(false); + }); + + it('should return true for mixed paths with at least one image', () => { + expect(mayContainImagePaths('/file.txt /image.png')).toBe(true); + }); + + it('should handle paths with escaped spaces', () => { + expect(mayContainImagePaths('/my\\ image.png')).toBe(true); + }); + + it('should handle Windows paths', () => { + expect(mayContainImagePaths('C:\\Users\\image.png')).toBe(true); + }); + + it('should handle tilde paths', () => { + expect(mayContainImagePaths('~/images/photo.jpg')).toBe(true); + }); + + it('should handle relative paths', () => { + expect(mayContainImagePaths('./image.png')).toBe(true); + }); + + it('should be case insensitive for extensions', () => { + expect(mayContainImagePaths('/image.PNG')).toBe(true); + expect(mayContainImagePaths('/image.Jpg')).toBe(true); + }); + + it('should handle all supported image extensions', () => { + expect(mayContainImagePaths('/a.png')).toBe(true); + expect(mayContainImagePaths('/a.jpg')).toBe(true); + expect(mayContainImagePaths('/a.jpeg')).toBe(true); + expect(mayContainImagePaths('/a.webp')).toBe(true); + expect(mayContainImagePaths('/a.heic')).toBe(true); + expect(mayContainImagePaths('/a.heif')).toBe(true); + }); + + it('should return false for unsupported image formats', () => { + 
expect(mayContainImagePaths('/a.gif')).toBe(false); + expect(mayContainImagePaths('/a.bmp')).toBe(false); + expect(mayContainImagePaths('/a.tiff')).toBe(false); + }); + }); + + describe('categorizePathsByType', () => { + let tempDir: string; + + // Create temp files for testing + const setupTempFiles = async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'clipboard-test-')); + await fs.writeFile(path.join(tempDir, 'image.png'), 'fake png'); + await fs.writeFile(path.join(tempDir, 'image.jpg'), 'fake jpg'); + await fs.writeFile(path.join(tempDir, 'document.txt'), 'text content'); + await fs.writeFile(path.join(tempDir, 'script.js'), 'js content'); + return tempDir; + }; + + const cleanupTempFiles = async () => { + if (tempDir) { + await fs.rm(tempDir, { recursive: true, force: true }); + } + }; + + it('should return empty result for non-path text', async () => { + const result = await categorizePathsByType('hello world'); + expect(result.imagePaths).toEqual([]); + expect(result.nonImagePaths).toEqual([]); + expect(result.invalidSegments).toEqual(['hello', 'world']); + }); + + it('should categorize non-existent paths as invalid', async () => { + const result = await categorizePathsByType('/nonexistent/image.png'); + expect(result.imagePaths).toEqual([]); + expect(result.nonImagePaths).toEqual([]); + expect(result.invalidSegments).toEqual(['/nonexistent/image.png']); + }); + + it('should categorize existing images correctly', async () => { + await setupTempFiles(); + try { + const imagePath = path.join(tempDir, 'image.png'); + const result = await categorizePathsByType(imagePath); + expect(result.imagePaths).toEqual([imagePath]); + expect(result.nonImagePaths).toEqual([]); + expect(result.invalidSegments).toEqual([]); + } finally { + await cleanupTempFiles(); + } + }); + + it('should categorize existing non-images correctly', async () => { + await setupTempFiles(); + try { + const textPath = path.join(tempDir, 'document.txt'); + const result = await 
categorizePathsByType(textPath); + expect(result.imagePaths).toEqual([]); + expect(result.nonImagePaths).toEqual([textPath]); + expect(result.invalidSegments).toEqual([]); + } finally { + await cleanupTempFiles(); + } + }); + + it('should handle mixed paths correctly', async () => { + await setupTempFiles(); + try { + const imagePath = path.join(tempDir, 'image.png'); + const textPath = path.join(tempDir, 'document.txt'); + const nonexistent = '/nonexistent/file.xyz'; + const input = `${imagePath} ${textPath} ${nonexistent}`; + + const result = await categorizePathsByType(input); + expect(result.imagePaths).toEqual([imagePath]); + expect(result.nonImagePaths).toEqual([textPath]); + expect(result.invalidSegments).toEqual([nonexistent]); + } finally { + await cleanupTempFiles(); + } + }); + + it('should handle multiple images', async () => { + await setupTempFiles(); + try { + const png = path.join(tempDir, 'image.png'); + const jpg = path.join(tempDir, 'image.jpg'); + const input = `${png} ${jpg}`; + + const result = await categorizePathsByType(input); + expect(result.imagePaths).toContain(png); + expect(result.imagePaths).toContain(jpg); + expect(result.imagePaths.length).toBe(2); + expect(result.nonImagePaths).toEqual([]); + expect(result.invalidSegments).toEqual([]); + } finally { + await cleanupTempFiles(); + } + }); + + it('should return empty arrays for empty string', async () => { + const result = await categorizePathsByType(''); + expect(result.imagePaths).toEqual([]); + expect(result.nonImagePaths).toEqual([]); + expect(result.invalidSegments).toEqual([]); + }); + }); }); diff --git a/packages/cli/src/ui/utils/clipboardUtils.ts b/packages/cli/src/ui/utils/clipboardUtils.ts index 9296bfce99d..100740c7cfb 100644 --- a/packages/cli/src/ui/utils/clipboardUtils.ts +++ b/packages/cli/src/ui/utils/clipboardUtils.ts @@ -14,17 +14,23 @@ import { } from '@google/gemini-cli-core'; /** - * Supported image file extensions based on Gemini API. 
/**
 * Supported image formats based on Gemini API.
 * Maps lowercase file extensions (including the leading dot) to MIME types.
 * See: https://ai.google.dev/gemini-api/docs/image-understanding
 */
export const IMAGE_FORMATS: Record<string, string> = {
  '.png': 'image/png',
  '.jpg': 'image/jpeg',
  '.jpeg': 'image/jpeg',
  '.webp': 'image/webp',
  '.heic': 'image/heic',
  '.heif': 'image/heif',
};

/**
 * Supported image file extensions derived from IMAGE_FORMATS.
 * Order follows the key order of IMAGE_FORMATS.
 */
export const IMAGE_EXTENSIONS = Object.keys(IMAGE_FORMATS);

/** Matches strings that start with a path prefix (/, ~, ., Windows drive letter, or UNC path) */
const PATH_PREFIX_PATTERN = /^([/~.]|[a-zA-Z]:|\\\\)/;

/**
 * Quick synchronous check if text could contain image file paths.
 * Used as a fast heuristic before async validation.
 *
 * The check is deliberately loose: the text must *start* with a path prefix,
 * and a supported image extension must appear *somewhere* in it
 * (case-insensitively). It never touches the file system, so a `true` result
 * only means the text is worth validating further.
 *
 * @param text Raw pasted or dropped text.
 * @returns `true` when the text looks like it may reference an image path.
 */
export function mayContainImagePaths(text: string): boolean {
  // Only the beginning of the text is inspected for a path prefix.
  if (!PATH_PREFIX_PATTERN.test(text)) {
    return false;
  }

  // Extensions in IMAGE_FORMATS are lowercase, so normalise the haystack once.
  const haystack = text.toLowerCase();
  for (const extension of IMAGE_EXTENSIONS) {
    if (haystack.includes(extension)) {
      return true;
    }
  }
  return false;
}

/**
 * Result of categorizing pasted/dropped paths.
 */
export interface CategorizedPaths {
  /** Unescaped paths that exist on disk and carry a supported image extension */
  imagePaths: string[];
  /** Unescaped paths that exist on disk but do not carry a supported image extension */
  nonImagePaths: string[];
  /** Original segments that are not path-like or could not be accessed on disk */
  invalidSegments: string[];
}
/**
 * Categorizes pasted/dropped paths into images, non-images, and invalid.
 * Validates that each path is accessible on disk before categorizing.
 *
 * Segments that do not start with a path prefix are reported as invalid
 * without touching the file system. All remaining segments are checked
 * concurrently; the relative order of the input segments is preserved within
 * each output list.
 *
 * @param text The pasted text (potentially space-separated paths)
 * @returns Categorized paths by type. Image and non-image entries hold the
 *   unescaped path; invalid entries hold the original (still escaped) segment.
 */
export async function categorizePathsByType(
  text: string,
): Promise<CategorizedPaths> {
  const result: CategorizedPaths = {
    imagePaths: [],
    nonImagePaths: [],
    invalidSegments: [],
  };

  const segments = splitEscapedPaths(text);
  if (segments.length === 0) {
    return result;
  }

  type Classification = {
    kind: 'image' | 'non-image' | 'invalid';
    segment: string;
    unescaped: string;
  };

  // Classifies one segment; never rejects, so Promise.all cannot short-circuit.
  const classify = async (segment: string): Promise<Classification> => {
    if (!PATH_PREFIX_PATTERN.test(segment)) {
      return { kind: 'invalid', segment, unescaped: segment };
    }

    const unescaped = unescapePath(segment);
    try {
      // NOTE: with its default mode fs.access only verifies that the path is
      // accessible; it does not distinguish regular files from directories.
      await fs.access(unescaped);
      const extension = path.extname(unescaped).toLowerCase();
      const isImage = IMAGE_EXTENSIONS.includes(extension);
      return { kind: isImage ? 'image' : 'non-image', segment, unescaped };
    } catch {
      // Deliberate best-effort: any access failure simply marks the segment
      // as invalid instead of failing the whole paste.
      return { kind: 'invalid', segment, unescaped };
    }
  };

  const classifications = await Promise.all(segments.map(classify));

  for (const { kind, segment, unescaped } of classifications) {
    switch (kind) {
      case 'image':
        result.imagePaths.push(unescaped);
        break;
      case 'non-image':
        result.nonImagePaths.push(unescaped);
        break;
      default:
        // Invalid entries keep the text exactly as the user pasted it.
        result.invalidSegments.push(segment);
        break;
    }
  }

  return result;
}
#2]'; + expect(parseInputForHighlighting(text, 0)).toEqual([ + { text: '[Image #1]', type: 'image' }, + { text: ' and ', type: 'default' }, + { text: '[Image #2]', type: 'image' }, + ]); + }); + + it('should highlight image placeholders with double digits', () => { + const text = 'See [Image #12] for details'; + expect(parseInputForHighlighting(text, 0)).toEqual([ + { text: 'See ', type: 'default' }, + { text: '[Image #12]', type: 'image' }, + { text: ' for details', type: 'default' }, + ]); + }); + + it('should highlight mixed files and images', () => { + const text = '@file.txt [Image #1] @another.jpg [Image #2]'; + expect(parseInputForHighlighting(text, 0)).toEqual([ + { text: '@file.txt', type: 'file' }, + { text: ' ', type: 'default' }, + { text: '[Image #1]', type: 'image' }, + { text: ' ', type: 'default' }, + { text: '@another.jpg', type: 'file' }, + { text: ' ', type: 'default' }, + { text: '[Image #2]', type: 'image' }, + ]); + }); }); describe('parseInputForHighlighting with Transformations', () => { diff --git a/packages/cli/src/ui/utils/highlight.ts b/packages/cli/src/ui/utils/highlight.ts index 475854a3dd4..137e880c23c 100644 --- a/packages/cli/src/ui/utils/highlight.ts +++ b/packages/cli/src/ui/utils/highlight.ts @@ -9,14 +9,16 @@ import { cpLen, cpSlice } from './textUtils.js'; export type HighlightToken = { text: string; - type: 'default' | 'command' | 'file'; + type: 'default' | 'command' | 'file' | 'image'; }; -// Matches slash commands (e.g., /help) and @ references (files or MCP resource URIs). +// Matches slash commands (e.g., /help), @ references (files or MCP resource URIs), +// and image placeholders (e.g., [Image #1]). // The @ pattern uses a negated character class to support URIs like `@file:///example.txt` // which contain colons. It matches any character except delimiters: comma, whitespace, // semicolon, common punctuation, and brackets. 
-const HIGHLIGHT_REGEX = /(^\/[a-zA-Z0-9_-]+|@(?:\\ |[^,\s;!?()[\]{}])+)/g; +const HIGHLIGHT_REGEX = + /(^\/[a-zA-Z0-9_-]+|@(?:\\ |[^,\s;!?()[\]{}])+|\[Image #\d+\])/g; export function parseInputForHighlighting( text: string, @@ -46,7 +48,15 @@ export function parseInputForHighlighting( tokens.push({ text: text.slice(last, matchIndex), type: 'default' }); } - const type = fullMatch.startsWith('/') ? 'command' : 'file'; + let type: HighlightToken['type']; + if (fullMatch.startsWith('/')) { + type = 'command'; + } else if (fullMatch.startsWith('[Image')) { + type = 'image'; + } else { + type = 'file'; + } + if (type === 'command' && index !== 0) { tokens.push({ text: fullMatch, type: 'default' }); } else { diff --git a/packages/cli/src/utils/events.ts b/packages/cli/src/utils/events.ts index 4e7d1270289..eec176f764f 100644 --- a/packages/cli/src/utils/events.ts +++ b/packages/cli/src/utils/events.ts @@ -14,6 +14,8 @@ export enum AppEvent { McpClientUpdate = 'mcp-client-update', SelectionWarning = 'selection-warning', PasteTimeout = 'paste-timeout', + ImageWarning = 'image-warning', + ImageProcessing = 'image-processing', } export interface AppEvents extends ExtensionEvents { @@ -23,6 +25,8 @@ export interface AppEvents extends ExtensionEvents { [AppEvent.McpClientUpdate]: Array | never>; [AppEvent.SelectionWarning]: never[]; [AppEvent.PasteTimeout]: never[]; + [AppEvent.ImageWarning]: string[]; + [AppEvent.ImageProcessing]: string[]; } export const appEvents = new EventEmitter();