diff --git a/docs/cli/visual-validation.md b/docs/cli/visual-validation.md new file mode 100644 index 00000000000..681cce0cb0c --- /dev/null +++ b/docs/cli/visual-validation.md @@ -0,0 +1,138 @@ +# Visual validation and TTY testing + +Gemini CLI uses a multi-layered approach to validate its user interface (UI) and +ensure the CLI boots correctly in real terminal environments. This document +explains the tools and techniques used for visual regression and bootstrap +testing. + +## Overview + +While standard integration tests focus on logic and file system operations, +visual validation ensures that the terminal output looks correct to the user. We +use two primary methods for this: + +1. **TTY Bootstrap Smoke Tests:** Spawns the actual built binary in a real + pseudo-terminal (PTY) to verify startup and basic interactivity. +2. **Visual Regression (SVG Snapshots):** Renders integrated UI flows inside a + virtual terminal and compares the output against committed "golden" SVG + baselines. + +## TTY bootstrap smoke tests + +These tests validate that the Gemini CLI binary can successfully initialize and +render its Ink-based UI in a real terminal environment. They catch issues like +missing dependencies, broken startup sequences, or TTY-specific crashes. + +These tests are located in `packages/cli/integration-tests/`. + +### Running TTY tests + +To run the bootstrap smoke test, use the following command: + +```bash +npm test -w @google/gemini-cli -- integration-tests/bootstrap.test.ts +``` + +### How it works + +The test utility `runInteractive` (found in `@google/gemini-cli-test-utils`) +uses `node-pty` to spawn the CLI. It provides a programmable interface to wait +for specific text markers and send simulated user input. 
+ +```typescript +const run = await rig.runInteractive(); +const readyMarker = 'Type your message or @path/to/file'; +await run.expectText(readyMarker, 30000); // Wait for the main prompt +await run.kill(); +``` + +### TDD example: Adding a welcome message + +To add a new visual feature like a "Welcome to Gemini CLI!" message: + +1. **Write the failing test:** Update `bootstrap.test.ts` to expect the new + string. + ```typescript + const welcomeMessage = 'Welcome to Gemini CLI!'; + await run.expectText(welcomeMessage, 30000); + ``` +2. **Verify failure:** Run `npm test` and observe the TTY rig reporting the + missing text. +3. **Implement the feature:** Add the message to `AppHeader.tsx`. +4. **Verify success:** Rebuild the binary (`npm run bundle`) and run the test + again to see it pass. + +## Visual regression with SVG snapshots + +To automate the verification of complex UI layouts (like tables, progress bars, +or policy warnings), we use **SVG Snapshots**. This approach captures colors, +spacing, and text formatting in a deterministic way. + +These tests are located in `packages/cli/src/ui/` and use the `AppRig` utility. + +### Running visual tests + +To run the visual validation suite, use the following command: + +```bash +npm test -w @google/gemini-cli -- src/ui/PolicyVisual.test.tsx +``` + +### Updating snapshots + +If you intentionally change the UI, the visual tests will fail because the +actual output no longer matches the saved snapshot. To "bless" your changes and +update the snapshots, run the tests with the update flag: + +```bash +npm test -w @google/gemini-cli -- src/ui/PolicyVisual.test.tsx -u +``` + +After updating, you must review the resulting `.snap.svg` files in the +`__snapshots__` directory to ensure they look as intended. 
+ +### New use cases unlocked + +This framework allows maintainers to validate scenarios that were previously +difficult to automate: + +- **Policy Visibility:** Ensuring that security blocks or "Ask User" prompts are + clearly rendered and not suppressed by error verbosity settings. +- **Integrated Flow Validation:** Testing the full cycle of a model response + triggering a tool, which is then handled by the policy engine and displayed in + the UI. +- **Startup Health:** Verifying that changes to the core scheduler or config + resolution don't cause the app to hang in the "Initializing..." state. + +## Comparison with existing tests + +| Test Type | Rig Used | Environment | Best For | +| :-------------------- | :--------- | :---------------- | :---------------------------------- | +| **Integration (E2E)** | `TestRig` | Headless / Binary | File system logic, tool execution | +| **Bootstrap Smoke** | `node-pty` | Real PTY / Binary | Startup health, TTY compatibility | +| **Visual (Snapshot)** | `AppRig` | Virtual / Ink | UI layout, colors, integrated flows | +| **Behavioral (Old)** | `AppRig` | Virtual / Ink | Model decision-making and steering | + +## Why this matters + +Existing testing layers often miss critical user experience regressions: + +- **Integration tests** may pass if the logic is sound, but they won't detect if + the app hangs during UI initialization or if the binary fails to communicate + with the TTY. +- **Behavioral evaluations** validate the model's intent, but they don't ensure + that the resulting state (like a policy violation) is actually visible to the + user. + +The new validation tools bridge these gaps. For example, they were used to +expose critical issues where visual feedback for the Policy Engine was +suppressed in certain modes and the core scheduler was prone to TTY-based race +conditions. The high-fidelity validation provided by these tools was essential +for identifying and verifying the fixes for these issues. 
+ +## Next steps + +- **Extend Coverage:** Add SVG snapshots for more complex components like + `DiffRenderer` or `McpStatus`. +- **CI Integration:** Ensure TTY-based tests run in GitHub Actions environments + that support pseudo-terminals. diff --git a/docs/integration-tests.md b/docs/integration-tests.md index f5784c344b7..c0ecd195141 100644 --- a/docs/integration-tests.md +++ b/docs/integration-tests.md @@ -12,6 +12,9 @@ verify that it behaves as expected when interacting with the file system. These tests are located in the `integration-tests` directory and are run using a custom test runner. +For information about visual regression and TTY bootstrap testing, see +[Visual validation and TTY testing](/docs/cli/visual-validation.md). + ## Building the tests Prior to running any integration tests, you need to create a release bundle that diff --git a/docs/sidebar.json b/docs/sidebar.json index 7198a0336bc..da49c766c2e 100644 --- a/docs/sidebar.json +++ b/docs/sidebar.json @@ -178,6 +178,10 @@ "items": [ { "label": "Contribution guide", "slug": "docs/contributing" }, { "label": "Integration testing", "slug": "docs/integration-tests" }, + { + "label": "Visual validation and TTY", + "slug": "docs/cli/visual-validation" + }, { "label": "Issue and PR automation", "slug": "docs/issue-and-pr-automation" diff --git a/packages/cli/integration-tests/bootstrap.test.ts b/packages/cli/integration-tests/bootstrap.test.ts new file mode 100644 index 00000000000..553bd6f8e20 --- /dev/null +++ b/packages/cli/integration-tests/bootstrap.test.ts @@ -0,0 +1,39 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, beforeEach, afterEach } from 'vitest'; +import { TestRig } from '@google/gemini-cli-test-utils'; + +describe('Gemini CLI TTY Bootstrap', () => { + let rig: TestRig; + + beforeEach(() => { + rig = new TestRig(); + rig.setup('TTY Bootstrap Smoke Test'); + }); + + afterEach(async () => { + await rig.cleanup(); + 
}); + + it('should render the interactive UI and display the ready marker in a TTY', async () => { + // Spawning the CLI in a pseudo-TTY with a dummy API key to bypass auth prompt + const run = await rig.runInteractive({ + env: { GEMINI_API_KEY: 'dummy-key' }, + }); + + // The markers we expect to see: the welcome message and the ready prompt + const readyMarker = 'Type your message or @path/to/file'; + const welcomeMessage = 'Welcome to Gemini CLI!'; + + // Verify the initial render completes and displays the markers + await run.expectText(welcomeMessage, 30000); + await run.expectText(readyMarker, 30000); + + // Both markers appeared, so the smoke test passed; stop the interactive run + await run.kill(); + }); +}); diff --git a/packages/cli/src/ui/components/AppHeader.tsx b/packages/cli/src/ui/components/AppHeader.tsx index 7d0ef75a36d..72ac707b27d 100644 --- a/packages/cli/src/ui/components/AppHeader.tsx +++ b/packages/cli/src/ui/components/AppHeader.tsx @@ -159,6 +159,14 @@ export const AppHeader = ({ version, showDetails = true }: AppHeaderProps) => { /> )} + {showHeader && ( + + + Welcome to Gemini CLI! + + + )} + {!(settings.merged.ui.hideTips || config.getScreenReader()) && showTips && }