From ff8467943a6a8c81cd1c7721f3aafdfe128ab553 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Thu, 26 Feb 2026 11:56:39 -0800 Subject: [PATCH 01/33] docs: add codebase understanding guide --- docs/codebase_understanding.md | 138 +++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 docs/codebase_understanding.md diff --git a/docs/codebase_understanding.md b/docs/codebase_understanding.md new file mode 100644 index 00000000000..de70a1b9dbc --- /dev/null +++ b/docs/codebase_understanding.md @@ -0,0 +1,138 @@ +# Codebase understanding + +This document provides a detailed overview of the Gemini CLI architecture, its +core components, and how they interact to provide an agentic terminal +experience. + +## Repository overview + +Gemini CLI is structured as a monorepo using npm workspaces. The codebase is +divided into several specialized packages that separate the user interface from +the agentic orchestration logic. + +### Core packages + +- **`packages/cli`**: Contains the terminal user interface (TUI) implemented + with React and Ink. It handles terminal-specific logic like keybindings, + mouse events, and layout rendering. +- **`packages/core`**: The central engine of the application. It is UI-agnostic + and manages the Gemini API communication, tool orchestration, conversation + history, and policy enforcement. +- **`packages/devtools`**: Provides a developer-focused inspector (similar to + Chrome DevTools) for monitoring network traffic and console logs in real-time. +- **`packages/sdk`**: A library for building extensions and custom tools that + integrate with Gemini CLI. +- **`packages/vscode-ide-companion`**: A VS Code extension that connects the + editor state to the CLI, enabling the agent to read open files and cursor + positions. + +## Application lifecycle + +The application follows a structured startup and execution flow to ensure +security and environment consistency. 
+ +### Startup and sandboxing + +When you launch Gemini CLI, the entry point in `packages/cli/src/gemini.tsx` +manages several initialization steps: + +1. **Configuration loading**: Loads user and workspace settings, parsing + command-line arguments. +2. **Authentication**: Validates credentials and refreshes OAuth tokens. +3. **Sandboxing**: If configured, the application relaunches itself in a + restricted child process using a "sandbox" environment to isolate tool + execution. +4. **Mode selection**: Determines whether to start the interactive TUI or run + in non-interactive mode based on input and terminal state. + +### Interactive vs. non-interactive modes + +- **Interactive mode**: Renders the TUI using Ink. The state is managed via + React contexts (Settings, Mouse, Keypress, Terminal) and a central + `AppContainer`. +- **Non-interactive mode**: Executes a single prompt or command. It uses a + focused loop in `packages/cli/src/nonInteractiveCli.ts` that continues until + the agent completes its task or requires user intervention that cannot be + provided. + +## Agent orchestration + +The orchestration of the agent's behavior happens primarily within +`packages/core/src/core`. + +### GeminiClient + +The `GeminiClient` is the primary interface for the rest of the application. It +coordinates: + +- **Session management**: Initializing, resuming, and persisting chat sessions. +- **Model routing**: Deciding which Gemini model to use based on the task and + configuration. +- **Context compression**: Summarizing long histories using the + `ChatCompressionService` to stay within context window limits. +- **IDE integration**: Injecting editor context (open files, selections) into + the prompt. + +### GeminiChat and Turn + +- **`GeminiChat`**: Manages the low-level API communication. It handles + streaming responses, retries for transient network errors, and records the + conversation history. +- **`Turn`**: Represents a single agentic exchange. 
A turn may involve multiple + API calls if the model decides to use tools. It yields events for content, + thoughts, and tool requests. + +## Tool system and scheduler + +The tool system allows the agent to interact with the external world. It is +built on a secure, policy-driven framework. + +### Tool registry + +The `ToolRegistry` in `packages/core/src/tools` maintains a list of all +available tools. It supports several types: + +- **Built-in tools**: Native TypeScript implementations for file system + operations, shell commands, and web fetching. +- **Discovered tools**: Local scripts or commands identified in the project + root. +- **MCP tools**: Tools provided by external servers via the Model Context + Protocol. + +### Scheduler + +The `Scheduler` in `packages/core/src/scheduler` manages the lifecycle of a +tool call: + +1. **Validation**: Ensures the tool exists and the arguments match the schema. +2. **Policy check**: Consults the Policy Engine to determine if the tool is + allowed to run automatically, requires user confirmation, or is denied. +3. **Confirmation**: If required, it pauses execution and uses the + `MessageBus` to request user approval through the UI. +4. **Execution**: Runs the tool and captures the output, including live + updates for long-running processes. +5. **Feedback**: Sends the tool result back to the model to continue the + agentic loop. + +## UI architecture + +The UI is built with React components rendered to the terminal via Ink. Key +design patterns include: + +- **Providers**: Global state like settings, theme, and terminal size is + provided through React Contexts to avoid prop drilling. +- **Console patching**: Standard `console.log` calls are intercepted and + redirected to the TUI's debug console or the `devtools` server. +- **Event-driven updates**: The UI listens to `coreEvents` from the orchestrator + to update its state (e.g., streaming text, tool progress, or errors). 
+ +## Testing and quality + +The project maintains high standards through several testing tiers: + +- **Unit tests**: Located alongside the source code (e.g., `*.test.ts`), using + Vitest. +- **Integration tests**: E2E tests in the `integration-tests/` directory that + run the compiled CLI against mocked and real API endpoints. +- **Evals**: Specialized evaluation scripts in `evals/` that measure the + agent's performance on specific tasks like tool use and codebase navigation. From 760054e4821d0c48d3f2757c46e20861cef22f4f Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Thu, 26 Feb 2026 12:01:52 -0800 Subject: [PATCH 02/33] docs: expand codebase understanding guide with technical depth --- docs/codebase_understanding.md | 279 +++++++++++++++++---------------- 1 file changed, 143 insertions(+), 136 deletions(-) diff --git a/docs/codebase_understanding.md b/docs/codebase_understanding.md index de70a1b9dbc..f6c60c0f665 100644 --- a/docs/codebase_understanding.md +++ b/docs/codebase_understanding.md @@ -1,138 +1,145 @@ # Codebase understanding -This document provides a detailed overview of the Gemini CLI architecture, its -core components, and how they interact to provide an agentic terminal -experience. - -## Repository overview - -Gemini CLI is structured as a monorepo using npm workspaces. The codebase is -divided into several specialized packages that separate the user interface from -the agentic orchestration logic. - -### Core packages - -- **`packages/cli`**: Contains the terminal user interface (TUI) implemented - with React and Ink. It handles terminal-specific logic like keybindings, - mouse events, and layout rendering. -- **`packages/core`**: The central engine of the application. It is UI-agnostic - and manages the Gemini API communication, tool orchestration, conversation - history, and policy enforcement. 
-- **`packages/devtools`**: Provides a developer-focused inspector (similar to - Chrome DevTools) for monitoring network traffic and console logs in real-time. -- **`packages/sdk`**: A library for building extensions and custom tools that - integrate with Gemini CLI. -- **`packages/vscode-ide-companion`**: A VS Code extension that connects the - editor state to the CLI, enabling the agent to read open files and cursor - positions. - -## Application lifecycle - -The application follows a structured startup and execution flow to ensure -security and environment consistency. - -### Startup and sandboxing - -When you launch Gemini CLI, the entry point in `packages/cli/src/gemini.tsx` -manages several initialization steps: - -1. **Configuration loading**: Loads user and workspace settings, parsing - command-line arguments. -2. **Authentication**: Validates credentials and refreshes OAuth tokens. -3. **Sandboxing**: If configured, the application relaunches itself in a - restricted child process using a "sandbox" environment to isolate tool - execution. -4. **Mode selection**: Determines whether to start the interactive TUI or run - in non-interactive mode based on input and terminal state. - -### Interactive vs. non-interactive modes - -- **Interactive mode**: Renders the TUI using Ink. The state is managed via - React contexts (Settings, Mouse, Keypress, Terminal) and a central - `AppContainer`. -- **Non-interactive mode**: Executes a single prompt or command. It uses a - focused loop in `packages/cli/src/nonInteractiveCli.ts` that continues until - the agent completes its task or requires user intervention that cannot be - provided. - -## Agent orchestration - -The orchestration of the agent's behavior happens primarily within -`packages/core/src/core`. - -### GeminiClient - -The `GeminiClient` is the primary interface for the rest of the application. It -coordinates: - -- **Session management**: Initializing, resuming, and persisting chat sessions. 
-- **Model routing**: Deciding which Gemini model to use based on the task and - configuration. -- **Context compression**: Summarizing long histories using the - `ChatCompressionService` to stay within context window limits. -- **IDE integration**: Injecting editor context (open files, selections) into - the prompt. - -### GeminiChat and Turn - -- **`GeminiChat`**: Manages the low-level API communication. It handles - streaming responses, retries for transient network errors, and records the - conversation history. -- **`Turn`**: Represents a single agentic exchange. A turn may involve multiple - API calls if the model decides to use tools. It yields events for content, - thoughts, and tool requests. - -## Tool system and scheduler - -The tool system allows the agent to interact with the external world. It is -built on a secure, policy-driven framework. - -### Tool registry - -The `ToolRegistry` in `packages/core/src/tools` maintains a list of all -available tools. It supports several types: - -- **Built-in tools**: Native TypeScript implementations for file system - operations, shell commands, and web fetching. -- **Discovered tools**: Local scripts or commands identified in the project - root. -- **MCP tools**: Tools provided by external servers via the Model Context - Protocol. - -### Scheduler - -The `Scheduler` in `packages/core/src/scheduler` manages the lifecycle of a -tool call: - -1. **Validation**: Ensures the tool exists and the arguments match the schema. -2. **Policy check**: Consults the Policy Engine to determine if the tool is - allowed to run automatically, requires user confirmation, or is denied. -3. **Confirmation**: If required, it pauses execution and uses the - `MessageBus` to request user approval through the UI. -4. **Execution**: Runs the tool and captures the output, including live - updates for long-running processes. -5. **Feedback**: Sends the tool result back to the model to continue the - agentic loop. 
- -## UI architecture - -The UI is built with React components rendered to the terminal via Ink. Key -design patterns include: - -- **Providers**: Global state like settings, theme, and terminal size is - provided through React Contexts to avoid prop drilling. -- **Console patching**: Standard `console.log` calls are intercepted and - redirected to the TUI's debug console or the `devtools` server. -- **Event-driven updates**: The UI listens to `coreEvents` from the orchestrator - to update its state (e.g., streaming text, tool progress, or errors). - -## Testing and quality - -The project maintains high standards through several testing tiers: - -- **Unit tests**: Located alongside the source code (e.g., `*.test.ts`), using - Vitest. -- **Integration tests**: E2E tests in the `integration-tests/` directory that - run the compiled CLI against mocked and real API endpoints. -- **Evals**: Specialized evaluation scripts in `evals/` that measure the - agent's performance on specific tasks like tool use and codebase navigation. +This document provides an in-depth technical overview of the Gemini CLI +architecture. It is intended for developers who want to understand the system's +inner workings, from startup to advanced agentic orchestration. + +## Repository structure + +Gemini CLI is a monorepo managed with npm workspaces. It strictly separates +concerns across packages: + +- **`packages/cli`**: The terminal user interface (TUI) layer. Built with React + and Ink, it handles user interaction, rendering, and terminal state. +- **`packages/core`**: The engine containing all business logic. It is entirely + UI-agnostic and manages the agent's lifecycle, Gemini API interactions, and + tool systems. +- **`packages/devtools`**: A suite for inspection. It provides a Chrome-like + Network and Console inspector for real-time debugging. +- **`packages/sdk`**: A library for building third-party extensions. 
+- **`packages/vscode-ide-companion`**: Bridges the editor and CLI, providing + real-time IDE context to the agent. + +--- + +## 1. Application lifecycle + +### Startup and initialization +The entry point is `packages/cli/src/gemini.tsx`. The startup sequence involves: +1. **Standard I/O patching**: The CLI patches `process.stdout` and + `process.stderr` to capture all output, ensuring it can be redirected to the + TUI or debug logs without garbling the terminal display. +2. **Sandboxing and relaunch**: If `advanced.sandbox` is enabled, the CLI + re-launches itself in a restricted environment. It also uses a relaunch + mechanism to automatically configure Node.js memory limits (e.g., + `--max-old-space-size`). +3. **Authentication**: Credentials are validated early. The CLI supports + multiple auth types, including API Keys, OAuth2, and Vertex AI. + +### Execution modes +The CLI operates in two distinct modes: +- **Interactive (TUI)**: Uses the `render` function from Ink to start a + persistent React application in the terminal. +- **Non-interactive (CLI)**: A streamlined execution loop in + `nonInteractiveCli.ts` that runs until the agent completes its task, + supporting piped input and output redirection. + +--- + +## 2. Model routing engine + +The `ModelRouterService` (`packages/core/src/routing`) is responsible for +selecting the most appropriate model for every request. + +### Composite strategy +The router uses a "Composite Strategy" that evaluates multiple sub-strategies in +priority order: +1. **Fallback**: Switches models if a quota error or API failure occurs. +2. **Override**: Respects user-specified model overrides (e.g., `--model`). +3. **Approval Mode**: Selects specialized models for `Plan Mode`. +4. **Classifier**: A lightweight LLM call that analyzes the user's request + against a rubric (Strategic Planning, Complexity, Ambiguity) to choose + between a "Pro" (complex) or "Flash" (simple) model. +5. 
**Numerical Classifier**: A deterministic classifier based on token counts + and history depth. + +--- + +## 3. Intelligent context management + +Managing the model's context window is critical for long-running sessions. This +is handled by two primary services in `packages/core/src/services`: + +### ChatCompressionService +When history exceeds a threshold (default 50% of the context window), the +compression service triggers: +1. **Split point detection**: It identifies a safe point in history to begin + summarization, ensuring recent turns remain in high-fidelity. +2. **State snapshot generation**: The LLM generates a `<state_snapshot>`—a + structured summary of established constraints, technical details, and + progress. +3. **The "Probe" (Self-Correction)**: A second model call "probes" the generated + summary against the original history to ensure no critical constraints or + paths were omitted, correcting the summary if necessary. + +### ToolOutputMaskingService +To prevent bulky tool outputs (like long log files) from clogging the context, +this service detects large `functionResponse` blocks and replaces them with +concise summaries or pointers to temporary files, preserving the model's ability +to reason about the data without consuming thousands of tokens. + +--- + +## 4. Advanced tool execution + +Tool execution is orchestrated by the `Scheduler` +(`packages/core/src/scheduler`), which operates as an event-driven state +machine. + +### State management +Every tool call moves through a structured lifecycle managed by the +`SchedulerStateManager`: +`Validating` → `AwaitingApproval` → `Scheduled` → `Executing` → `Success`/`Error` + +### Key features +- **Policy Engine**: A granular system that determines if a tool is safe to run. + Policies can be "Always", "Ask", or "Never" based on the tool name, arguments, + or folder location. 
+- **Tail Calls**: If a tool's output requires immediate follow-up (like a shell + command that produced a specific error code), the scheduler can "tail call" + another tool (e.g., a "fixer" or "retry") without ending the current turn. +- **Parallel execution**: The scheduler can execute multiple non-conflicting + read-only tools in parallel while enforcing sequential execution for + modifying tools. + +--- + +## 5. UI architecture + +The `packages/cli/src/ui` directory implements a sophisticated React-based +terminal interface. + +### Rendering and layout +- **Ink**: Provides React components for terminal output (`Box`, `Text`). +- **AppContainer**: The root component that coordinates the display of multiple + screens (Chat, Debug Console, Settings, Auth). +- **ConsolePatcher**: Intercepts `console.log` and redirects them to the + internal "Debug Console" accessible via `ctrl+d`. + +### State providers +Global state is managed through specialized providers: +- **`KeypressProvider`**: Captures and routes terminal keyboard events, + supporting complex shortcuts and Vim-style navigation. +- **`TerminalProvider`**: Tracks the terminal size and window state using a + custom `ResizeObserver`. +- **`VimModeProvider`**: Enables Vim-like keybindings for navigating through + conversation history and multi-line input fields. + +## Testing and quality assurance + +The repo employs a three-tier testing strategy: +1. **Unit tests**: Fast, isolated tests for core logic (Vitest). +2. **Integration tests**: Verify full system flows, including mock Gemini API + responses and real file system operations. +3. **Evals**: Performance benchmarks in `evals/` that measure the agent's + reasoning accuracy and tool-use efficiency over time. 
From f5b60c7f6768bdbba85ebafed8e9c6b7dba658b4 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Thu, 26 Feb 2026 12:02:49 -0800 Subject: [PATCH 03/33] docs: finalize codebase understanding guide with advanced technical details --- docs/codebase_understanding.md | 224 +++++++++++++++++---------------- 1 file changed, 114 insertions(+), 110 deletions(-) diff --git a/docs/codebase_understanding.md b/docs/codebase_understanding.md index f6c60c0f665..34a2ee3c942 100644 --- a/docs/codebase_understanding.md +++ b/docs/codebase_understanding.md @@ -1,145 +1,149 @@ # Codebase understanding -This document provides an in-depth technical overview of the Gemini CLI -architecture. It is intended for developers who want to understand the system's -inner workings, from startup to advanced agentic orchestration. - -## Repository structure - -Gemini CLI is a monorepo managed with npm workspaces. It strictly separates -concerns across packages: - -- **`packages/cli`**: The terminal user interface (TUI) layer. Built with React - and Ink, it handles user interaction, rendering, and terminal state. -- **`packages/core`**: The engine containing all business logic. It is entirely - UI-agnostic and manages the agent's lifecycle, Gemini API interactions, and - tool systems. -- **`packages/devtools`**: A suite for inspection. It provides a Chrome-like - Network and Console inspector for real-time debugging. -- **`packages/sdk`**: A library for building third-party extensions. -- **`packages/vscode-ide-companion`**: Bridges the editor and CLI, providing - real-time IDE context to the agent. +This document provides a deep-dive technical overview of the Gemini CLI +architecture. It is designed for developers who need to understand the +system's inner workings, from startup to advanced autonomous behaviors. + +## Repository architecture + +Gemini CLI is a monorepo structured to maintain a strict separation between +the user interface and the agent's core reasoning logic. 
+ +- **`packages/cli`**: The Terminal User Interface (TUI). Built with React and + Ink, it manages the interactive terminal experience, including keyboard + protocols, rendering, and terminal state management. +- **`packages/core`**: The UI-agnostic engine. It contains the primary + orchestration logic, model routing, tool systems, policy enforcement, and + Gemini API communication. +- **`packages/devtools`**: A suite for real-time inspection of network traffic, + console logs, and session activity. +- **`packages/sdk`**: A library for developers to build third-party tools and + extensions. +- **`packages/vscode-ide-companion`**: A specialized bridge that feeds real-time + editor state (open files, active selections, cursor positions) to the agent. --- ## 1. Application lifecycle ### Startup and initialization -The entry point is `packages/cli/src/gemini.tsx`. The startup sequence involves: -1. **Standard I/O patching**: The CLI patches `process.stdout` and - `process.stderr` to capture all output, ensuring it can be redirected to the - TUI or debug logs without garbling the terminal display. -2. **Sandboxing and relaunch**: If `advanced.sandbox` is enabled, the CLI - re-launches itself in a restricted environment. It also uses a relaunch - mechanism to automatically configure Node.js memory limits (e.g., - `--max-old-space-size`). -3. **Authentication**: Credentials are validated early. The CLI supports - multiple auth types, including API Keys, OAuth2, and Vertex AI. - -### Execution modes -The CLI operates in two distinct modes: -- **Interactive (TUI)**: Uses the `render` function from Ink to start a - persistent React application in the terminal. -- **Non-interactive (CLI)**: A streamlined execution loop in - `nonInteractiveCli.ts` that runs until the agent completes its task, - supporting piped input and output redirection. +The entry point is `packages/cli/src/gemini.tsx`. The startup sequence is +designed for security and resilience: + +1. 
**I/O redirection**: Standard output streams (`stdout`, `stderr`) are + patched to capture all logs and errors. This allows the CLI to redirect + diagnostic information to the TUI's debug console or a remote DevTools server + without corrupting the user's terminal interface. +2. **Memory-aware relaunch**: The CLI checks the host system's total memory. + If it detects that Node.js's default heap limit is insufficient for complex + codebase analysis, it re-launches itself using the + `--max-old-space-size` flag, targeting approximately 50% of system memory. +3. **Sandboxing**: If configured, the CLI launches a restricted "sandbox" + environment (using Docker, Podman, or a localized process) to isolate the + agent's autonomous actions from the host system. +4. **Interactive (TUI) vs. Non-interactive (CLI)**: + - **Interactive mode**: Initializes the Ink renderer, starting a persistent + React application that manages terminal state via providers. + - **Non-interactive mode**: Executes a streamlined loop in + `nonInteractiveCli.ts`, designed for single prompts or piped input/output + redirection. --- -## 2. Model routing engine +## 2. Model routing and selection -The `ModelRouterService` (`packages/core/src/routing`) is responsible for -selecting the most appropriate model for every request. +The `ModelRouterService` (`packages/core/src/routing`) implements a +"Composite Strategy" to select the optimal model for every request. -### Composite strategy -The router uses a "Composite Strategy" that evaluates multiple sub-strategies in -priority order: -1. **Fallback**: Switches models if a quota error or API failure occurs. -2. **Override**: Respects user-specified model overrides (e.g., `--model`). -3. **Approval Mode**: Selects specialized models for `Plan Mode`. -4. **Classifier**: A lightweight LLM call that analyzes the user's request - against a rubric (Strategic Planning, Complexity, Ambiguity) to choose - between a "Pro" (complex) or "Flash" (simple) model. -5. 
**Numerical Classifier**: A deterministic classifier based on token counts - and history depth. +### Routing strategies +- **classifier**: Uses a lightweight LLM call to categorize the complexity of a + task based on a rubric (Strategic Planning, Multi-step Coordination, + Ambiguity). It chooses between a "Pro" model (for complex reasoning) and a + "Flash" model (for simple operations). +- **approvalMode**: Selects specialized models (like `gemini-2.0-flash-lite`) + when the agent is in specific modes like `Plan Mode`. +- **numericalClassifier**: A deterministic strategy that selects models based + on the number of tokens in the conversation or the length of the history. +- **fallback**: Automatically switches models if the primary model encounters + quota limits (429) or transient API failures. --- ## 3. Intelligent context management -Managing the model's context window is critical for long-running sessions. This -is handled by two primary services in `packages/core/src/services`: +The agent maintains deep project awareness while staying within token limits +through several services in `packages/core/src/services`: ### ChatCompressionService -When history exceeds a threshold (default 50% of the context window), the -compression service triggers: -1. **Split point detection**: It identifies a safe point in history to begin - summarization, ensuring recent turns remain in high-fidelity. -2. **State snapshot generation**: The LLM generates a `<state_snapshot>`—a - structured summary of established constraints, technical details, and - progress. -3. **The "Probe" (Self-Correction)**: A second model call "probes" the generated - summary against the original history to ensure no critical constraints or - paths were omitted, correcting the summary if necessary. +Triggered when the history exceeds 50% of the model's context window: +1. **State snapshots**: The agent generates a structured `<state_snapshot>` + representing the cumulative knowledge of the session (constraints, progress, + paths). +2. 
**The "Probe" (Self-Correction)**: A second LLM pass compares the summary + against the original history to ensure no critical technical details or + user-defined constraints were lost, correcting the summary before purging + the history. ### ToolOutputMaskingService -To prevent bulky tool outputs (like long log files) from clogging the context, -this service detects large `functionResponse` blocks and replaces them with -concise summaries or pointers to temporary files, preserving the model's ability -to reason about the data without consuming thousands of tokens. +Prevents bulky data (like large shell outputs or file reads) from clogging the +context window. It replaces large `functionResponse` blocks with concise +summaries and persists the full data to temporary files, allowing the agent to +refer to the full data only when necessary. --- -## 4. Advanced tool execution +## 4. Advanced tool execution and scheduling -Tool execution is orchestrated by the `Scheduler` -(`packages/core/src/scheduler`), which operates as an event-driven state -machine. +The `Scheduler` (`packages/core/src/scheduler`) is an event-driven state +machine that manages the lifecycle of autonomous actions. -### State management -Every tool call moves through a structured lifecycle managed by the -`SchedulerStateManager`: +### Lifecycle states `Validating` → `AwaitingApproval` → `Scheduled` → `Executing` → `Success`/`Error` ### Key features -- **Policy Engine**: A granular system that determines if a tool is safe to run. - Policies can be "Always", "Ask", or "Never" based on the tool name, arguments, - or folder location. -- **Tail Calls**: If a tool's output requires immediate follow-up (like a shell - command that produced a specific error code), the scheduler can "tail call" - another tool (e.g., a "fixer" or "retry") without ending the current turn. 
-- **Parallel execution**: The scheduler can execute multiple non-conflicting - read-only tools in parallel while enforcing sequential execution for - modifying tools. +- **Policy Engine**: A granular system that evaluates tools based on security + policies (e.g., "Allow read-only tools", "Ask for shell commands"). It can be + configured at the project or user level. +- **Tail calls**: Allows a tool to "link" to another action. For example, a + shell command that produces an error can automatically trigger a "diagnostic" + tool without returning control to the main model. +- **Parallelism**: The scheduler executes independent read-only tools in + parallel while enforcing sequential execution for tools that modify the + environment. +- **MCP integration**: Dynamically loads tools from Model Context Protocol + servers, integrating them seamlessly into the same policy and scheduler + framework. --- -## 5. UI architecture - -The `packages/cli/src/ui` directory implements a sophisticated React-based -terminal interface. - -### Rendering and layout -- **Ink**: Provides React components for terminal output (`Box`, `Text`). -- **AppContainer**: The root component that coordinates the display of multiple - screens (Chat, Debug Console, Settings, Auth). -- **ConsolePatcher**: Intercepts `console.log` and redirects them to the - internal "Debug Console" accessible via `ctrl+d`. - -### State providers -Global state is managed through specialized providers: -- **`KeypressProvider`**: Captures and routes terminal keyboard events, - supporting complex shortcuts and Vim-style navigation. -- **`TerminalProvider`**: Tracks the terminal size and window state using a - custom `ResizeObserver`. -- **`VimModeProvider`**: Enables Vim-like keybindings for navigating through - conversation history and multi-line input fields. - -## Testing and quality assurance - -The repo employs a three-tier testing strategy: -1. **Unit tests**: Fast, isolated tests for core logic (Vitest). -2. 
**Integration tests**: Verify full system flows, including mock Gemini API - responses and real file system operations. +## 5. UI and terminal integration + +The `packages/cli/src/ui` directory implements a sophisticated React-based TUI. + +### Keyboard and protocols +- **KeypressProvider**: Manages terminal input, supporting complex key + combinations and shortcuts. +- **Kitty keyboard protocol**: Detects terminals that support the Kitty + protocol to enable advanced features like detecting `ctrl+enter` vs `enter`. +- **Vim mode**: A dedicated provider that enables Vim-like navigation (hjkl, + words, search) for both conversation history and input fields. + +### Layout and rendering +- **ResizeObserver**: A custom implementation that watches the terminal size + to ensure components (like multi-column layouts or wide tables) adapt + instantly. +- **ConsolePatcher**: Intercepts `console.log`, `console.warn`, and + `console.error`, routing them to the internal debug console (toggled with + `ctrl+d`) or the external DevTools server. + +--- + +## 6. Testing and validation + +Gemini CLI uses a tiered testing strategy to ensure reliability: +1. **Unit tests**: Located alongside the source (`*.test.ts`), providing fast + coverage for core logic. +2. **Integration tests**: Located in `integration-tests/`, running the + full CLI against mock and real Gemini API endpoints. 3. **Evals**: Performance benchmarks in `evals/` that measure the agent's reasoning accuracy and tool-use efficiency over time. 
From 0f59d48108cc7fecec15476f7fcc6734da96ab1c Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Thu, 26 Feb 2026 12:06:50 -0800 Subject: [PATCH 04/33] docs: add codebase understanding from antigravity --- docs/codebase_understanding_antigrav.md | 101 ++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 docs/codebase_understanding_antigrav.md diff --git a/docs/codebase_understanding_antigrav.md b/docs/codebase_understanding_antigrav.md new file mode 100644 index 00000000000..60d5dbd2058 --- /dev/null +++ b/docs/codebase_understanding_antigrav.md @@ -0,0 +1,101 @@ +# Gemini CLI - Codebase Understanding + +Gemini CLI is an open-source AI agent designed to let you interact with Google's +Gemini models directly from your terminal. It's built as a **TypeScript +monorepo** (using npm workspaces) and relies heavily on **Node.js**, **React**, +and **Ink** (a library that lets you build terminal UIs using React components). + +Here is a high-level walkthrough of the repository to help you understand how +all the pieces fit together. + +## 1. High-Level Architecture (The `packages/` Directory) + +The project is split into several focused packages to maintain a clean +separation of concerns: + +- **`packages/cli`** (The Frontend) + - This is the user-facing terminal UI. + - It uses React + Ink. This means the terminal layout, styling, and + interactions are managed like a modern web app (with hooks, contexts, and + components). + - It handles all the terminal-specific logic like key bindings, processing + mouse/keyboard events, and rendering the chat stream or tool progress + indicators. +- **`packages/core`** (The Brain/Backend) + - This is where the actual "agentic" logic lives. It is entirely UI-agnostic. + - Contains the core looping mechanism that communicates with the Gemini API, + maintains conversation history, compresses context, and evaluates whether + the agent needs to invoke a tool. 
+ - Houses the **Tool Registry** (file system tools, shell runner, web tools) + and the **Policy Engine** (deciding if a tool is safe to run automatically + or needs your permission). +- **`packages/devtools`** + - A Chrome DevTools-like web server that runs locally! If you enable + `general.devtools` in your settings, you can inspect network requests, agent + thoughts, and console logs in a local browser, just like you would for a web + app. +- **`packages/vscode-ide-companion`** + - A VS Code extension that pairs dynamically with the CLI. It allows the + terminal agent to "read" your active editor state, seamlessly pulling + context on exactly what files or lines of code you currently have + highlighted in VS Code. +- **`packages/sdk`** + - Provides libraries and types so people can build custom MCP (Model Context + Protocol) extensions or tools for the CLI. +- **`packages/a2a-server`** + - An experimental Agent-to-Agent server, hinting at future capabilities for + having different agents talk to each other. + +## 2. The Core Application Lifecycle + +When you type `gemini` in your terminal, here's roughly what happens under the +hood: + +1. **Bootstrapping (`packages/cli/src/gemini.tsx`)**: The CLI loads user + configurations, parses command-line arguments, checks authentication, and + verifies if it needs to launch itself in a controlled "sandbox" environment + (using Docker/Podman to isolate dangerous shell tools). +2. **Mode Resolution**: It determines if you are piping data in or running a + single command (`nonInteractiveCli.ts`), or if you are firing up the chat + TUI (Terminal User Interface). +3. **The Agent Loop (`packages/core/src/core/`)**: + - **`GeminiClient`**: The main orchestrator. It manages sessions and + compresses chat histories using `ChatCompressionService` so you don't + breach token limits. + - **`GeminiChat` & `Turn`**: For every prompt you send, a `Turn` is created. 
+ This represents one "exchange" where the model might think, respond, and + realize it needs to search your codebase. It streams the model's output back + in real time. + +## 3. The Tool System & Execution + +The most powerful aspect of this CLI is its ability to interact with your +environment. + +- In `packages/core/src/tools/`, there are native TypeScript implementations for + operations (like reading files, searching directories, or running tests). +- When Gemini asks to use a tool, the **Scheduler** + (`packages/core/src/scheduler/`) intercepts the request. +- It runs the request through the **Policy Engine** + (`packages/core/src/policy/`). Some commands (like `rm -rf`) are flagged and + routed to a **Confirmation Bus**, which pauses execution and asks you in the + UI: _"Do you want to allow this command?"_ +- Once approved (or auto-approved), it executes the tool, captures standard + output/error, and pipes that text back to Gemini to continue its thought + process. + +## 4. Code Quality, Building, and Testing + +- **Bundling & Running**: The project uses `esbuild` to compile everything very + quickly. During development, you can use `npm run start` or `npm run debug` + (which attaches a Node.js inspector). +- **Testing (`vitest`)**: Testing is extremely rigorous here. + - _Unit Tests:_ `npm run test` handles basic component functionality. + - _Integration Tests:_ `npm run test:e2e` simulates an actual sandbox, + using mocked or live models to make sure the CLI behaves realistically. + - _Evals (`evals/`):_ Standalone performance benchmarks that evaluate + how well the CLI navigates codebases and uses its tools + autonomously. +- **`npm run preflight`**: Before a PR is pushed, this massive script runs + formatting (Prettier), linting (ESLint), type checking (TypeScript), unit + testing, and building, ensuring nothing breaks the main branch.
From f7d43070a113c54b7d0a026d4d28efc3c0f909cb Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Fri, 10 Apr 2026 13:31:49 -0700 Subject: [PATCH 05/33] feat(cli): add `gemini gemma` command for streamlined local model setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the manual 6-step Gemma local routing setup with a single `gemini gemma setup` command that downloads the LiteRT-LM binary, pulls the model, configures settings, and starts the server. New commands: - `gemini gemma setup` — one-command install with progress bar - `gemini gemma start/stop` — server lifecycle management - `gemini gemma status` — diagnostic health check - `/gemma` slash command — in-session status with Ink UI component Also adds auto-start: the LiteRT server starts automatically on CLI launch when the Gemma router is enabled (configurable via autoStartServer setting). --- packages/cli/src/commands/gemma.ts | 34 ++ packages/cli/src/commands/gemma/constants.ts | 51 +++ packages/cli/src/commands/gemma/platform.ts | 121 +++++ packages/cli/src/commands/gemma/setup.ts | 414 ++++++++++++++++++ packages/cli/src/commands/gemma/start.ts | 122 ++++++ packages/cli/src/commands/gemma/status.ts | 181 ++++++++ packages/cli/src/commands/gemma/stop.ts | 115 +++++ packages/cli/src/config/config.ts | 3 + packages/cli/src/config/settingsSchema.ts | 20 + packages/cli/src/gemini.tsx | 10 + .../cli/src/services/BuiltinCommandLoader.ts | 2 + .../cli/src/services/liteRtServerManager.ts | 88 ++++ .../cli/src/ui/commands/gemmaStatusCommand.ts | 41 ++ .../src/ui/components/HistoryItemDisplay.tsx | 4 + .../src/ui/components/views/GemmaStatus.tsx | 126 ++++++ packages/cli/src/ui/types.ts | 15 + packages/core/src/config/config.ts | 2 + 17 files changed, 1349 insertions(+) create mode 100644 packages/cli/src/commands/gemma.ts create mode 100644 packages/cli/src/commands/gemma/constants.ts create mode 100644 packages/cli/src/commands/gemma/platform.ts create mode 
100644 packages/cli/src/commands/gemma/setup.ts create mode 100644 packages/cli/src/commands/gemma/start.ts create mode 100644 packages/cli/src/commands/gemma/status.ts create mode 100644 packages/cli/src/commands/gemma/stop.ts create mode 100644 packages/cli/src/services/liteRtServerManager.ts create mode 100644 packages/cli/src/ui/commands/gemmaStatusCommand.ts create mode 100644 packages/cli/src/ui/components/views/GemmaStatus.tsx diff --git a/packages/cli/src/commands/gemma.ts b/packages/cli/src/commands/gemma.ts new file mode 100644 index 00000000000..1d2a28c23ed --- /dev/null +++ b/packages/cli/src/commands/gemma.ts @@ -0,0 +1,34 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { CommandModule, Argv } from 'yargs'; +import { initializeOutputListenersAndFlush } from '../gemini.js'; +import { defer } from '../deferred.js'; +import { setupCommand } from './gemma/setup.js'; +import { startCommand } from './gemma/start.js'; +import { stopCommand } from './gemma/stop.js'; +import { statusCommand } from './gemma/status.js'; + +export const gemmaCommand: CommandModule = { + command: 'gemma', + describe: 'Manage local Gemma model routing', + builder: (yargs: Argv) => + yargs + .middleware((argv) => { + initializeOutputListenersAndFlush(); + argv['isCommand'] = true; + }) + .command(defer(setupCommand, 'gemma')) + .command(defer(startCommand, 'gemma')) + .command(defer(stopCommand, 'gemma')) + .command(defer(statusCommand, 'gemma')) + .demandCommand(1, 'You need at least one command before continuing.') + .version(false), + handler: () => { + // yargs will automatically show help if no subcommand is provided + // thanks to demandCommand(1) in the builder. 
+ }, +}; diff --git a/packages/cli/src/commands/gemma/constants.ts b/packages/cli/src/commands/gemma/constants.ts new file mode 100644 index 00000000000..76203a8a8f4 --- /dev/null +++ b/packages/cli/src/commands/gemma/constants.ts @@ -0,0 +1,51 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import path from 'node:path'; +import { Storage } from '@google/gemini-cli-core'; + +/** LiteRT-LM release version to download. */ +export const LITERT_RELEASE_VERSION = 'v0.9.0-alpha03'; + +/** Base URL for LiteRT-LM GitHub releases. */ +export const LITERT_RELEASE_BASE_URL = + 'https://github.com/google-ai-edge/LiteRT-LM/releases/download'; + +/** The only tested and supported model for local routing. */ +export const GEMMA_MODEL_NAME = 'gemma3-1b-gpu-custom'; + +/** Default port for the LiteRT-LM server. */ +export const DEFAULT_PORT = 9379; + +/** Server health check timeout in milliseconds. */ +export const HEALTH_CHECK_TIMEOUT_MS = 5000; + +/** Delay before checking if server started successfully. */ +export const SERVER_START_WAIT_MS = 3000; + +/** + * Maps `${process.platform}-${process.arch}` to the LiteRT-LM binary filename. + */ +export const PLATFORM_BINARY_MAP: Record = { + 'darwin-arm64': 'lit.macos_arm64', + 'linux-x64': 'lit.linux_x86_64', + 'win32-x64': 'lit.windows_x86_64.exe', +}; + +/** Directory where the LiteRT-LM binary is installed. */ +export function getLiteRtBinDir(): string { + return path.join(Storage.getGlobalGeminiDir(), 'bin', 'litert'); +} + +/** Path to the PID file for the background LiteRT server. */ +export function getPidFilePath(): string { + return path.join(Storage.getGlobalTempDir(), 'litert-server.pid'); +} + +/** Path to the log file for the background LiteRT server. 
*/ +export function getLogFilePath(): string { + return path.join(Storage.getGlobalTempDir(), 'litert-server.log'); +} diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts new file mode 100644 index 00000000000..e39d99e557f --- /dev/null +++ b/packages/cli/src/commands/gemma/platform.ts @@ -0,0 +1,121 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import path from 'node:path'; +import { execFileSync } from 'node:child_process'; +import { + PLATFORM_BINARY_MAP, + LITERT_RELEASE_BASE_URL, + LITERT_RELEASE_VERSION, + getLiteRtBinDir, + GEMMA_MODEL_NAME, + HEALTH_CHECK_TIMEOUT_MS, + getPidFilePath, +} from './constants.js'; + +export interface PlatformInfo { + key: string; + binaryName: string; +} + +/** + * Detects the current platform and resolves the corresponding LiteRT-LM binary name. + * Returns null if the platform is unsupported. + */ +export function detectPlatform(): PlatformInfo | null { + const key = `${process.platform}-${process.arch}`; + const binaryName = PLATFORM_BINARY_MAP[key]; + if (!binaryName) { + return null; + } + return { key, binaryName }; +} + +/** Returns the full local path to the LiteRT-LM binary. */ +export function getBinaryPath(binaryName?: string): string | null { + const name = binaryName ?? detectPlatform()?.binaryName; + if (!name) return null; + return path.join(getLiteRtBinDir(), name); +} + +/** Returns the GitHub release download URL for the binary. */ +export function getBinaryDownloadUrl(binaryName: string): string { + return `${LITERT_RELEASE_BASE_URL}/${LITERT_RELEASE_VERSION}/${binaryName}`; +} + +/** Checks if the LiteRT-LM binary exists on disk. 
*/ +export function isBinaryInstalled(): boolean { + const binaryPath = getBinaryPath(); + if (!binaryPath) return false; + return fs.existsSync(binaryPath); +} + +/** + * Checks if the Gemma model has been downloaded by running `lit list` + * and looking for the model name in stdout. + */ +export function isModelDownloaded(binaryPath: string): boolean { + try { + const output = execFileSync(binaryPath, ['list'], { + encoding: 'utf-8', + timeout: 10000, + }); + return output.includes(GEMMA_MODEL_NAME); + } catch { + return false; + } +} + +/** + * Checks if a LiteRT-LM server is running and responding on the given port. + * Uses a simple HTTP request with a short timeout. + */ +export async function isServerRunning(port: number): Promise { + try { + const controller = new AbortController(); + const timeout = setTimeout( + () => controller.abort(), + HEALTH_CHECK_TIMEOUT_MS, + ); + const response = await fetch(`http://localhost:${port}/`, { + signal: controller.signal, + }); + clearTimeout(timeout); + // Any response (even an error page) means the server is up. + return response.ok || response.status > 0; + } catch { + return false; + } +} + +/** + * Reads the PID from the PID file, if it exists. + * Returns the PID number, or null if the file doesn't exist or is invalid. + */ +export function readServerPid(): number | null { + const pidPath = getPidFilePath(); + try { + const content = fs.readFileSync(pidPath, 'utf-8').trim(); + const pid = parseInt(content, 10); + return isNaN(pid) ? null : pid; + } catch { + return null; + } +} + +/** + * Checks if a process with the given PID is still running. + */ +export function isProcessRunning(pid: number): boolean { + try { + // Sending signal 0 checks if the process exists without actually signaling it. 
+ process.kill(pid, 0); + return true; + } catch { + return false; + } +} diff --git a/packages/cli/src/commands/gemma/setup.ts b/packages/cli/src/commands/gemma/setup.ts new file mode 100644 index 00000000000..7648122783d --- /dev/null +++ b/packages/cli/src/commands/gemma/setup.ts @@ -0,0 +1,414 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { CommandModule } from 'yargs'; +import fs from 'node:fs'; +import { execSync, spawn as nodeSpawn } from 'node:child_process'; +import chalk from 'chalk'; +import { debugLogger } from '@google/gemini-cli-core'; +import { loadSettings, SettingScope } from '../../config/settings.js'; +import { exitCli } from '../utils.js'; +import { + DEFAULT_PORT, + GEMMA_MODEL_NAME, + getLiteRtBinDir, +} from './constants.js'; +import { + detectPlatform, + getBinaryDownloadUrl, + getBinaryPath, + isBinaryInstalled, + isModelDownloaded, +} from './platform.js'; +import { startServer } from './start.js'; +import readline from 'node:readline'; + +const log = (msg: string) => debugLogger.log(msg); +const logError = (msg: string) => debugLogger.error(msg); + +/** + * Prompts the user for a yes/no confirmation. + * Returns true if the user answers 'y' or 'yes'. + */ +async function promptYesNo(question: string): Promise { + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }); + return new Promise((resolve) => { + rl.question(`${question} (y/N): `, (answer) => { + rl.close(); + resolve( + answer.trim().toLowerCase() === 'y' || + answer.trim().toLowerCase() === 'yes', + ); + }); + }); +} + +/** Formats a byte count into a human-readable string (e.g. "12.3 MB"). */ +function formatBytes(bytes: number): string { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; + return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; +} + +/** Renders a single-line progress bar to stderr (overwriting in place). 
*/ +function renderProgress(downloaded: number, total: number | null): void { + const barWidth = 30; + if (total && total > 0) { + const pct = Math.min(downloaded / total, 1); + const filled = Math.round(barWidth * pct); + const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled); + const pctStr = (pct * 100).toFixed(0).padStart(3); + process.stderr.write( + `\r [${bar}] ${pctStr}% ${formatBytes(downloaded)} / ${formatBytes(total)}`, + ); + } else { + process.stderr.write(`\r Downloaded ${formatBytes(downloaded)}`); + } +} + +/** + * Downloads a file from a URL to a local path with a progress bar. + * Uses a temporary `.downloading` suffix for safety against interrupted downloads. + */ +async function downloadFile(url: string, destPath: string): Promise { + const tmpPath = destPath + '.downloading'; + + // Clean up any previous interrupted download. + if (fs.existsSync(tmpPath)) { + fs.unlinkSync(tmpPath); + } + + const response = await fetch(url, { redirect: 'follow' }); + if (!response.ok) { + throw new Error( + `Download failed: HTTP ${response.status} ${response.statusText}`, + ); + } + if (!response.body) { + throw new Error('Download failed: No response body'); + } + + const contentLength = response.headers.get('content-length'); + const totalBytes = contentLength ? parseInt(contentLength, 10) : null; + let downloadedBytes = 0; + + const fileStream = fs.createWriteStream(tmpPath); + const reader = response.body.getReader(); + + try { + for (;;) { + const { done, value } = await reader.read(); + if (done) break; + fileStream.write(value); + downloadedBytes += value.byteLength; + renderProgress(downloadedBytes, totalBytes); + } + } finally { + fileStream.end(); + // Clear the progress line. + process.stderr.write('\r' + ' '.repeat(80) + '\r'); + } + + // Wait for the file to finish flushing. + await new Promise((resolve, reject) => { + fileStream.on('finish', resolve); + fileStream.on('error', reject); + }); + + // Atomic rename after successful download. 
+ fs.renameSync(tmpPath, destPath); +} + +/** + * Spawns a child process and returns a promise that resolves with the exit code. + * Inherits stdio so the user sees all output (progress, terms acceptance, etc.). + */ +function spawnInherited(command: string, args: string[]): Promise { + return new Promise((resolve, reject) => { + const child = nodeSpawn(command, args, { + stdio: 'inherit', + }); + child.on('close', (code) => resolve(code ?? 1)); + child.on('error', reject); + }); +} + +interface SetupArgs { + port: number; + skipModel: boolean; + start: boolean; + force: boolean; + consent: boolean; +} + +async function handleSetup(argv: SetupArgs): Promise { + const { port, force } = argv; + + log(''); + log(chalk.bold('Gemma Local Model Routing Setup')); + log(chalk.dim('─'.repeat(40))); + log(''); + + // Step 1: Platform detection + const platform = detectPlatform(); + if (!platform) { + logError( + chalk.red(`Unsupported platform: ${process.platform}-${process.arch}`), + ); + logError( + 'LiteRT-LM binaries are available for: macOS (ARM64), Linux (x86_64), Windows (x86_64)', + ); + await exitCli(1); + return; + } + log(chalk.dim(` Platform: ${platform.key} → ${platform.binaryName}`)); + + // Step 2: Consent + if (!argv.consent) { + log(''); + log('This will download and install the LiteRT-LM runtime and the'); + log( + `Gemma model (${GEMMA_MODEL_NAME}, ~1 GB). 
By proceeding, you agree to the`, + ); + log('Gemma Terms of Use: https://ai.google.dev/gemma/terms'); + log(''); + + const accepted = await promptYesNo('Do you want to continue?'); + if (!accepted) { + log('Setup cancelled.'); + await exitCli(0); + return; + } + } + + // Step 3: Download binary + const binaryPath = getBinaryPath(platform.binaryName)!; + const alreadyInstalled = isBinaryInstalled(); + + if (alreadyInstalled && !force) { + log(''); + log(chalk.green(' ✓ LiteRT-LM binary already installed at:')); + log(chalk.dim(` ${binaryPath}`)); + } else { + log(''); + log(' Downloading LiteRT-LM binary...'); + const downloadUrl = getBinaryDownloadUrl(platform.binaryName); + debugLogger.log(`Downloading from: ${downloadUrl}`); + + try { + const binDir = getLiteRtBinDir(); + fs.mkdirSync(binDir, { recursive: true }); + await downloadFile(downloadUrl, binaryPath); + log(chalk.green(' ✓ Binary downloaded successfully')); + } catch (error) { + logError( + chalk.red( + ` ✗ Failed to download binary: ${error instanceof Error ? error.message : String(error)}`, + ), + ); + logError(' Check your internet connection and try again.'); + await exitCli(1); + return; + } + + // Step 4: Make executable and handle macOS gatekeeper + if (process.platform !== 'win32') { + try { + fs.chmodSync(binaryPath, 0o755); + } catch (error) { + logError( + chalk.red( + ` ✗ Failed to set executable permission: ${error instanceof Error ? error.message : String(error)}`, + ), + ); + await exitCli(1); + return; + } + } + + if (process.platform === 'darwin') { + try { + execSync(`xattr -d com.apple.quarantine "${binaryPath}"`, { + stdio: 'ignore', + }); + log(chalk.green(' ✓ macOS quarantine attribute removed')); + } catch { + // This is expected to fail if the attribute doesn't exist. 
+ debugLogger.log( + 'xattr quarantine removal not needed or failed (non-fatal)', + ); + } + } + } + + // Step 5: Pull the model + if (!argv.skipModel) { + const modelAlreadyDownloaded = isModelDownloaded(binaryPath); + if (modelAlreadyDownloaded && !force) { + log(''); + log(chalk.green(` ✓ Model ${GEMMA_MODEL_NAME} already downloaded`)); + } else { + log(''); + log(` Downloading model ${GEMMA_MODEL_NAME}...`); + log(chalk.dim(' You may be prompted to accept the Gemma Terms of Use.')); + log(''); + + const exitCode = await spawnInherited(binaryPath, [ + 'pull', + GEMMA_MODEL_NAME, + ]); + if (exitCode !== 0) { + logError(''); + logError( + chalk.red(` ✗ Model download failed (exit code ${exitCode})`), + ); + await exitCli(1); + return; + } + log(''); + log(chalk.green(` ✓ Model ${GEMMA_MODEL_NAME} downloaded`)); + } + } + + // Step 6: Configure settings + log(''); + log(' Configuring settings...'); + try { + const settings = loadSettings(process.cwd()); + const existingGemma = + settings.forScope(SettingScope.User).settings.experimental + ?.gemmaModelRouter ?? {}; + + const newGemmaSettings = { + ...existingGemma, + enabled: true, + autoStartServer: existingGemma.autoStartServer ?? true, + classifier: { + host: `http://localhost:${port}`, + model: GEMMA_MODEL_NAME, + ...existingGemma.classifier, + }, + }; + + // Read existing experimental settings to avoid overwriting them. + const existingExperimental = + settings.forScope(SettingScope.User).settings.experimental ?? {}; + settings.setValue(SettingScope.User, 'experimental', { + ...existingExperimental, + gemmaModelRouter: newGemmaSettings, + }); + + log(chalk.green(' ✓ Settings updated in ~/.gemini/settings.json')); + } catch (error) { + logError( + chalk.red( + ` ✗ Failed to update settings: ${error instanceof Error ? 
error.message : String(error)}`, + ), + ); + logError( + ' You can manually add the configuration to ~/.gemini/settings.json', + ); + } + + // Step 7: Start server (if requested) + if (argv.start) { + log(''); + log(' Starting LiteRT server...'); + const started = await startServer(binaryPath, port); + if (started) { + log(chalk.green(` ✓ Server started on port ${port}`)); + } else { + log( + chalk.yellow( + ` ! Server may not have started correctly. Check: gemini gemma status`, + ), + ); + } + } + + // Step 8: Summary + log(''); + log(chalk.dim('─'.repeat(40))); + log(chalk.bold.green(' Setup complete! Local model routing is now active.')); + log(''); + log(' How it works: Every request is classified by the local Gemma model.'); + log( + ' Simple tasks (file reads, quick edits) route to ' + + chalk.cyan('Flash') + + ' for speed.', + ); + log( + ' Complex tasks (debugging, architecture) route to ' + + chalk.cyan('Pro') + + ' for quality.', + ); + log(' This happens automatically — just use the CLI as usual.'); + log(''); + if (!argv.start) { + log( + chalk.yellow( + ' Note: Run "gemini gemma start" to start the server, or restart', + ), + ); + log( + chalk.yellow( + ' the CLI to auto-start it (if autoStartServer is enabled).', + ), + ); + log(''); + } + log(' Useful commands:'); + log(chalk.dim(' gemini gemma status Check routing status')); + log(chalk.dim(' gemini gemma start Start the LiteRT server')); + log(chalk.dim(' gemini gemma stop Stop the LiteRT server')); + log(chalk.dim(' /gemma Check status inside a session')); + log(''); +} + +export const setupCommand: CommandModule = { + command: 'setup', + describe: 'Download and configure Gemma local model routing', + builder: (yargs) => + yargs + .option('port', { + type: 'number', + default: DEFAULT_PORT, + description: 'Port for the LiteRT server', + }) + .option('skip-model', { + type: 'boolean', + default: false, + description: 'Skip model download (binary only)', + }) + .option('start', { + type: 'boolean', 
+ default: true, + description: 'Start the server after setup', + }) + .option('force', { + type: 'boolean', + default: false, + description: 'Re-download binary and model even if already present', + }) + .option('consent', { + type: 'boolean', + default: false, + description: 'Skip interactive consent prompt (implies acceptance)', + }), + handler: async (argv) => { + await handleSetup({ + port: Number(argv['port']), + skipModel: Boolean(argv['skipModel']), + start: Boolean(argv['start']), + force: Boolean(argv['force']), + consent: Boolean(argv['consent']), + }); + await exitCli(0); + }, +}; diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts new file mode 100644 index 00000000000..8918daa9faf --- /dev/null +++ b/packages/cli/src/commands/gemma/start.ts @@ -0,0 +1,122 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { CommandModule } from 'yargs'; +import fs from 'node:fs'; +import { spawn } from 'node:child_process'; +import chalk from 'chalk'; +import { debugLogger } from '@google/gemini-cli-core'; +import { exitCli } from '../utils.js'; +import { + DEFAULT_PORT, + getPidFilePath, + getLogFilePath, + getLiteRtBinDir, + SERVER_START_WAIT_MS, +} from './constants.js'; +import { + getBinaryPath, + isBinaryInstalled, + isServerRunning, +} from './platform.js'; + +/** + * Starts the LiteRT-LM server as a detached background process. + * Returns true if the server was started (or is already running). + * + * This function is also used by `setup.ts` to start the server after installation. 
+ */ +export async function startServer( + binaryPath: string, + port: number, +): Promise { + // Check if already running + const alreadyRunning = await isServerRunning(port); + if (alreadyRunning) { + debugLogger.log(`LiteRT server already running on port ${port}`); + return true; + } + + // Ensure log directory exists + const logPath = getLogFilePath(); + fs.mkdirSync(getLiteRtBinDir(), { recursive: true }); + // Ensure tmp dir exists for log and pid files + const tmpDir = getPidFilePath().replace(/\/[^/]+$/, ''); + fs.mkdirSync(tmpDir, { recursive: true }); + + const logFd = fs.openSync(logPath, 'a'); + + try { + const child = spawn(binaryPath, ['serve', `--port=${port}`, '--verbose'], { + detached: true, + stdio: ['ignore', logFd, logFd], + }); + + // Write PID file + const pidPath = getPidFilePath(); + if (child.pid) { + fs.writeFileSync(pidPath, String(child.pid), 'utf-8'); + } + + // Detach the child so it survives after the CLI exits. + child.unref(); + } finally { + fs.closeSync(logFd); + } + + // Wait briefly and verify the server is responding. + await new Promise((resolve) => setTimeout(resolve, SERVER_START_WAIT_MS)); + return isServerRunning(port); +} + +export const startCommand: CommandModule = { + command: 'start', + describe: 'Start the LiteRT-LM server', + builder: (yargs) => + yargs.option('port', { + type: 'number', + default: DEFAULT_PORT, + description: 'Port for the LiteRT server', + }), + handler: async (argv) => { + const port = Number(argv['port']); + + if (!isBinaryInstalled()) { + debugLogger.error( + chalk.red( + 'LiteRT-LM binary not found. 
Run "gemini gemma setup" first.', + ), + ); + await exitCli(1); + return; + } + + const alreadyRunning = await isServerRunning(port); + if (alreadyRunning) { + debugLogger.log( + chalk.green(`LiteRT server is already running on port ${port}.`), + ); + await exitCli(0); + return; + } + + const binaryPath = getBinaryPath()!; + debugLogger.log(`Starting LiteRT server on port ${port}...`); + + const started = await startServer(binaryPath, port); + if (started) { + debugLogger.log(chalk.green(`LiteRT server started on port ${port}.`)); + debugLogger.log(chalk.dim(`Logs: ${getLogFilePath()}`)); + await exitCli(0); + } else { + debugLogger.error( + chalk.red('Server may not have started correctly. Check logs:'), + ); + debugLogger.error(chalk.dim(` ${getLogFilePath()}`)); + await exitCli(1); + } + }, +}; diff --git a/packages/cli/src/commands/gemma/status.ts b/packages/cli/src/commands/gemma/status.ts new file mode 100644 index 00000000000..4a265dd9445 --- /dev/null +++ b/packages/cli/src/commands/gemma/status.ts @@ -0,0 +1,181 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { CommandModule } from 'yargs'; +import chalk from 'chalk'; +import { loadSettings } from '../../config/settings.js'; +import { DEFAULT_PORT, GEMMA_MODEL_NAME } from './constants.js'; +import { + detectPlatform, + getBinaryPath, + isBinaryInstalled, + isModelDownloaded, + isServerRunning, + readServerPid, + isProcessRunning, +} from './platform.js'; +import { exitCli } from '../utils.js'; + +export interface GemmaStatusResult { + binaryInstalled: boolean; + binaryPath: string | null; + modelDownloaded: boolean; + serverRunning: boolean; + serverPid: number | null; + settingsEnabled: boolean; + port: number; + allPassing: boolean; +} + +/** + * Runs all diagnostic checks and returns a structured status result. + * This is shared between the CLI `gemini gemma status` command and the + * in-session `/gemma` slash command. 
+ */ +export async function checkGemmaStatus( + port?: number, +): Promise { + const effectivePort = port ?? DEFAULT_PORT; + const binaryPath = getBinaryPath(); + const binaryInstalled = isBinaryInstalled(); + const modelDownloaded = + binaryInstalled && binaryPath ? isModelDownloaded(binaryPath) : false; + const serverRunning = await isServerRunning(effectivePort); + const pid = readServerPid(); + const serverPid = pid && isProcessRunning(pid) ? pid : null; + + let settingsEnabled = false; + try { + const settings = loadSettings(process.cwd()); + const gemmaSettings = settings.merged.experimental?.gemmaModelRouter; + settingsEnabled = gemmaSettings?.enabled === true; + } catch { + // Settings may fail to load in some contexts; treat as not enabled. + } + + const allPassing = + binaryInstalled && modelDownloaded && serverRunning && settingsEnabled; + + return { + binaryInstalled, + binaryPath, + modelDownloaded, + serverRunning, + serverPid, + settingsEnabled, + port: effectivePort, + allPassing, + }; +} + +/** Formats the status result into a human-readable string. */ +export function formatGemmaStatus(status: GemmaStatusResult): string { + const check = (ok: boolean) => (ok ? 
chalk.green('✓') : chalk.red('✗')); + + const lines: string[] = [ + '', + chalk.bold('Gemma Local Model Routing Status'), + chalk.dim('─'.repeat(40)), + '', + ]; + + // Binary + if (status.binaryInstalled) { + lines.push(` Binary: ${check(true)} Installed (${status.binaryPath})`); + } else { + const platform = detectPlatform(); + if (platform) { + lines.push(` Binary: ${check(false)} Not installed`); + lines.push(chalk.dim(` Run: gemini gemma setup`)); + } else { + lines.push( + ` Binary: ${check(false)} Unsupported platform (${process.platform}-${process.arch})`, + ); + } + } + + // Model + if (status.modelDownloaded) { + lines.push(` Model: ${check(true)} ${GEMMA_MODEL_NAME} downloaded`); + } else { + lines.push(` Model: ${check(false)} ${GEMMA_MODEL_NAME} not found`); + if (status.binaryInstalled) { + lines.push( + chalk.dim( + ` Run: ${status.binaryPath} pull ${GEMMA_MODEL_NAME}`, + ), + ); + } else { + lines.push(chalk.dim(` Run: gemini gemma setup`)); + } + } + + // Server + if (status.serverRunning) { + const pidInfo = status.serverPid ? 
` (PID ${status.serverPid})` : ''; + lines.push( + ` Server: ${check(true)} Running on port ${status.port}${pidInfo}`, + ); + } else { + lines.push( + ` Server: ${check(false)} Not running on port ${status.port}`, + ); + lines.push(chalk.dim(` Run: gemini gemma start`)); + } + + // Settings + if (status.settingsEnabled) { + lines.push(` Settings: ${check(true)} Enabled in settings.json`); + } else { + lines.push(` Settings: ${check(false)} Not enabled in settings.json`); + lines.push( + chalk.dim( + ` Run: gemini gemma setup (auto-configures settings)`, + ), + ); + } + + lines.push(''); + + if (status.allPassing) { + lines.push(chalk.green(' Routing is active — no action needed.')); + lines.push(''); + lines.push( + chalk.dim( + ' Simple requests → Flash (fast) | Complex requests → Pro (powerful)', + ), + ); + lines.push(chalk.dim(' This happens automatically on every request.')); + } else { + lines.push( + chalk.yellow( + ' Some checks failed. Run "gemini gemma setup" for guided installation.', + ), + ); + } + + lines.push(''); + return lines.join('\n'); +} + +export const statusCommand: CommandModule = { + command: 'status', + describe: 'Check Gemma local model routing status', + builder: (yargs) => + yargs.option('port', { + type: 'number', + default: DEFAULT_PORT, + description: 'Port to check for the LiteRT server', + }), + handler: async (argv) => { + const port = Number(argv['port']); + const status = await checkGemmaStatus(port); + const output = formatGemmaStatus(status); + // Use process.stdout directly for consistent output in non-interactive mode. + process.stdout.write(output); + await exitCli(status.allPassing ? 
0 : 1); + }, +}; diff --git a/packages/cli/src/commands/gemma/stop.ts b/packages/cli/src/commands/gemma/stop.ts new file mode 100644 index 00000000000..15db60eaa8e --- /dev/null +++ b/packages/cli/src/commands/gemma/stop.ts @@ -0,0 +1,115 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { CommandModule } from 'yargs'; +import fs from 'node:fs'; +import chalk from 'chalk'; +import { debugLogger } from '@google/gemini-cli-core'; +import { exitCli } from '../utils.js'; +import { DEFAULT_PORT, getPidFilePath } from './constants.js'; +import { + readServerPid, + isProcessRunning, + isServerRunning, +} from './platform.js'; + +/** + * Stops the LiteRT-LM server by sending SIGTERM to the stored PID. + * Returns true if the server was stopped successfully. + */ +export async function stopServer(): Promise { + const pid = readServerPid(); + const pidPath = getPidFilePath(); + + if (pid === null) { + return false; + } + + if (!isProcessRunning(pid)) { + // PID file exists but process is gone — clean up stale file. + try { + fs.unlinkSync(pidPath); + } catch { + // Ignore cleanup errors. + } + return false; + } + + try { + process.kill(pid, 'SIGTERM'); + } catch { + return false; + } + + // Wait briefly for graceful shutdown. + await new Promise((resolve) => setTimeout(resolve, 1000)); + + // If still running, escalate to SIGKILL. + if (isProcessRunning(pid)) { + try { + process.kill(pid, 'SIGKILL'); + } catch { + // Process may have exited between the check and the kill. + } + await new Promise((resolve) => setTimeout(resolve, 500)); + } + + // Clean up PID file. + try { + fs.unlinkSync(pidPath); + } catch { + // Ignore cleanup errors. 
+ } + + return true; +} + +export const stopCommand: CommandModule = { + command: 'stop', + describe: 'Stop the LiteRT-LM server', + builder: (yargs) => + yargs.option('port', { + type: 'number', + default: DEFAULT_PORT, + description: 'Port the server is running on', + }), + handler: async (argv) => { + const port = Number(argv['port']); + const pid = readServerPid(); + + if (pid !== null && isProcessRunning(pid)) { + debugLogger.log(`Stopping LiteRT server (PID ${pid})...`); + const stopped = await stopServer(); + if (stopped) { + debugLogger.log(chalk.green('LiteRT server stopped.')); + await exitCli(0); + } else { + debugLogger.error(chalk.red('Failed to stop LiteRT server.')); + await exitCli(1); + } + return; + } + + // No PID file or process not running — check if something else is on the port. + const running = await isServerRunning(port); + if (running) { + debugLogger.log( + chalk.yellow( + `A server is responding on port ${port}, but it was not started by "gemini gemma start".`, + ), + ); + debugLogger.log( + chalk.dim( + 'If you started it manually, stop it from the terminal where it is running.', + ), + ); + await exitCli(1); + } else { + debugLogger.log('No LiteRT server is currently running.'); + await exitCli(0); + } + }, +}; diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 4e7e1db6f2c..17c3ffb89a6 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -13,6 +13,7 @@ import { mcpCommand } from '../commands/mcp.js'; import { extensionsCommand } from '../commands/extensions.js'; import { skillsCommand } from '../commands/skills.js'; import { hooksCommand } from '../commands/hooks.js'; +import { gemmaCommand } from '../commands/gemma.js'; import { setGeminiMdFilename as setServerGeminiMdFilename, getCurrentGeminiMdFilename, @@ -181,6 +182,7 @@ export async function parseArguments( extensionsCommand, skillsCommand, hooksCommand, + gemmaCommand, ]; const subcommands = 
commandModules.flatMap((mod) => { @@ -260,6 +262,7 @@ export async function parseArguments( yargsInstance.command(extensionsCommand); yargsInstance.command(skillsCommand); yargsInstance.command(hooksCommand); + yargsInstance.command(gemmaCommand); yargsInstance .command('$0 [query..]', 'Launch Gemini CLI', (yargsInstance) => diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index fcfd604e3a7..6f946eb963d 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -2146,6 +2146,26 @@ const SETTINGS_SCHEMA = { 'Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.', showInDialog: false, }, + autoStartServer: { + type: 'boolean', + label: 'Auto-start LiteRT Server', + category: 'Experimental', + requiresRestart: true, + default: true, + description: + 'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.', + showInDialog: false, + }, + binaryPath: { + type: 'string', + label: 'LiteRT Binary Path', + category: 'Experimental', + requiresRestart: true, + default: '', + description: + 'Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).', + showInDialog: false, + }, classifier: { type: 'object', label: 'Classifier', diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index eedfcc950ad..899be0bad57 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -612,6 +612,16 @@ export async function main() { const initializationResult = await initializeApp(config, settings); initAppHandle?.end(); + // Auto-start the LiteRT-LM server for Gemma local routing if configured. + // This is fire-and-forget — failures are logged but never block startup. 
+ import('./services/liteRtServerManager.js') + .then(({ LiteRtServerManager }) => + LiteRtServerManager.ensureRunning( + settings.merged.experimental?.gemmaModelRouter, + ), + ) + .catch((e) => debugLogger.warn('LiteRT auto-start import failed:', e)); + if ( settings.merged.security.auth.selectedType === AuthType.LOGIN_WITH_GOOGLE && diff --git a/packages/cli/src/services/BuiltinCommandLoader.ts b/packages/cli/src/services/BuiltinCommandLoader.ts index c1cbd5621e8..94b5986eb33 100644 --- a/packages/cli/src/services/BuiltinCommandLoader.ts +++ b/packages/cli/src/services/BuiltinCommandLoader.ts @@ -61,6 +61,7 @@ import { vimCommand } from '../ui/commands/vimCommand.js'; import { setupGithubCommand } from '../ui/commands/setupGithubCommand.js'; import { terminalSetupCommand } from '../ui/commands/terminalSetupCommand.js'; import { upgradeCommand } from '../ui/commands/upgradeCommand.js'; +import { gemmaStatusCommand } from '../ui/commands/gemmaStatusCommand.js'; /** * Loads the core, hard-coded slash commands that are an integral part @@ -221,6 +222,7 @@ export class BuiltinCommandLoader implements ICommandLoader { : [skillsCommand] : []), settingsCommand, + gemmaStatusCommand, tasksCommand, vimCommand, setupGithubCommand, diff --git a/packages/cli/src/services/liteRtServerManager.ts b/packages/cli/src/services/liteRtServerManager.ts new file mode 100644 index 00000000000..1f0de35fe3a --- /dev/null +++ b/packages/cli/src/services/liteRtServerManager.ts @@ -0,0 +1,88 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { debugLogger } from '@google/gemini-cli-core'; +import { + getBinaryPath, + isBinaryInstalled, + isServerRunning, +} from '../commands/gemma/platform.js'; +import { DEFAULT_PORT } from '../commands/gemma/constants.js'; + +// Use a local interface that includes the new fields, since the core +// package's compiled types may not include them until rebuilt. 
+interface GemmaSettings { + enabled?: boolean; + autoStartServer?: boolean; + binaryPath?: string; + classifier?: { host?: string; model?: string }; +} + +/** + * Manages the LiteRT-LM server lifecycle for auto-start during CLI startup. + * + * When the Gemma model router is enabled and `autoStartServer` is true, + * this manager ensures the server is running before the CLI enters + * interactive mode. The server is spawned as a detached daemon that + * persists across CLI sessions — it is NOT stopped when the CLI exits. + */ +export class LiteRtServerManager { + /** + * Ensures the LiteRT-LM server is running if the settings call for it. + * This is fire-and-forget: failures are logged but never block startup. + */ + static async ensureRunning( + gemmaSettings: GemmaSettings | undefined, + ): Promise { + if (!gemmaSettings?.enabled) return; + if (gemmaSettings.autoStartServer === false) return; + if (!isBinaryInstalled()) { + debugLogger.log( + '[LiteRtServerManager] Binary not installed, skipping auto-start. Run "gemini gemma setup".', + ); + return; + } + + const port = + parseInt( + gemmaSettings.classifier?.host?.match(/:(\d+)/)?.[1] ?? '', + 10, + ) || DEFAULT_PORT; + + const running = await isServerRunning(port); + if (running) { + debugLogger.log( + `[LiteRtServerManager] Server already running on port ${port}`, + ); + return; + } + + debugLogger.log( + `[LiteRtServerManager] Auto-starting LiteRT server on port ${port}...`, + ); + + try { + // Dynamic import to avoid circular dependencies and to keep the start + // logic in one place. 
+ const { startServer } = await import('../commands/gemma/start.js'); + const binaryPath = gemmaSettings.binaryPath || getBinaryPath() || ''; + if (!binaryPath) { + debugLogger.warn('[LiteRtServerManager] Could not resolve binary path'); + return; + } + const started = await startServer(binaryPath, port); + if (started) { + debugLogger.log(`[LiteRtServerManager] Server started on port ${port}`); + } else { + debugLogger.warn( + `[LiteRtServerManager] Server may not have started correctly on port ${port}`, + ); + } + } catch (error) { + debugLogger.warn('[LiteRtServerManager] Auto-start failed:', error); + } + } +} diff --git a/packages/cli/src/ui/commands/gemmaStatusCommand.ts b/packages/cli/src/ui/commands/gemmaStatusCommand.ts new file mode 100644 index 00000000000..2c581b31a1e --- /dev/null +++ b/packages/cli/src/ui/commands/gemmaStatusCommand.ts @@ -0,0 +1,41 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { CommandKind, type SlashCommand } from './types.js'; +import { MessageType, type HistoryItemGemmaStatus } from '../types.js'; +import { checkGemmaStatus } from '../../commands/gemma/status.js'; +import { GEMMA_MODEL_NAME } from '../../commands/gemma/constants.js'; + +export const gemmaStatusCommand: SlashCommand = { + name: 'gemma', + description: 'Check local Gemma model routing status', + kind: CommandKind.BUILT_IN, + autoExecute: true, + isSafeConcurrent: true, + action: async (context) => { + const port = + parseInt( + context.services.settings.merged.experimental?.gemmaModelRouter?.classifier?.host?.match( + /:(\d+)/, + )?.[1] ?? 
'', + 10, + ) || undefined; + const status = await checkGemmaStatus(port); + const item: Omit = { + type: MessageType.GEMMA_STATUS, + binaryInstalled: status.binaryInstalled, + binaryPath: status.binaryPath, + modelName: GEMMA_MODEL_NAME, + modelDownloaded: status.modelDownloaded, + serverRunning: status.serverRunning, + serverPid: status.serverPid, + serverPort: status.port, + settingsEnabled: status.settingsEnabled, + allPassing: status.allPassing, + }; + context.ui.addItem(item); + }, +}; diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.tsx index 0ceb70f8d72..5f384612867 100644 --- a/packages/cli/src/ui/components/HistoryItemDisplay.tsx +++ b/packages/cli/src/ui/components/HistoryItemDisplay.tsx @@ -32,6 +32,7 @@ import { ToolsList } from './views/ToolsList.js'; import { SkillsList } from './views/SkillsList.js'; import { AgentsStatus } from './views/AgentsStatus.js'; import { McpStatus } from './views/McpStatus.js'; +import { GemmaStatus } from './views/GemmaStatus.js'; import { ChatList } from './views/ChatList.js'; import { ModelMessage } from './messages/ModelMessage.js'; import { ThinkingMessage } from './messages/ThinkingMessage.js'; @@ -242,6 +243,9 @@ export const HistoryItemDisplay: React.FC = ({ {itemForDisplay.type === 'mcp_status' && ( )} + {itemForDisplay.type === 'gemma_status' && ( + + )} {itemForDisplay.type === 'chat_list' && ( )} diff --git a/packages/cli/src/ui/components/views/GemmaStatus.tsx b/packages/cli/src/ui/components/views/GemmaStatus.tsx new file mode 100644 index 00000000000..b9c20142d46 --- /dev/null +++ b/packages/cli/src/ui/components/views/GemmaStatus.tsx @@ -0,0 +1,126 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { Box, Text } from 'ink'; +import type React from 'react'; +import { theme } from '../../semantic-colors.js'; +import type { HistoryItemGemmaStatus } from '../../types.js'; + +type 
GemmaStatusProps = Omit; + +const StatusDot: React.FC<{ ok: boolean }> = ({ ok }) => ( + + {ok ? '\u25CF' : '\u25CB'} + +); + +export const GemmaStatus: React.FC = ({ + binaryInstalled, + binaryPath, + modelName, + modelDownloaded, + serverRunning, + serverPid, + serverPort, + settingsEnabled, + allPassing, +}) => ( + + Gemma Local Model Routing + + + {/* Binary */} + + + + {' '} + Binary: + {binaryInstalled ? ( + {binaryPath} + ) : ( + Not installed + )} + + + + {/* Model */} + + + + {' '} + Model: + {modelDownloaded ? ( + {modelName} + ) : ( + {modelName} not found + )} + + + + {/* Server */} + + + + {' '} + Server: + {serverRunning ? ( + + port {serverPort} + {serverPid ? ( + (PID {serverPid}) + ) : null} + + ) : ( + + not running on port {serverPort} + + )} + + + + {/* Settings */} + + + + {' '} + Settings: + {settingsEnabled ? ( + enabled + ) : ( + not enabled + )} + + + + {/* Active For */} + + Active for: + {allPassing ? ( + [routing] + ) : ( + none + )} + + + {/* Summary */} + + {allPassing ? ( + + + Simple requests route to Flash, complex requests to Pro. + + + This happens automatically on every request. + + + ) : ( + + Run "gemini gemma setup" to install and configure. 
+ + )} + + +); diff --git a/packages/cli/src/ui/types.ts b/packages/cli/src/ui/types.ts index 6fbc3151d8e..bd9d4f893a0 100644 --- a/packages/cli/src/ui/types.ts +++ b/packages/cli/src/ui/types.ts @@ -353,6 +353,19 @@ export interface JsonMcpResource { description?: string; } +export type HistoryItemGemmaStatus = HistoryItemBase & { + type: 'gemma_status'; + binaryInstalled: boolean; + binaryPath: string | null; + modelName: string; + modelDownloaded: boolean; + serverRunning: boolean; + serverPid: number | null; + serverPort: number; + settingsEnabled: boolean; + allPassing: boolean; +}; + export type HistoryItemMcpStatus = HistoryItemBase & { type: 'mcp_status'; servers: Record; @@ -402,6 +415,7 @@ export type HistoryItemWithoutId = | HistoryItemSkillsList | HistoryItemAgentsList | HistoryItemMcpStatus + | HistoryItemGemmaStatus | HistoryItemChatList | HistoryItemThinking | HistoryItemHint @@ -428,6 +442,7 @@ export enum MessageType { SKILLS_LIST = 'skills_list', AGENTS_LIST = 'agents_list', MCP_STATUS = 'mcp_status', + GEMMA_STATUS = 'gemma_status', CHAT_LIST = 'chat_list', HINT = 'hint', } diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 5e8507eba4d..8b96272d51e 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -217,6 +217,8 @@ export interface OutputSettings { export interface GemmaModelRouterSettings { enabled?: boolean; + autoStartServer?: boolean; + binaryPath?: string; classifier?: { host?: string; model?: string; From 8f023b56f7856860b425d1977719a56697472b5a Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Fri, 10 Apr 2026 13:40:22 -0700 Subject: [PATCH 06/33] feat(cli): add `gemini gemma logs` command to view LiteRT server logs Tails the LiteRT-LM server log file for live visibility into routing classification requests. Supports --lines N for last N lines, or follow mode (default) for live streaming. 
--- packages/cli/src/commands/gemma.ts | 2 + packages/cli/src/commands/gemma/logs.ts | 68 +++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 packages/cli/src/commands/gemma/logs.ts diff --git a/packages/cli/src/commands/gemma.ts b/packages/cli/src/commands/gemma.ts index 1d2a28c23ed..d7b88f06374 100644 --- a/packages/cli/src/commands/gemma.ts +++ b/packages/cli/src/commands/gemma.ts @@ -11,6 +11,7 @@ import { setupCommand } from './gemma/setup.js'; import { startCommand } from './gemma/start.js'; import { stopCommand } from './gemma/stop.js'; import { statusCommand } from './gemma/status.js'; +import { logsCommand } from './gemma/logs.js'; export const gemmaCommand: CommandModule = { command: 'gemma', @@ -25,6 +26,7 @@ export const gemmaCommand: CommandModule = { .command(defer(startCommand, 'gemma')) .command(defer(stopCommand, 'gemma')) .command(defer(statusCommand, 'gemma')) + .command(defer(logsCommand, 'gemma')) .demandCommand(1, 'You need at least one command before continuing.') .version(false), handler: () => { diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts new file mode 100644 index 00000000000..4420e7adb9d --- /dev/null +++ b/packages/cli/src/commands/gemma/logs.ts @@ -0,0 +1,68 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { CommandModule } from 'yargs'; +import fs from 'node:fs'; +import { spawn } from 'node:child_process'; +import { debugLogger } from '@google/gemini-cli-core'; +import { exitCli } from '../utils.js'; +import { getLogFilePath } from './constants.js'; + +export const logsCommand: CommandModule = { + command: 'logs', + describe: 'View LiteRT-LM server logs', + builder: (yargs) => + yargs + .option('lines', { + alias: 'n', + type: 'number', + description: 'Show the last N lines and exit (omit to follow live)', + }) + .option('follow', { + alias: 'f', + type: 'boolean', + default: true, + description: 
'Follow log output (default when --lines is not set)', + }), + handler: async (argv) => { + const logPath = getLogFilePath(); + + if (!fs.existsSync(logPath)) { + debugLogger.log(`No log file found at ${logPath}`); + debugLogger.log( + 'Is the LiteRT server running? Start it with: gemini gemma start', + ); + await exitCli(1); + return; + } + + const rawLines = argv['lines']; + const lines = Number.isFinite(rawLines) ? Number(rawLines) : undefined; + + if (lines !== undefined) { + // Show last N lines and exit. + const tailArgs = ['-n', String(lines), logPath]; + const child = spawn('tail', tailArgs, { stdio: 'inherit' }); + child.on('close', async (code) => { + await exitCli(code ?? 0); + }); + return; + } + + // Follow mode — stream live output until user presses Ctrl+C. + debugLogger.log(`Tailing ${logPath} (Ctrl+C to stop)\n`); + const tailArgs = ['-f', '-n', '20', logPath]; + const child = spawn('tail', tailArgs, { stdio: 'inherit' }); + + process.on('SIGINT', () => { + child.kill('SIGTERM'); + }); + + child.on('close', async (code) => { + await exitCli(code ?? 0); + }); + }, +}; From 82e87c40d62c68a244b63b804a75c368f8292733 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Fri, 10 Apr 2026 13:51:52 -0700 Subject: [PATCH 07/33] docs: add gemma setup quick-start guide --- docs/core/gemma-setup-guide.md | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 docs/core/gemma-setup-guide.md diff --git a/docs/core/gemma-setup-guide.md b/docs/core/gemma-setup-guide.md new file mode 100644 index 00000000000..05f3b104ff6 --- /dev/null +++ b/docs/core/gemma-setup-guide.md @@ -0,0 +1,65 @@ +# `gemini gemma` — Local Model Routing Setup + +## What is this? + +Routes simple requests to Flash and complex requests to Pro using a local Gemma +3 1B model running on your machine. Saves cloud API costs and adds a few ms of +local inference instead of a cloud classifier round-trip. 
+ +## Quick Start + +```bash +# One command does everything: downloads runtime, pulls model, configures settings, starts server +gemini gemma setup +``` + +You'll be prompted to accept the Gemma Terms of Use. The model is ~1 GB. + +After setup, **just use the CLI normally** — routing happens automatically on +every request. + +## Commands + +| Command | What it does | +| --------------------- | -------------------------------------------------------------- | +| `gemini gemma setup` | Full install (binary + model + settings + server start) | +| `gemini gemma status` | Health check — shows what's installed and running | +| `gemini gemma start` | Start the LiteRT server (auto-starts on CLI launch by default) | +| `gemini gemma stop` | Stop the LiteRT server | +| `gemini gemma logs` | Tail the server logs to see routing requests live | +| `/gemma` | In-session status check (type it inside the CLI) | + +## Verifying it works + +1. Run `gemini gemma status` — all checks should show green +2. Open two terminals: + - Terminal 1: `gemini gemma logs` (watch for incoming requests) + - Terminal 2: use the CLI normally +3. You should see classification requests appear in the logs as you interact + with the CLI +4. 
The `/gemma` slash command inside a session shows a quick status panel + +## Setup flags + +```bash +gemini gemma setup --port 8080 # custom port +gemini gemma setup --no-start # don't start server after install +gemini gemma setup --force # re-download everything +gemini gemma setup --skip-model # binary only, skip the 1GB model download +``` + +## How it works under the hood + +- Local Gemma classifies each request as "simple" or "complex" (~100ms) +- Simple → Flash, Complex → Pro +- If the local server is down, the CLI silently falls back to the cloud + classifier — no errors, no disruption + +## Disabling + +Set `enabled: false` in settings or just run `gemini gemma stop` to turn off the +server: + +```json +{ "experimental": { "gemmaModelRouter": { "enabled": false } } } +``` From e0f043a6732bc1b03432cc196b00a4369a5dabea Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Mon, 13 Apr 2026 12:35:10 -0700 Subject: [PATCH 08/33] fix(core): set apiVersion to empty string for LiteRT-LM client The Google GenAI SDK defaults to 'v1beta' as the API version prefix, producing URLs like /v1beta/models/...:generateContent. The LiteRT-LM server does not use a versioned API path, causing 404 responses. Setting apiVersion to '' removes the prefix. --- packages/core/src/core/localLiteRtLmClient.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/core/src/core/localLiteRtLmClient.ts b/packages/core/src/core/localLiteRtLmClient.ts index 798dcb57656..3aa111eb3d5 100644 --- a/packages/core/src/core/localLiteRtLmClient.ts +++ b/packages/core/src/core/localLiteRtLmClient.ts @@ -27,6 +27,9 @@ export class LocalLiteRtLmClient { apiKey: 'no-api-key-needed', httpOptions: { baseUrl: this.host, + // The LiteRT-LM server does not use a versioned API path prefix. + // The SDK defaults to 'v1beta' which produces 404s against LiteRT-LM. 
+ apiVersion: '', // If the LiteRT-LM server is started but the wrong port is set, there will be a lengthy TCP timeout (here fixed to be 10 seconds). // If the LiteRT-LM server is not started, there will be an immediate connection refusal. // If the LiteRT-LM server is started and the model is unsupported or not downloaded, the server will return an error immediately. From ab8dc2dd01fc85953657bbb1e5ff88e3175356d0 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Mon, 13 Apr 2026 12:42:07 -0700 Subject: [PATCH 09/33] Revert "fix(core): set apiVersion to empty string for LiteRT-LM client" This reverts commit e0f043a6732bc1b03432cc196b00a4369a5dabea. --- packages/core/src/core/localLiteRtLmClient.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/core/src/core/localLiteRtLmClient.ts b/packages/core/src/core/localLiteRtLmClient.ts index 3aa111eb3d5..798dcb57656 100644 --- a/packages/core/src/core/localLiteRtLmClient.ts +++ b/packages/core/src/core/localLiteRtLmClient.ts @@ -27,9 +27,6 @@ export class LocalLiteRtLmClient { apiKey: 'no-api-key-needed', httpOptions: { baseUrl: this.host, - // The LiteRT-LM server does not use a versioned API path prefix. - // The SDK defaults to 'v1beta' which produces 404s against LiteRT-LM. - apiVersion: '', // If the LiteRT-LM server is started but the wrong port is set, there will be a lengthy TCP timeout (here fixed to be 10 seconds). // If the LiteRT-LM server is not started, there will be an immediate connection refusal. // If the LiteRT-LM server is started and the model is unsupported or not downloaded, the server will return an error immediately. 
From d0dd169cccba35bb27ba4f25807bd551835bebe1 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Mon, 13 Apr 2026 12:51:42 -0700 Subject: [PATCH 10/33] feat(cli): show gemma router settings in /settings dialog --- packages/cli/src/config/settingsSchema.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 6f946eb963d..f37faad78dd 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -2144,7 +2144,7 @@ const SETTINGS_SCHEMA = { default: false, description: 'Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.', - showInDialog: false, + showInDialog: true, }, autoStartServer: { type: 'boolean', @@ -2154,7 +2154,7 @@ const SETTINGS_SCHEMA = { default: true, description: 'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.', - showInDialog: false, + showInDialog: true, }, binaryPath: { type: 'string', From 8d3ac5213ce28c1066ed70a00d53f43b8c63abd9 Mon Sep 17 00:00:00 2001 From: Abhijit Balaji Date: Mon, 13 Apr 2026 14:39:15 -0700 Subject: [PATCH 11/33] fix(gemma): resolve 404 errors and improve port resolution (#25340) --- packages/cli/src/commands/gemma/platform.ts | 29 +++++++++++++++++++ packages/cli/src/commands/gemma/start.ts | 12 ++++++-- packages/cli/src/commands/gemma/status.ts | 21 +++++--------- packages/cli/src/commands/gemma/stop.ts | 16 +++++++--- .../core/src/core/localLiteRtLmClient.test.ts | 10 +++++++ packages/core/src/core/localLiteRtLmClient.ts | 2 ++ 6 files changed, 71 insertions(+), 19 deletions(-) diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts index e39d99e557f..ef5c5288a65 100644 --- a/packages/cli/src/commands/gemma/platform.ts +++ b/packages/cli/src/commands/gemma/platform.ts @@ -4,6 +4,7 @@ * 
SPDX-License-Identifier: Apache-2.0 */ +import { loadSettings } from '../../config/settings.js'; import fs from 'node:fs'; import path from 'node:path'; import { execFileSync } from 'node:child_process'; @@ -22,6 +23,34 @@ export interface PlatformInfo { binaryName: string; } +export interface GemmaConfigStatus { + settingsEnabled: boolean; + configuredPort: number; +} + +/** + * Resolves the Gemma configuration from the workspace settings. + */ +export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus { + let settingsEnabled = false; + let configuredPort = fallbackPort; + try { + const settings = loadSettings(process.cwd()); + const gemmaSettings = settings.merged.experimental?.gemmaModelRouter; + settingsEnabled = gemmaSettings?.enabled === true; + const hostStr = gemmaSettings?.classifier?.host; + if (hostStr) { + const match = hostStr.match(/:(\d+)/); + if (match) { + configuredPort = parseInt(match[1], 10); + } + } + } catch { + // Settings may fail to load in some contexts; treat as not enabled. + } + return { settingsEnabled, configuredPort }; +} + /** * Detects the current platform and resolves the corresponding LiteRT-LM binary name. * Returns null if the platform is unsupported. 
diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts index 8918daa9faf..02b1bd0e4a6 100644 --- a/packages/cli/src/commands/gemma/start.ts +++ b/packages/cli/src/commands/gemma/start.ts @@ -21,6 +21,7 @@ import { getBinaryPath, isBinaryInstalled, isServerRunning, + resolveGemmaConfig, } from './platform.js'; /** @@ -78,11 +79,18 @@ export const startCommand: CommandModule = { builder: (yargs) => yargs.option('port', { type: 'number', - default: DEFAULT_PORT, description: 'Port for the LiteRT server', }), handler: async (argv) => { - const port = Number(argv['port']); + let port: number | undefined; + if (argv['port'] !== undefined) { + port = Number(argv['port']); + } + + if (!port) { + const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT); + port = configuredPort; + } if (!isBinaryInstalled()) { debugLogger.error( diff --git a/packages/cli/src/commands/gemma/status.ts b/packages/cli/src/commands/gemma/status.ts index 4a265dd9445..1e061d7bc57 100644 --- a/packages/cli/src/commands/gemma/status.ts +++ b/packages/cli/src/commands/gemma/status.ts @@ -6,7 +6,6 @@ import type { CommandModule } from 'yargs'; import chalk from 'chalk'; -import { loadSettings } from '../../config/settings.js'; import { DEFAULT_PORT, GEMMA_MODEL_NAME } from './constants.js'; import { detectPlatform, @@ -16,6 +15,7 @@ import { isServerRunning, readServerPid, isProcessRunning, + resolveGemmaConfig, } from './platform.js'; import { exitCli } from '../utils.js'; @@ -38,7 +38,9 @@ export interface GemmaStatusResult { export async function checkGemmaStatus( port?: number, ): Promise { - const effectivePort = port ?? DEFAULT_PORT; + const { settingsEnabled, configuredPort } = resolveGemmaConfig(DEFAULT_PORT); + + const effectivePort = port ?? 
configuredPort; const binaryPath = getBinaryPath(); const binaryInstalled = isBinaryInstalled(); const modelDownloaded = @@ -47,15 +49,6 @@ export async function checkGemmaStatus( const pid = readServerPid(); const serverPid = pid && isProcessRunning(pid) ? pid : null; - let settingsEnabled = false; - try { - const settings = loadSettings(process.cwd()); - const gemmaSettings = settings.merged.experimental?.gemmaModelRouter; - settingsEnabled = gemmaSettings?.enabled === true; - } catch { - // Settings may fail to load in some contexts; treat as not enabled. - } - const allPassing = binaryInstalled && modelDownloaded && serverRunning && settingsEnabled; @@ -167,11 +160,13 @@ export const statusCommand: CommandModule = { builder: (yargs) => yargs.option('port', { type: 'number', - default: DEFAULT_PORT, description: 'Port to check for the LiteRT server', }), handler: async (argv) => { - const port = Number(argv['port']); + let port: number | undefined; + if (argv['port'] !== undefined) { + port = Number(argv['port']); + } const status = await checkGemmaStatus(port); const output = formatGemmaStatus(status); // Use process.stdout directly for consistent output in non-interactive mode. 
diff --git a/packages/cli/src/commands/gemma/stop.ts b/packages/cli/src/commands/gemma/stop.ts index 15db60eaa8e..409989e33c3 100644 --- a/packages/cli/src/commands/gemma/stop.ts +++ b/packages/cli/src/commands/gemma/stop.ts @@ -14,6 +14,7 @@ import { readServerPid, isProcessRunning, isServerRunning, + resolveGemmaConfig, } from './platform.js'; /** @@ -66,18 +67,25 @@ export async function stopServer(): Promise { return true; } - export const stopCommand: CommandModule = { command: 'stop', describe: 'Stop the LiteRT-LM server', builder: (yargs) => yargs.option('port', { type: 'number', - default: DEFAULT_PORT, - description: 'Port the server is running on', + description: 'Port where the LiteRT server is running', }), handler: async (argv) => { - const port = Number(argv['port']); + let port: number | undefined; + if (argv['port'] !== undefined) { + port = Number(argv['port']); + } + + if (!port) { + const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT); + port = configuredPort; + } + const pid = readServerPid(); if (pid !== null && isProcessRunning(pid)) { diff --git a/packages/core/src/core/localLiteRtLmClient.test.ts b/packages/core/src/core/localLiteRtLmClient.test.ts index c4398b5b9c1..6c64143ec3d 100644 --- a/packages/core/src/core/localLiteRtLmClient.test.ts +++ b/packages/core/src/core/localLiteRtLmClient.test.ts @@ -7,6 +7,8 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; import { LocalLiteRtLmClient } from './localLiteRtLmClient.js'; import type { Config } from '../config/config.js'; +import { GoogleGenAI } from '@google/genai'; + const mockGenerateContent = vi.fn(); vi.mock('@google/genai', () => { @@ -44,6 +46,14 @@ describe('LocalLiteRtLmClient', () => { const result = await client.generateJson([], 'test-instruction'); expect(result).toEqual({ key: 'value' }); + expect(GoogleGenAI).toHaveBeenCalledWith( + expect.objectContaining({ + apiVersion: 'v1beta', + httpOptions: expect.objectContaining({ + baseUrl: 
'http://test-host:1234', + }), + }), + ); expect(mockGenerateContent).toHaveBeenCalledWith( expect.objectContaining({ model: 'gemma:latest', diff --git a/packages/core/src/core/localLiteRtLmClient.ts b/packages/core/src/core/localLiteRtLmClient.ts index 798dcb57656..82fa44e87b9 100644 --- a/packages/core/src/core/localLiteRtLmClient.ts +++ b/packages/core/src/core/localLiteRtLmClient.ts @@ -25,6 +25,8 @@ export class LocalLiteRtLmClient { this.client = new GoogleGenAI({ // The LiteRT-LM server does not require an API key, but the SDK requires one to be set even for local endpoints. This is a dummy value and is not used for authentication. apiKey: 'no-api-key-needed', + apiVersion: 'v1beta', + vertexai: false, httpOptions: { baseUrl: this.host, // If the LiteRT-LM server is started but the wrong port is set, there will be a lengthy TCP timeout (here fixed to be 10 seconds). From c0117b4484fd5321b16a59dc1964fbfeac39bc41 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Wed, 15 Apr 2026 15:00:48 -0700 Subject: [PATCH 12/33] chore(docs): regenerate settings schema and docs --- docs/codebase_understanding.md | 58 +++++++++++++++++++-------------- docs/reference/configuration.md | 12 +++++++ schemas/settings.schema.json | 14 ++++++++ 3 files changed, 60 insertions(+), 24 deletions(-) diff --git a/docs/codebase_understanding.md b/docs/codebase_understanding.md index 34a2ee3c942..29c9bf692ab 100644 --- a/docs/codebase_understanding.md +++ b/docs/codebase_understanding.md @@ -1,13 +1,13 @@ # Codebase understanding This document provides a deep-dive technical overview of the Gemini CLI -architecture. It is designed for developers who need to understand the -system's inner workings, from startup to advanced autonomous behaviors. +architecture. It is designed for developers who need to understand the system's +inner workings, from startup to advanced autonomous behaviors. 
## Repository architecture -Gemini CLI is a monorepo structured to maintain a strict separation between -the user interface and the agent's core reasoning logic. +Gemini CLI is a monorepo structured to maintain a strict separation between the +user interface and the agent's core reasoning logic. - **`packages/cli`**: The Terminal User Interface (TUI). Built with React and Ink, it manages the interactive terminal experience, including keyboard @@ -27,17 +27,18 @@ the user interface and the agent's core reasoning logic. ## 1. Application lifecycle ### Startup and initialization + The entry point is `packages/cli/src/gemini.tsx`. The startup sequence is designed for security and resilience: 1. **I/O redirection**: Standard output streams (`stdout`, `stderr`) are patched to capture all logs and errors. This allows the CLI to redirect - diagnostic information to the TUI's debug console or a remote DevTools server - without corrupting the user's terminal interface. -2. **Memory-aware relaunch**: The CLI checks the host system's total memory. - If it detects that Node.js's default heap limit is insufficient for complex - codebase analysis, it re-launches itself using the - `--max-old-space-size` flag, targeting approximately 50% of system memory. + diagnostic information to the TUI's debug console or a remote DevTools + server without corrupting the user's terminal interface. +2. **Memory-aware relaunch**: The CLI checks the host system's total memory. If + it detects that Node.js's default heap limit is insufficient for complex + codebase analysis, it re-launches itself using the `--max-old-space-size` + flag, targeting approximately 50% of system memory. 3. **Sandboxing**: If configured, the CLI launches a restricted "sandbox" environment (using Docker, Podman, or a localized process) to isolate the agent's autonomous actions from the host system. @@ -52,18 +53,19 @@ designed for security and resilience: ## 2. 
Model routing and selection -The `ModelRouterService` (`packages/core/src/routing`) implements a -"Composite Strategy" to select the optimal model for every request. +The `ModelRouterService` (`packages/core/src/routing`) implements a "Composite +Strategy" to select the optimal model for every request. ### Routing strategies + - **classifier**: Uses a lightweight LLM call to categorize the complexity of a task based on a rubric (Strategic Planning, Multi-step Coordination, Ambiguity). It chooses between a "Pro" model (for complex reasoning) and a "Flash" model (for simple operations). - **approvalMode**: Selects specialized models (like `gemini-2.0-flash-lite`) when the agent is in specific modes like `Plan Mode`. -- **numericalClassifier**: A deterministic strategy that selects models based - on the number of tokens in the conversation or the length of the history. +- **numericalClassifier**: A deterministic strategy that selects models based on + the number of tokens in the conversation or the length of the history. - **fallback**: Automatically switches models if the primary model encounters quota limits (429) or transient API failures. @@ -75,7 +77,9 @@ The agent maintains deep project awareness while staying within token limits through several services in `packages/core/src/services`: ### ChatCompressionService + Triggered when the history exceeds 50% of the model's context window: + 1. **State snapshots**: The agent generates a structured `` representing the cumulative knowledge of the session (constraints, progress, paths). @@ -85,6 +89,7 @@ Triggered when the history exceeds 50% of the model's context window: the history. ### ToolOutputMaskingService + Prevents bulky data (like large shell outputs or file reads) from clogging the context window. It replaces large `functionResponse` blocks with concise summaries and persists the full data to temporary files, allowing the agent to @@ -94,13 +99,16 @@ refer to the full data only when necessary. ## 4. 
Advanced tool execution and scheduling -The `Scheduler` (`packages/core/src/scheduler`) is an event-driven state -machine that manages the lifecycle of autonomous actions. +The `Scheduler` (`packages/core/src/scheduler`) is an event-driven state machine +that manages the lifecycle of autonomous actions. ### Lifecycle states -`Validating` → `AwaitingApproval` → `Scheduled` → `Executing` → `Success`/`Error` + +`Validating` → `AwaitingApproval` → `Scheduled` → `Executing` → +`Success`/`Error` ### Key features + - **Policy Engine**: A granular system that evaluates tools based on security policies (e.g., "Allow read-only tools", "Ask for shell commands"). It can be configured at the project or user level. @@ -121,17 +129,18 @@ machine that manages the lifecycle of autonomous actions. The `packages/cli/src/ui` directory implements a sophisticated React-based TUI. ### Keyboard and protocols + - **KeypressProvider**: Manages terminal input, supporting complex key combinations and shortcuts. -- **Kitty keyboard protocol**: Detects terminals that support the Kitty - protocol to enable advanced features like detecting `ctrl+enter` vs `enter`. +- **Kitty keyboard protocol**: Detects terminals that support the Kitty protocol + to enable advanced features like detecting `ctrl+enter` vs `enter`. - **Vim mode**: A dedicated provider that enables Vim-like navigation (hjkl, words, search) for both conversation history and input fields. ### Layout and rendering -- **ResizeObserver**: A custom implementation that watches the terminal size - to ensure components (like multi-column layouts or wide tables) adapt - instantly. + +- **ResizeObserver**: A custom implementation that watches the terminal size to + ensure components (like multi-column layouts or wide tables) adapt instantly. - **ConsolePatcher**: Intercepts `console.log`, `console.warn`, and `console.error`, routing them to the internal debug console (toggled with `ctrl+d`) or the external DevTools server. 
@@ -141,9 +150,10 @@ The `packages/cli/src/ui` directory implements a sophisticated React-based TUI. ## 6. Testing and validation Gemini CLI uses a tiered testing strategy to ensure reliability: + 1. **Unit tests**: Located alongside the source (`*.test.ts`), providing fast coverage for core logic. -2. **Integration tests**: Located in `integration-tests/`, running the - full CLI against mock and real Gemini API endpoints. +2. **Integration tests**: Located in `integration-tests/`, running the full CLI + against mock and real Gemini API endpoints. 3. **Evals**: Performance benchmarks in `evals/` that measure the agent's reasoning accuracy and tool-use efficiency over time. diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 05368f20fe6..b6c64a80b37 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1701,6 +1701,18 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `false` - **Requires restart:** Yes +- **`experimental.gemmaModelRouter.autoStartServer`** (boolean): + - **Description:** Automatically start the LiteRT-LM server when Gemini CLI + starts and the Gemma router is enabled. + - **Default:** `true` + - **Requires restart:** Yes + +- **`experimental.gemmaModelRouter.binaryPath`** (string): + - **Description:** Custom path to the LiteRT-LM binary. Leave empty to use the + default location (~/.gemini/bin/litert/). + - **Default:** `""` + - **Requires restart:** Yes + - **`experimental.gemmaModelRouter.classifier.host`** (string): - **Description:** The host of the classifier. 
- **Default:** `"http://localhost:9379"` diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index 98bc786410d..d01ec0dc1df 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -2905,6 +2905,20 @@ "default": false, "type": "boolean" }, + "autoStartServer": { + "title": "Auto-start LiteRT Server", + "description": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.", + "markdownDescription": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `true`", + "default": true, + "type": "boolean" + }, + "binaryPath": { + "title": "LiteRT Binary Path", + "description": "Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).", + "markdownDescription": "Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: ``", + "default": "", + "type": "string" + }, "classifier": { "title": "Classifier", "description": "Classifier configuration.", From 509060e74ec470ce42d107df1896efd3e4a64fb7 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Wed, 15 Apr 2026 16:27:36 -0700 Subject: [PATCH 13/33] rm docs --- docs/codebase_understanding.md | 159 ------------------------ docs/codebase_understanding_antigrav.md | 101 --------------- 2 files changed, 260 deletions(-) delete mode 100644 docs/codebase_understanding.md delete mode 100644 docs/codebase_understanding_antigrav.md diff --git a/docs/codebase_understanding.md b/docs/codebase_understanding.md deleted file mode 100644 index 29c9bf692ab..00000000000 --- a/docs/codebase_understanding.md +++ /dev/null @@ -1,159 +0,0 @@ -# Codebase understanding - -This document provides a deep-dive technical overview of the Gemini CLI -architecture. 
It is designed for developers who need to understand the system's -inner workings, from startup to advanced autonomous behaviors. - -## Repository architecture - -Gemini CLI is a monorepo structured to maintain a strict separation between the -user interface and the agent's core reasoning logic. - -- **`packages/cli`**: The Terminal User Interface (TUI). Built with React and - Ink, it manages the interactive terminal experience, including keyboard - protocols, rendering, and terminal state management. -- **`packages/core`**: The UI-agnostic engine. It contains the primary - orchestration logic, model routing, tool systems, policy enforcement, and - Gemini API communication. -- **`packages/devtools`**: A suite for real-time inspection of network traffic, - console logs, and session activity. -- **`packages/sdk`**: A library for developers to build third-party tools and - extensions. -- **`packages/vscode-ide-companion`**: A specialized bridge that feeds real-time - editor state (open files, active selections, cursor positions) to the agent. - ---- - -## 1. Application lifecycle - -### Startup and initialization - -The entry point is `packages/cli/src/gemini.tsx`. The startup sequence is -designed for security and resilience: - -1. **I/O redirection**: Standard output streams (`stdout`, `stderr`) are - patched to capture all logs and errors. This allows the CLI to redirect - diagnostic information to the TUI's debug console or a remote DevTools - server without corrupting the user's terminal interface. -2. **Memory-aware relaunch**: The CLI checks the host system's total memory. If - it detects that Node.js's default heap limit is insufficient for complex - codebase analysis, it re-launches itself using the `--max-old-space-size` - flag, targeting approximately 50% of system memory. -3. 
**Sandboxing**: If configured, the CLI launches a restricted "sandbox" - environment (using Docker, Podman, or a localized process) to isolate the - agent's autonomous actions from the host system. -4. **Interactive (TUI) vs. Non-interactive (CLI)**: - - **Interactive mode**: Initializes the Ink renderer, starting a persistent - React application that manages terminal state via providers. - - **Non-interactive mode**: Executes a streamlined loop in - `nonInteractiveCli.ts`, designed for single prompts or piped input/output - redirection. - ---- - -## 2. Model routing and selection - -The `ModelRouterService` (`packages/core/src/routing`) implements a "Composite -Strategy" to select the optimal model for every request. - -### Routing strategies - -- **classifier**: Uses a lightweight LLM call to categorize the complexity of a - task based on a rubric (Strategic Planning, Multi-step Coordination, - Ambiguity). It chooses between a "Pro" model (for complex reasoning) and a - "Flash" model (for simple operations). -- **approvalMode**: Selects specialized models (like `gemini-2.0-flash-lite`) - when the agent is in specific modes like `Plan Mode`. -- **numericalClassifier**: A deterministic strategy that selects models based on - the number of tokens in the conversation or the length of the history. -- **fallback**: Automatically switches models if the primary model encounters - quota limits (429) or transient API failures. - ---- - -## 3. Intelligent context management - -The agent maintains deep project awareness while staying within token limits -through several services in `packages/core/src/services`: - -### ChatCompressionService - -Triggered when the history exceeds 50% of the model's context window: - -1. **State snapshots**: The agent generates a structured `` - representing the cumulative knowledge of the session (constraints, progress, - paths). -2. 
**The "Probe" (Self-Correction)**: A second LLM pass compares the summary - against the original history to ensure no critical technical details or - user-defined constraints were lost, correcting the summary before purging - the history. - -### ToolOutputMaskingService - -Prevents bulky data (like large shell outputs or file reads) from clogging the -context window. It replaces large `functionResponse` blocks with concise -summaries and persists the full data to temporary files, allowing the agent to -refer to the full data only when necessary. - ---- - -## 4. Advanced tool execution and scheduling - -The `Scheduler` (`packages/core/src/scheduler`) is an event-driven state machine -that manages the lifecycle of autonomous actions. - -### Lifecycle states - -`Validating` → `AwaitingApproval` → `Scheduled` → `Executing` → -`Success`/`Error` - -### Key features - -- **Policy Engine**: A granular system that evaluates tools based on security - policies (e.g., "Allow read-only tools", "Ask for shell commands"). It can be - configured at the project or user level. -- **Tail calls**: Allows a tool to "link" to another action. For example, a - shell command that produces an error can automatically trigger a "diagnostic" - tool without returning control to the main model. -- **Parallelism**: The scheduler executes independent read-only tools in - parallel while enforcing sequential execution for tools that modify the - environment. -- **MCP integration**: Dynamically loads tools from Model Context Protocol - servers, integrating them seamlessly into the same policy and scheduler - framework. - ---- - -## 5. UI and terminal integration - -The `packages/cli/src/ui` directory implements a sophisticated React-based TUI. - -### Keyboard and protocols - -- **KeypressProvider**: Manages terminal input, supporting complex key - combinations and shortcuts. 
-- **Kitty keyboard protocol**: Detects terminals that support the Kitty protocol - to enable advanced features like detecting `ctrl+enter` vs `enter`. -- **Vim mode**: A dedicated provider that enables Vim-like navigation (hjkl, - words, search) for both conversation history and input fields. - -### Layout and rendering - -- **ResizeObserver**: A custom implementation that watches the terminal size to - ensure components (like multi-column layouts or wide tables) adapt instantly. -- **ConsolePatcher**: Intercepts `console.log`, `console.warn`, and - `console.error`, routing them to the internal debug console (toggled with - `ctrl+d`) or the external DevTools server. - ---- - -## 6. Testing and validation - -Gemini CLI uses a tiered testing strategy to ensure reliability: - -1. **Unit tests**: Located alongside the source (`*.test.ts`), providing fast - coverage for core logic. -2. **Integration tests**: Located in `integration-tests/`, running the full CLI - against mock and real Gemini API endpoints. -3. **Evals**: Performance benchmarks in `evals/` that measure the agent's - reasoning accuracy and tool-use efficiency over time. diff --git a/docs/codebase_understanding_antigrav.md b/docs/codebase_understanding_antigrav.md deleted file mode 100644 index 60d5dbd2058..00000000000 --- a/docs/codebase_understanding_antigrav.md +++ /dev/null @@ -1,101 +0,0 @@ -# Gemini CLI - Codebase Understanding - -Gemini CLI is an open-source AI agent designed to let you interact with Google's -Gemini models directly from your terminal. It's built as a **TypeScript -monorepo** (using npm workspaces) and relies heavily on **Node.js**, **React**, -and **Ink** (a library that lets you build terminal UIs using React components). - -Here is a high-level walkthrough of the repository to help you understand how -all the pieces fit together. - -## 1. 
High-Level Architecture (The `packages/` Directory) - -The project is split into several focused packages to maintain a clean -separation of concerns: - -- **`packages/cli`** (The Frontend) - - This is the user-facing terminal UI. - - It uses React + Ink. This means the terminal layout, styling, and - interactions are managed like a modern web app (with hooks, contexts, and - components). - - It handles all the terminal-specific logic like key bindings, processing - mouse/keyboard events, and rendering the chat stream or tool progress - indicators. -- **`packages/core`** (The Brain/Backend) - - This is where the actual "agentic" logic lives. It is entirely UI-agnostic. - - Contains the core looping mechanism that communicates with the Gemini API, - maintains conversation history, compresses context, and evaluates whether - the agent needs to invoke a tool. - - Houses the **Tool Registry** (file system tools, shell runner, web tools) - and the **Policy Engine** (deciding if a tool is safe to run automatically - or needs your permission). -- **`packages/devtools`** - - A Chrome DevTools-like web server that runs locally! If you enable - `general.devtools` in your settings, you can inspect network requests, agent - thoughts, and console logs in a local browser, just like you would for a web - app. -- **`packages/vscode-ide-companion`** - - A VS Code extension that pairs dynamically with the CLI. It allows the - terminal agent to "read" your active editor state, seamlessly pulling - context on exactly what files or lines of code you currently have - highlighted in VS Code. -- **`packages/sdk`** - - Provides libraries and types so people can build custom MCP (Model Context - Protocol) extensions or tools for the CLI. -- **`packages/a2a-server`** - - An experimental Agent-to-Agent server, hinting at future capabilities for - having different agents talk to each other. - -## 2. 
The Core Application Lifecycle - -When you type `gemini` in your terminal, here's roughly what happens under the -hood: - -1. **Bootstrapping (`packages/cli/src/gemini.tsx`)**: The CLI loads user - configurations, parses command-line arguments, checks authentication, and - verifies if it needs to launch itself in a controlled "sandbox" environment - (using Docker/Podman to isolate dangerous shell tools). -2. **Mode Resolution**: It determines if you are piping data in or running a - single command (`nonInteractiveCli.ts`), or if you are firing up the chat - TUI (Terminal User Interface). -3. **The Agent Loop (`packages/core/src/core/`)**: - - **`GeminiClient`**: The main orchestrator. It manages sessions and - compresses chat histories using `ChatCompressionService` so you don't - breach token limits. - - **`GeminiChat` & `Turn`**: For every prompt you send, a `Turn` is created. - This represents one "exchange" where the model might think, respond, and - realize it needs to search your codebase. It streams these requests back - in real-time. - -## 3. The Tool System & Execution - -The most powerful aspect of this CLI is its ability to interact with your -environment. - -- In `packages/core/src/tools/`, there are native TypeScript implementations for - operations (like reading files, searching directories, or running tests). -- When Gemini asks to use a tool, the **Scheduler** - (`packages/core/src/scheduler/`) intercepts the request. -- It runs the request through the **Policy Engine** - (`packages/core/src/policy/`). Some commands (like `rm -rf`) are flagged and - routed to a **Confirmation Bus**, which pauses execution and asks you in the - UI: _"Do you want to allow this command?"_ -- Once approved (or auto-approved), it executes the tool, captures standard - output/error, and pipes that text back to Gemini to continue its thought - process. - -## 4. 
Code Quality, Building, and Testing - -- **Bundling & Running**: The project uses `esbuild` to compile everything very - quickly. During development, you can use `npm run start` or `npm run debug` - (which attaches a Node.js inspector). -- **Testing (`vitest`)**: Testing is extremely rigorous here. - - _Unit Tests:_ `npm run test` handles basic component functionality. - - _Integration Tests:_ `npm run test:e2e` simulates an actual sandbox, - mocking/hitting models to make sure the CLI interacts realistically. - - _Evals (`evals/`):_ Standalone performance benchmarks where they evaluate - how smart the CLI is at navigating codebases or using its tools - autonomously. -- **`npm run preflight`**: Before a PR is pushed, this massive script runs - formatting (Prettier), linting (ESLint), type checking (TypeScript), unit - testing, and building, ensuring nothing breaks the main branch. From 3b2243dcde29cc8053f1b07e78867773d2704188 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Thu, 16 Apr 2026 18:12:31 -0700 Subject: [PATCH 14/33] fix(cli): correctness and cross-platform fixes for gemma commands --- packages/cli/src/commands/gemma/logs.ts | 29 +++++++++++++++++++ packages/cli/src/commands/gemma/platform.ts | 6 ++-- packages/cli/src/commands/gemma/setup.ts | 28 +++++++++--------- packages/cli/src/commands/gemma/start.ts | 3 +- .../cli/src/services/liteRtServerManager.ts | 12 ++------ 5 files changed, 49 insertions(+), 29 deletions(-) diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts index 4420e7adb9d..473f3557fdc 100644 --- a/packages/cli/src/commands/gemma/logs.ts +++ b/packages/cli/src/commands/gemma/logs.ts @@ -11,6 +11,22 @@ import { debugLogger } from '@google/gemini-cli-core'; import { exitCli } from '../utils.js'; import { getLogFilePath } from './constants.js'; +/** + * Reads the last N lines from a file using Node.js APIs. + * Used as a cross-platform fallback when `tail` is unavailable (Windows). 
+ */ +function readLastLines(filePath: string, count: number): string { + const content = fs.readFileSync(filePath, 'utf-8'); + const lines = content.split('\n'); + // If the file ends with a newline, the last element is empty — skip it. + if (lines.length > 0 && lines[lines.length - 1] === '') { + lines.pop(); + } + return lines.slice(-count).join('\n') + '\n'; +} + +const isWindows = process.platform === 'win32'; + export const logsCommand: CommandModule = { command: 'logs', describe: 'View LiteRT-LM server logs', @@ -43,6 +59,11 @@ export const logsCommand: CommandModule = { const lines = Number.isFinite(rawLines) ? Number(rawLines) : undefined; if (lines !== undefined) { + if (isWindows) { + process.stdout.write(readLastLines(logPath, lines)); + await exitCli(0); + return; + } // Show last N lines and exit. const tailArgs = ['-n', String(lines), logPath]; const child = spawn('tail', tailArgs, { stdio: 'inherit' }); @@ -52,6 +73,14 @@ export const logsCommand: CommandModule = { return; } + if (isWindows) { + debugLogger.log( + 'Live log following is not supported on Windows. Use --lines N to view recent logs.', + ); + await exitCli(1); + return; + } + // Follow mode — stream live output until user presses Ctrl+C. debugLogger.log(`Tailing ${logPath} (Ctrl+C to stop)\n`); const tailArgs = ['-f', '-n', '20', logPath]; diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts index ef5c5288a65..d1c9373f685 100644 --- a/packages/cli/src/commands/gemma/platform.ts +++ b/packages/cli/src/commands/gemma/platform.ts @@ -110,12 +110,10 @@ export async function isServerRunning(port: number): Promise { () => controller.abort(), HEALTH_CHECK_TIMEOUT_MS, ); - const response = await fetch(`http://localhost:${port}/`, { - signal: controller.signal, - }); + await fetch(`http://localhost:${port}/`, { signal: controller.signal }); clearTimeout(timeout); // Any response (even an error page) means the server is up. 
- return response.ok || response.status > 0; + return true; } catch { return false; } diff --git a/packages/cli/src/commands/gemma/setup.ts b/packages/cli/src/commands/gemma/setup.ts index 7648122783d..8bf9eb4b58b 100644 --- a/packages/cli/src/commands/gemma/setup.ts +++ b/packages/cli/src/commands/gemma/setup.ts @@ -105,7 +105,10 @@ async function downloadFile(url: string, destPath: string): Promise { for (;;) { const { done, value } = await reader.read(); if (done) break; - fileStream.write(value); + const writeOk = fileStream.write(value); + if (!writeOk) { + await new Promise((resolve) => fileStream.once('drain', resolve)); + } downloadedBytes += value.byteLength; renderProgress(downloadedBytes, totalBytes); } @@ -147,7 +150,7 @@ interface SetupArgs { consent: boolean; } -async function handleSetup(argv: SetupArgs): Promise { +async function handleSetup(argv: SetupArgs): Promise { const { port, force } = argv; log(''); @@ -164,8 +167,7 @@ async function handleSetup(argv: SetupArgs): Promise { logError( 'LiteRT-LM binaries are available for: macOS (ARM64), Linux (x86_64), Windows (x86_64)', ); - await exitCli(1); - return; + return 1; } log(chalk.dim(` Platform: ${platform.key} → ${platform.binaryName}`)); @@ -182,8 +184,7 @@ async function handleSetup(argv: SetupArgs): Promise { const accepted = await promptYesNo('Do you want to continue?'); if (!accepted) { log('Setup cancelled.'); - await exitCli(0); - return; + return 0; } } @@ -213,8 +214,7 @@ async function handleSetup(argv: SetupArgs): Promise { ), ); logError(' Check your internet connection and try again.'); - await exitCli(1); - return; + return 1; } // Step 4: Make executable and handle macOS gatekeeper @@ -227,8 +227,7 @@ async function handleSetup(argv: SetupArgs): Promise { ` ✗ Failed to set executable permission: ${error instanceof Error ? 
error.message : String(error)}`, ), ); - await exitCli(1); - return; + return 1; } } @@ -268,8 +267,7 @@ async function handleSetup(argv: SetupArgs): Promise { logError( chalk.red(` ✗ Model download failed (exit code ${exitCode})`), ); - await exitCli(1); - return; + return 1; } log(''); log(chalk.green(` ✓ Model ${GEMMA_MODEL_NAME} downloaded`)); @@ -369,6 +367,8 @@ async function handleSetup(argv: SetupArgs): Promise { log(chalk.dim(' gemini gemma stop Stop the LiteRT server')); log(chalk.dim(' /gemma Check status inside a session')); log(''); + + return 0; } export const setupCommand: CommandModule = { @@ -402,13 +402,13 @@ export const setupCommand: CommandModule = { description: 'Skip interactive consent prompt (implies acceptance)', }), handler: async (argv) => { - await handleSetup({ + const exitCode = await handleSetup({ port: Number(argv['port']), skipModel: Boolean(argv['skipModel']), start: Boolean(argv['start']), force: Boolean(argv['force']), consent: Boolean(argv['consent']), }); - await exitCli(0); + await exitCli(exitCode); }, }; diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts index 02b1bd0e4a6..22591b9fc0e 100644 --- a/packages/cli/src/commands/gemma/start.ts +++ b/packages/cli/src/commands/gemma/start.ts @@ -6,6 +6,7 @@ import type { CommandModule } from 'yargs'; import fs from 'node:fs'; +import path from 'node:path'; import { spawn } from 'node:child_process'; import chalk from 'chalk'; import { debugLogger } from '@google/gemini-cli-core'; @@ -45,7 +46,7 @@ export async function startServer( const logPath = getLogFilePath(); fs.mkdirSync(getLiteRtBinDir(), { recursive: true }); // Ensure tmp dir exists for log and pid files - const tmpDir = getPidFilePath().replace(/\/[^/]+$/, ''); + const tmpDir = path.dirname(getPidFilePath()); fs.mkdirSync(tmpDir, { recursive: true }); const logFd = fs.openSync(logPath, 'a'); diff --git a/packages/cli/src/services/liteRtServerManager.ts 
b/packages/cli/src/services/liteRtServerManager.ts index 1f0de35fe3a..375b34ca06a 100644 --- a/packages/cli/src/services/liteRtServerManager.ts +++ b/packages/cli/src/services/liteRtServerManager.ts @@ -5,6 +5,7 @@ */ import { debugLogger } from '@google/gemini-cli-core'; +import type { GemmaModelRouterSettings } from '@google/gemini-cli-core'; import { getBinaryPath, isBinaryInstalled, @@ -12,15 +13,6 @@ import { } from '../commands/gemma/platform.js'; import { DEFAULT_PORT } from '../commands/gemma/constants.js'; -// Use a local interface that includes the new fields, since the core -// package's compiled types may not include them until rebuilt. -interface GemmaSettings { - enabled?: boolean; - autoStartServer?: boolean; - binaryPath?: string; - classifier?: { host?: string; model?: string }; -} - /** * Manages the LiteRT-LM server lifecycle for auto-start during CLI startup. * @@ -35,7 +27,7 @@ export class LiteRtServerManager { * This is fire-and-forget: failures are logged but never block startup. */ static async ensureRunning( - gemmaSettings: GemmaSettings | undefined, + gemmaSettings: GemmaModelRouterSettings | undefined, ): Promise { if (!gemmaSettings?.enabled) return; if (gemmaSettings.autoStartServer === false) return; From 85a5a972ff8806f02ff9857be5fc1547cc9a5eba Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Thu, 16 Apr 2026 18:16:25 -0700 Subject: [PATCH 15/33] chore: remove gemma setup guide doc for now --- docs/core/gemma-setup-guide.md | 65 ---------------------------------- 1 file changed, 65 deletions(-) delete mode 100644 docs/core/gemma-setup-guide.md diff --git a/docs/core/gemma-setup-guide.md b/docs/core/gemma-setup-guide.md deleted file mode 100644 index 05f3b104ff6..00000000000 --- a/docs/core/gemma-setup-guide.md +++ /dev/null @@ -1,65 +0,0 @@ -# `gemini gemma` — Local Model Routing Setup - -## What is this? - -Routes simple requests to Flash and complex requests to Pro using a local Gemma -3 1B model running on your machine. 
Saves cloud API costs and adds a few ms of -local inference instead of a cloud classifier round-trip. - -## Quick Start - -```bash -# One command does everything: downloads runtime, pulls model, configures settings, starts server -gemini gemma setup -``` - -You'll be prompted to accept the Gemma Terms of Use. The model is ~1 GB. - -After setup, **just use the CLI normally** — routing happens automatically on -every request. - -## Commands - -| Command | What it does | -| --------------------- | -------------------------------------------------------------- | -| `gemini gemma setup` | Full install (binary + model + settings + server start) | -| `gemini gemma status` | Health check — shows what's installed and running | -| `gemini gemma start` | Start the LiteRT server (auto-starts on CLI launch by default) | -| `gemini gemma stop` | Stop the LiteRT server | -| `gemini gemma logs` | Tail the server logs to see routing requests live | -| `/gemma` | In-session status check (type it inside the CLI) | - -## Verifying it works - -1. Run `gemini gemma status` — all checks should show green -2. Open two terminals: - - Terminal 1: `gemini gemma logs` (watch for incoming requests) - - Terminal 2: use the CLI normally -3. You should see classification requests appear in the logs as you interact - with the CLI -4. 
The `/gemma` slash command inside a session shows a quick status panel - -## Setup flags - -```bash -gemini gemma setup --port 8080 # custom port -gemini gemma setup --no-start # don't start server after install -gemini gemma setup --force # re-download everything -gemini gemma setup --skip-model # binary only, skip the 1GB model download -``` - -## How it works under the hood - -- Local Gemma classifies each request as "simple" or "complex" (~100ms) -- Simple → Flash, Complex → Pro -- If the local server is down, the CLI silently falls back to the cloud - classifier — no errors, no disruption - -## Disabling - -Set `enabled: false` in settings or just run `gemini gemma stop` to turn off the -server: - -```json -{ "experimental": { "gemmaModelRouter": { "enabled": false } } } -``` From eb5a3b90d8c2236773b19841decaf726bbb4d45f Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Thu, 16 Apr 2026 18:23:43 -0700 Subject: [PATCH 16/33] chore: remove unnecessary comments across gemma commands --- packages/cli/src/commands/gemma.ts | 5 +-- packages/cli/src/commands/gemma/constants.ts | 17 ---------- packages/cli/src/commands/gemma/logs.ts | 7 ---- packages/cli/src/commands/gemma/platform.ts | 29 +--------------- packages/cli/src/commands/gemma/setup.ts | 33 +------------------ packages/cli/src/commands/gemma/start.ts | 12 ------- packages/cli/src/commands/gemma/status.ts | 11 ------- packages/cli/src/commands/gemma/stop.ts | 15 ++------- packages/cli/src/gemini.tsx | 2 -- .../cli/src/services/liteRtServerManager.ts | 14 -------- .../src/ui/components/views/GemmaStatus.tsx | 6 ---- 11 files changed, 6 insertions(+), 145 deletions(-) diff --git a/packages/cli/src/commands/gemma.ts b/packages/cli/src/commands/gemma.ts index d7b88f06374..737bbb069ba 100644 --- a/packages/cli/src/commands/gemma.ts +++ b/packages/cli/src/commands/gemma.ts @@ -29,8 +29,5 @@ export const gemmaCommand: CommandModule = { .command(defer(logsCommand, 'gemma')) .demandCommand(1, 'You need at least one 
command before continuing.') .version(false), - handler: () => { - // yargs will automatically show help if no subcommand is provided - // thanks to demandCommand(1) in the builder. - }, + handler: () => {}, }; diff --git a/packages/cli/src/commands/gemma/constants.ts b/packages/cli/src/commands/gemma/constants.ts index 76203a8a8f4..cab4c1f2b26 100644 --- a/packages/cli/src/commands/gemma/constants.ts +++ b/packages/cli/src/commands/gemma/constants.ts @@ -7,45 +7,28 @@ import path from 'node:path'; import { Storage } from '@google/gemini-cli-core'; -/** LiteRT-LM release version to download. */ export const LITERT_RELEASE_VERSION = 'v0.9.0-alpha03'; - -/** Base URL for LiteRT-LM GitHub releases. */ export const LITERT_RELEASE_BASE_URL = 'https://github.com/google-ai-edge/LiteRT-LM/releases/download'; - -/** The only tested and supported model for local routing. */ export const GEMMA_MODEL_NAME = 'gemma3-1b-gpu-custom'; - -/** Default port for the LiteRT-LM server. */ export const DEFAULT_PORT = 9379; - -/** Server health check timeout in milliseconds. */ export const HEALTH_CHECK_TIMEOUT_MS = 5000; - -/** Delay before checking if server started successfully. */ export const SERVER_START_WAIT_MS = 3000; -/** - * Maps `${process.platform}-${process.arch}` to the LiteRT-LM binary filename. - */ export const PLATFORM_BINARY_MAP: Record = { 'darwin-arm64': 'lit.macos_arm64', 'linux-x64': 'lit.linux_x86_64', 'win32-x64': 'lit.windows_x86_64.exe', }; -/** Directory where the LiteRT-LM binary is installed. */ export function getLiteRtBinDir(): string { return path.join(Storage.getGlobalGeminiDir(), 'bin', 'litert'); } -/** Path to the PID file for the background LiteRT server. */ export function getPidFilePath(): string { return path.join(Storage.getGlobalTempDir(), 'litert-server.pid'); } -/** Path to the log file for the background LiteRT server. 
*/ export function getLogFilePath(): string { return path.join(Storage.getGlobalTempDir(), 'litert-server.log'); } diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts index 473f3557fdc..bbe5780c358 100644 --- a/packages/cli/src/commands/gemma/logs.ts +++ b/packages/cli/src/commands/gemma/logs.ts @@ -11,14 +11,9 @@ import { debugLogger } from '@google/gemini-cli-core'; import { exitCli } from '../utils.js'; import { getLogFilePath } from './constants.js'; -/** - * Reads the last N lines from a file using Node.js APIs. - * Used as a cross-platform fallback when `tail` is unavailable (Windows). - */ function readLastLines(filePath: string, count: number): string { const content = fs.readFileSync(filePath, 'utf-8'); const lines = content.split('\n'); - // If the file ends with a newline, the last element is empty — skip it. if (lines.length > 0 && lines[lines.length - 1] === '') { lines.pop(); } @@ -64,7 +59,6 @@ export const logsCommand: CommandModule = { await exitCli(0); return; } - // Show last N lines and exit. const tailArgs = ['-n', String(lines), logPath]; const child = spawn('tail', tailArgs, { stdio: 'inherit' }); child.on('close', async (code) => { @@ -81,7 +75,6 @@ export const logsCommand: CommandModule = { return; } - // Follow mode — stream live output until user presses Ctrl+C. debugLogger.log(`Tailing ${logPath} (Ctrl+C to stop)\n`); const tailArgs = ['-f', '-n', '20', logPath]; const child = spawn('tail', tailArgs, { stdio: 'inherit' }); diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts index d1c9373f685..fd2dbc683b6 100644 --- a/packages/cli/src/commands/gemma/platform.ts +++ b/packages/cli/src/commands/gemma/platform.ts @@ -28,9 +28,6 @@ export interface GemmaConfigStatus { configuredPort: number; } -/** - * Resolves the Gemma configuration from the workspace settings. 
- */ export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus { let settingsEnabled = false; let configuredPort = fallbackPort; @@ -46,15 +43,11 @@ export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus { } } } catch { - // Settings may fail to load in some contexts; treat as not enabled. + // ignore — settings may fail to load outside a workspace } return { settingsEnabled, configuredPort }; } -/** - * Detects the current platform and resolves the corresponding LiteRT-LM binary name. - * Returns null if the platform is unsupported. - */ export function detectPlatform(): PlatformInfo | null { const key = `${process.platform}-${process.arch}`; const binaryName = PLATFORM_BINARY_MAP[key]; @@ -64,29 +57,22 @@ export function detectPlatform(): PlatformInfo | null { return { key, binaryName }; } -/** Returns the full local path to the LiteRT-LM binary. */ export function getBinaryPath(binaryName?: string): string | null { const name = binaryName ?? detectPlatform()?.binaryName; if (!name) return null; return path.join(getLiteRtBinDir(), name); } -/** Returns the GitHub release download URL for the binary. */ export function getBinaryDownloadUrl(binaryName: string): string { return `${LITERT_RELEASE_BASE_URL}/${LITERT_RELEASE_VERSION}/${binaryName}`; } -/** Checks if the LiteRT-LM binary exists on disk. */ export function isBinaryInstalled(): boolean { const binaryPath = getBinaryPath(); if (!binaryPath) return false; return fs.existsSync(binaryPath); } -/** - * Checks if the Gemma model has been downloaded by running `lit list` - * and looking for the model name in stdout. - */ export function isModelDownloaded(binaryPath: string): boolean { try { const output = execFileSync(binaryPath, ['list'], { @@ -99,10 +85,6 @@ export function isModelDownloaded(binaryPath: string): boolean { } } -/** - * Checks if a LiteRT-LM server is running and responding on the given port. - * Uses a simple HTTP request with a short timeout. 
- */ export async function isServerRunning(port: number): Promise { try { const controller = new AbortController(); @@ -112,17 +94,12 @@ export async function isServerRunning(port: number): Promise { ); await fetch(`http://localhost:${port}/`, { signal: controller.signal }); clearTimeout(timeout); - // Any response (even an error page) means the server is up. return true; } catch { return false; } } -/** - * Reads the PID from the PID file, if it exists. - * Returns the PID number, or null if the file doesn't exist or is invalid. - */ export function readServerPid(): number | null { const pidPath = getPidFilePath(); try { @@ -134,12 +111,8 @@ export function readServerPid(): number | null { } } -/** - * Checks if a process with the given PID is still running. - */ export function isProcessRunning(pid: number): boolean { try { - // Sending signal 0 checks if the process exists without actually signaling it. process.kill(pid, 0); return true; } catch { diff --git a/packages/cli/src/commands/gemma/setup.ts b/packages/cli/src/commands/gemma/setup.ts index 8bf9eb4b58b..002afcbfdbf 100644 --- a/packages/cli/src/commands/gemma/setup.ts +++ b/packages/cli/src/commands/gemma/setup.ts @@ -29,10 +29,6 @@ import readline from 'node:readline'; const log = (msg: string) => debugLogger.log(msg); const logError = (msg: string) => debugLogger.error(msg); -/** - * Prompts the user for a yes/no confirmation. - * Returns true if the user answers 'y' or 'yes'. - */ async function promptYesNo(question: string): Promise { const rl = readline.createInterface({ input: process.stdin, @@ -49,14 +45,12 @@ async function promptYesNo(question: string): Promise { }); } -/** Formats a byte count into a human-readable string (e.g. "12.3 MB"). 
*/ function formatBytes(bytes: number): string { if (bytes < 1024) return `${bytes} B`; if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; } -/** Renders a single-line progress bar to stderr (overwriting in place). */ function renderProgress(downloaded: number, total: number | null): void { const barWidth = 30; if (total && total > 0) { @@ -72,14 +66,8 @@ function renderProgress(downloaded: number, total: number | null): void { } } -/** - * Downloads a file from a URL to a local path with a progress bar. - * Uses a temporary `.downloading` suffix for safety against interrupted downloads. - */ async function downloadFile(url: string, destPath: string): Promise { const tmpPath = destPath + '.downloading'; - - // Clean up any previous interrupted download. if (fs.existsSync(tmpPath)) { fs.unlinkSync(tmpPath); } @@ -114,24 +102,17 @@ async function downloadFile(url: string, destPath: string): Promise { } } finally { fileStream.end(); - // Clear the progress line. process.stderr.write('\r' + ' '.repeat(80) + '\r'); } - // Wait for the file to finish flushing. await new Promise((resolve, reject) => { fileStream.on('finish', resolve); fileStream.on('error', reject); }); - // Atomic rename after successful download. fs.renameSync(tmpPath, destPath); } -/** - * Spawns a child process and returns a promise that resolves with the exit code. - * Inherits stdio so the user sees all output (progress, terms acceptance, etc.). 
- */ function spawnInherited(command: string, args: string[]): Promise { return new Promise((resolve, reject) => { const child = nodeSpawn(command, args, { @@ -158,7 +139,6 @@ async function handleSetup(argv: SetupArgs): Promise { log(chalk.dim('─'.repeat(40))); log(''); - // Step 1: Platform detection const platform = detectPlatform(); if (!platform) { logError( @@ -171,7 +151,6 @@ async function handleSetup(argv: SetupArgs): Promise { } log(chalk.dim(` Platform: ${platform.key} → ${platform.binaryName}`)); - // Step 2: Consent if (!argv.consent) { log(''); log('This will download and install the LiteRT-LM runtime and the'); @@ -188,7 +167,6 @@ async function handleSetup(argv: SetupArgs): Promise { } } - // Step 3: Download binary const binaryPath = getBinaryPath(platform.binaryName)!; const alreadyInstalled = isBinaryInstalled(); @@ -217,7 +195,6 @@ async function handleSetup(argv: SetupArgs): Promise { return 1; } - // Step 4: Make executable and handle macOS gatekeeper if (process.platform !== 'win32') { try { fs.chmodSync(binaryPath, 0o755); @@ -238,15 +215,11 @@ async function handleSetup(argv: SetupArgs): Promise { }); log(chalk.green(' ✓ macOS quarantine attribute removed')); } catch { - // This is expected to fail if the attribute doesn't exist. - debugLogger.log( - 'xattr quarantine removal not needed or failed (non-fatal)', - ); + // Expected if the attribute doesn't exist. } } } - // Step 5: Pull the model if (!argv.skipModel) { const modelAlreadyDownloaded = isModelDownloaded(binaryPath); if (modelAlreadyDownloaded && !force) { @@ -274,7 +247,6 @@ async function handleSetup(argv: SetupArgs): Promise { } } - // Step 6: Configure settings log(''); log(' Configuring settings...'); try { @@ -294,7 +266,6 @@ async function handleSetup(argv: SetupArgs): Promise { }, }; - // Read existing experimental settings to avoid overwriting them. const existingExperimental = settings.forScope(SettingScope.User).settings.experimental ?? 
{}; settings.setValue(SettingScope.User, 'experimental', { @@ -314,7 +285,6 @@ async function handleSetup(argv: SetupArgs): Promise { ); } - // Step 7: Start server (if requested) if (argv.start) { log(''); log(' Starting LiteRT server...'); @@ -330,7 +300,6 @@ async function handleSetup(argv: SetupArgs): Promise { } } - // Step 8: Summary log(''); log(chalk.dim('─'.repeat(40))); log(chalk.bold.green(' Setup complete! Local model routing is now active.')); diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts index 22591b9fc0e..827c3f4fbd6 100644 --- a/packages/cli/src/commands/gemma/start.ts +++ b/packages/cli/src/commands/gemma/start.ts @@ -25,27 +25,18 @@ import { resolveGemmaConfig, } from './platform.js'; -/** - * Starts the LiteRT-LM server as a detached background process. - * Returns true if the server was started (or is already running). - * - * This function is also used by `setup.ts` to start the server after installation. - */ export async function startServer( binaryPath: string, port: number, ): Promise { - // Check if already running const alreadyRunning = await isServerRunning(port); if (alreadyRunning) { debugLogger.log(`LiteRT server already running on port ${port}`); return true; } - // Ensure log directory exists const logPath = getLogFilePath(); fs.mkdirSync(getLiteRtBinDir(), { recursive: true }); - // Ensure tmp dir exists for log and pid files const tmpDir = path.dirname(getPidFilePath()); fs.mkdirSync(tmpDir, { recursive: true }); @@ -57,19 +48,16 @@ export async function startServer( stdio: ['ignore', logFd, logFd], }); - // Write PID file const pidPath = getPidFilePath(); if (child.pid) { fs.writeFileSync(pidPath, String(child.pid), 'utf-8'); } - // Detach the child so it survives after the CLI exits. child.unref(); } finally { fs.closeSync(logFd); } - // Wait briefly and verify the server is responding. 
await new Promise((resolve) => setTimeout(resolve, SERVER_START_WAIT_MS)); return isServerRunning(port); } diff --git a/packages/cli/src/commands/gemma/status.ts b/packages/cli/src/commands/gemma/status.ts index 1e061d7bc57..77847437cb9 100644 --- a/packages/cli/src/commands/gemma/status.ts +++ b/packages/cli/src/commands/gemma/status.ts @@ -30,11 +30,6 @@ export interface GemmaStatusResult { allPassing: boolean; } -/** - * Runs all diagnostic checks and returns a structured status result. - * This is shared between the CLI `gemini gemma status` command and the - * in-session `/gemma` slash command. - */ export async function checkGemmaStatus( port?: number, ): Promise { @@ -64,7 +59,6 @@ export async function checkGemmaStatus( }; } -/** Formats the status result into a human-readable string. */ export function formatGemmaStatus(status: GemmaStatusResult): string { const check = (ok: boolean) => (ok ? chalk.green('✓') : chalk.red('✗')); @@ -75,7 +69,6 @@ export function formatGemmaStatus(status: GemmaStatusResult): string { '', ]; - // Binary if (status.binaryInstalled) { lines.push(` Binary: ${check(true)} Installed (${status.binaryPath})`); } else { @@ -90,7 +83,6 @@ export function formatGemmaStatus(status: GemmaStatusResult): string { } } - // Model if (status.modelDownloaded) { lines.push(` Model: ${check(true)} ${GEMMA_MODEL_NAME} downloaded`); } else { @@ -106,7 +98,6 @@ export function formatGemmaStatus(status: GemmaStatusResult): string { } } - // Server if (status.serverRunning) { const pidInfo = status.serverPid ? 
` (PID ${status.serverPid})` : ''; lines.push( @@ -119,7 +110,6 @@ export function formatGemmaStatus(status: GemmaStatusResult): string { lines.push(chalk.dim(` Run: gemini gemma start`)); } - // Settings if (status.settingsEnabled) { lines.push(` Settings: ${check(true)} Enabled in settings.json`); } else { @@ -169,7 +159,6 @@ export const statusCommand: CommandModule = { } const status = await checkGemmaStatus(port); const output = formatGemmaStatus(status); - // Use process.stdout directly for consistent output in non-interactive mode. process.stdout.write(output); await exitCli(status.allPassing ? 0 : 1); }, diff --git a/packages/cli/src/commands/gemma/stop.ts b/packages/cli/src/commands/gemma/stop.ts index 409989e33c3..676dc0b667a 100644 --- a/packages/cli/src/commands/gemma/stop.ts +++ b/packages/cli/src/commands/gemma/stop.ts @@ -17,10 +17,6 @@ import { resolveGemmaConfig, } from './platform.js'; -/** - * Stops the LiteRT-LM server by sending SIGTERM to the stored PID. - * Returns true if the server was stopped successfully. - */ export async function stopServer(): Promise { const pid = readServerPid(); const pidPath = getPidFilePath(); @@ -30,11 +26,10 @@ export async function stopServer(): Promise { } if (!isProcessRunning(pid)) { - // PID file exists but process is gone — clean up stale file. try { fs.unlinkSync(pidPath); } catch { - // Ignore cleanup errors. + // ignore } return false; } @@ -45,24 +40,21 @@ export async function stopServer(): Promise { return false; } - // Wait briefly for graceful shutdown. await new Promise((resolve) => setTimeout(resolve, 1000)); - // If still running, escalate to SIGKILL. if (isProcessRunning(pid)) { try { process.kill(pid, 'SIGKILL'); } catch { - // Process may have exited between the check and the kill. + // ignore } await new Promise((resolve) => setTimeout(resolve, 500)); } - // Clean up PID file. try { fs.unlinkSync(pidPath); } catch { - // Ignore cleanup errors. 
+ // ignore } return true; @@ -101,7 +93,6 @@ export const stopCommand: CommandModule = { return; } - // No PID file or process not running — check if something else is on the port. const running = await isServerRunning(port); if (running) { debugLogger.log( diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index 899be0bad57..1f43419c8da 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -612,8 +612,6 @@ export async function main() { const initializationResult = await initializeApp(config, settings); initAppHandle?.end(); - // Auto-start the LiteRT-LM server for Gemma local routing if configured. - // This is fire-and-forget — failures are logged but never block startup. import('./services/liteRtServerManager.js') .then(({ LiteRtServerManager }) => LiteRtServerManager.ensureRunning( diff --git a/packages/cli/src/services/liteRtServerManager.ts b/packages/cli/src/services/liteRtServerManager.ts index 375b34ca06a..7107d2321ef 100644 --- a/packages/cli/src/services/liteRtServerManager.ts +++ b/packages/cli/src/services/liteRtServerManager.ts @@ -13,19 +13,7 @@ import { } from '../commands/gemma/platform.js'; import { DEFAULT_PORT } from '../commands/gemma/constants.js'; -/** - * Manages the LiteRT-LM server lifecycle for auto-start during CLI startup. - * - * When the Gemma model router is enabled and `autoStartServer` is true, - * this manager ensures the server is running before the CLI enters - * interactive mode. The server is spawned as a detached daemon that - * persists across CLI sessions — it is NOT stopped when the CLI exits. - */ export class LiteRtServerManager { - /** - * Ensures the LiteRT-LM server is running if the settings call for it. - * This is fire-and-forget: failures are logged but never block startup. 
- */ static async ensureRunning( gemmaSettings: GemmaModelRouterSettings | undefined, ): Promise { @@ -57,8 +45,6 @@ export class LiteRtServerManager { ); try { - // Dynamic import to avoid circular dependencies and to keep the start - // logic in one place. const { startServer } = await import('../commands/gemma/start.js'); const binaryPath = gemmaSettings.binaryPath || getBinaryPath() || ''; if (!binaryPath) { diff --git a/packages/cli/src/ui/components/views/GemmaStatus.tsx b/packages/cli/src/ui/components/views/GemmaStatus.tsx index b9c20142d46..160689ebeac 100644 --- a/packages/cli/src/ui/components/views/GemmaStatus.tsx +++ b/packages/cli/src/ui/components/views/GemmaStatus.tsx @@ -32,7 +32,6 @@ export const GemmaStatus: React.FC = ({ Gemma Local Model Routing - {/* Binary */} @@ -46,7 +45,6 @@ export const GemmaStatus: React.FC = ({ - {/* Model */} @@ -60,7 +58,6 @@ export const GemmaStatus: React.FC = ({ - {/* Server */} @@ -81,7 +78,6 @@ export const GemmaStatus: React.FC = ({ - {/* Settings */} @@ -95,7 +91,6 @@ export const GemmaStatus: React.FC = ({ - {/* Active For */} Active for: {allPassing ? ( @@ -105,7 +100,6 @@ export const GemmaStatus: React.FC = ({ )} - {/* Summary */} {allPassing ? 
( From c83376ca948e0420a4f40fa7216534ce11719910 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Thu, 16 Apr 2026 19:31:25 -0700 Subject: [PATCH 17/33] fix(cli): harden gemma router setup and server handling --- packages/cli/src/commands/gemma/logs.test.ts | 51 ++++++++ packages/cli/src/commands/gemma/logs.ts | 84 +++++++++++-- .../cli/src/commands/gemma/platform.test.ts | 113 ++++++++++++++++++ packages/cli/src/commands/gemma/platform.ts | 60 ++++++++-- packages/cli/src/commands/gemma/setup.ts | 62 +++++++--- packages/cli/src/commands/gemma/start.ts | 4 +- packages/cli/src/commands/gemma/status.ts | 2 +- packages/cli/src/config/config.test.ts | 13 ++ .../cli/src/config/settingsSchema.test.ts | 24 +++- .../src/services/liteRtServerManager.test.ts | 68 +++++++++++ .../cli/src/services/liteRtServerManager.ts | 15 +-- packages/core/src/config/config.test.ts | 8 ++ packages/core/src/config/config.ts | 2 + 13 files changed, 455 insertions(+), 51 deletions(-) create mode 100644 packages/cli/src/commands/gemma/logs.test.ts create mode 100644 packages/cli/src/commands/gemma/platform.test.ts create mode 100644 packages/cli/src/services/liteRtServerManager.test.ts diff --git a/packages/cli/src/commands/gemma/logs.test.ts b/packages/cli/src/commands/gemma/logs.test.ts new file mode 100644 index 00000000000..caa9bc1ec4e --- /dev/null +++ b/packages/cli/src/commands/gemma/logs.test.ts @@ -0,0 +1,51 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { afterEach, describe, expect, it } from 'vitest'; +import { readLastLines } from './logs.js'; + +describe('readLastLines', () => { + const tempFiles: string[] = []; + + afterEach(async () => { + await Promise.all( + tempFiles + .splice(0) + .map((filePath) => fs.promises.rm(filePath, { force: true })), + ); + }); + + it('returns only the requested tail lines without reading the whole file 
eagerly', async () => { + const filePath = path.join( + os.tmpdir(), + `gemma-logs-${Date.now()}-${Math.random().toString(36).slice(2)}.log`, + ); + tempFiles.push(filePath); + + const content = Array.from({ length: 2000 }, (_, i) => `line-${i + 1}`) + .join('\n') + .concat('\n'); + await fs.promises.writeFile(filePath, content, 'utf-8'); + + await expect(readLastLines(filePath, 3)).resolves.toBe( + 'line-1998\nline-1999\nline-2000\n', + ); + }); + + it('returns an empty string when zero lines are requested', async () => { + const filePath = path.join( + os.tmpdir(), + `gemma-logs-${Date.now()}-${Math.random().toString(36).slice(2)}.log`, + ); + tempFiles.push(filePath); + await fs.promises.writeFile(filePath, 'line-1\nline-2\n', 'utf-8'); + + await expect(readLastLines(filePath, 0)).resolves.toBe(''); + }); +}); diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts index bbe5780c358..5eb81ea33d4 100644 --- a/packages/cli/src/commands/gemma/logs.ts +++ b/packages/cli/src/commands/gemma/logs.ts @@ -11,13 +11,79 @@ import { debugLogger } from '@google/gemini-cli-core'; import { exitCli } from '../utils.js'; import { getLogFilePath } from './constants.js'; -function readLastLines(filePath: string, count: number): string { - const content = fs.readFileSync(filePath, 'utf-8'); - const lines = content.split('\n'); - if (lines.length > 0 && lines[lines.length - 1] === '') { - lines.pop(); +export async function readLastLines( + filePath: string, + count: number, +): Promise { + if (count <= 0) { + return ''; + } + + const CHUNK_SIZE = 64 * 1024; + const fileHandle = await fs.promises.open(filePath, fs.constants.O_RDONLY); + + try { + const stats = await fileHandle.stat(); + if (stats.size === 0) { + return ''; + } + + const chunks: Buffer[] = []; + let totalBytes = 0; + let newlineCount = 0; + let position = stats.size; + + while (position > 0 && newlineCount <= count) { + const readSize = Math.min(CHUNK_SIZE, position); + position 
-= readSize; + + const buffer = Buffer.allocUnsafe(readSize); + const { bytesRead } = await fileHandle.read( + buffer, + 0, + readSize, + position, + ); + + if (bytesRead === 0) { + break; + } + + const chunk = + bytesRead === readSize ? buffer : buffer.subarray(0, bytesRead); + chunks.unshift(chunk); + totalBytes += chunk.length; + + for (const byte of chunk) { + if (byte === 0x0a) { + newlineCount += 1; + } + } + } + + const content = Buffer.concat(chunks, totalBytes).toString('utf-8'); + const lines = content.split('\n'); + + if (position > 0 && lines.length > 0) { + const boundary = Buffer.allocUnsafe(1); + const { bytesRead } = await fileHandle.read(boundary, 0, 1, position - 1); + if (bytesRead === 1 && boundary[0] !== 0x0a) { + lines.shift(); + } + } + + if (lines.length > 0 && lines[lines.length - 1] === '') { + lines.pop(); + } + + if (lines.length === 0) { + return ''; + } + + return lines.slice(-count).join('\n') + '\n'; + } finally { + await fileHandle.close(); } - return lines.slice(-count).join('\n') + '\n'; } const isWindows = process.platform === 'win32'; @@ -41,7 +107,9 @@ export const logsCommand: CommandModule = { handler: async (argv) => { const logPath = getLogFilePath(); - if (!fs.existsSync(logPath)) { + try { + await fs.promises.access(logPath, fs.constants.F_OK); + } catch { debugLogger.log(`No log file found at ${logPath}`); debugLogger.log( 'Is the LiteRT server running? 
Start it with: gemini gemma start', @@ -55,7 +123,7 @@ export const logsCommand: CommandModule = { if (lines !== undefined) { if (isWindows) { - process.stdout.write(readLastLines(logPath, lines)); + process.stdout.write(await readLastLines(logPath, lines)); await exitCli(0); return; } diff --git a/packages/cli/src/commands/gemma/platform.test.ts b/packages/cli/src/commands/gemma/platform.test.ts new file mode 100644 index 00000000000..fb10c026ece --- /dev/null +++ b/packages/cli/src/commands/gemma/platform.test.ts @@ -0,0 +1,113 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import path from 'node:path'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { SettingScope } from '../../config/settings.js'; +import { getLiteRtBinDir } from './constants.js'; + +const mockLoadSettings = vi.hoisted(() => vi.fn()); + +vi.mock('../../config/settings.js', () => ({ + loadSettings: mockLoadSettings, + SettingScope: { + User: 'User', + }, +})); + +import { + getBinaryPath, + isBinaryInstalled, + resolveGemmaConfig, +} from './platform.js'; + +describe('gemma platform helpers', () => { + function createMockSettings( + userGemmaSettings?: object, + mergedGemmaSettings?: object, + ) { + return { + merged: { + experimental: { + gemmaModelRouter: mergedGemmaSettings, + }, + }, + forScope: vi.fn((scope: SettingScope) => { + if (scope !== SettingScope.User) { + throw new Error(`Unexpected scope ${scope}`); + } + return { + settings: { + experimental: { + gemmaModelRouter: userGemmaSettings, + }, + }, + }; + }), + }; + } + + beforeEach(() => { + vi.clearAllMocks(); + mockLoadSettings.mockReturnValue(createMockSettings()); + }); + + it('prefers the configured binary path from settings', () => { + mockLoadSettings.mockReturnValue( + createMockSettings({ binaryPath: '/custom/lit' }), + ); + + expect(getBinaryPath('lit.test')).toBe('/custom/lit'); + }); + + it('ignores workspace overrides 
for the configured binary path', () => { + mockLoadSettings.mockReturnValue( + createMockSettings( + { binaryPath: '/user/lit' }, + { binaryPath: '/workspace/evil' }, + ), + ); + + expect(getBinaryPath('lit.test')).toBe('/user/lit'); + }); + + it('falls back to the default install location when no custom path is set', () => { + expect(getBinaryPath('lit.test')).toBe( + path.join(getLiteRtBinDir(), 'lit.test'), + ); + }); + + it('resolves the configured port and binary path from settings', () => { + mockLoadSettings.mockReturnValue( + createMockSettings( + { binaryPath: '/custom/lit' }, + { + enabled: true, + classifier: { + host: 'http://localhost:8123/v1beta', + }, + }, + ), + ); + + expect(resolveGemmaConfig(9379)).toEqual({ + settingsEnabled: true, + configuredPort: 8123, + configuredBinaryPath: '/custom/lit', + }); + }); + + it('checks binary installation using the resolved binary path', () => { + mockLoadSettings.mockReturnValue( + createMockSettings({ binaryPath: '/custom/lit' }), + ); + vi.spyOn(fs, 'existsSync').mockReturnValue(true); + + expect(isBinaryInstalled()).toBe(true); + expect(fs.existsSync).toHaveBeenCalledWith('/custom/lit'); + }); +}); diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts index fd2dbc683b6..0ee360ae63e 100644 --- a/packages/cli/src/commands/gemma/platform.ts +++ b/packages/cli/src/commands/gemma/platform.ts @@ -4,7 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { loadSettings } from '../../config/settings.js'; +import { loadSettings, SettingScope } from '../../config/settings.js'; import fs from 'node:fs'; import path from 'node:path'; import { execFileSync } from 'node:child_process'; @@ -26,26 +26,60 @@ export interface PlatformInfo { export interface GemmaConfigStatus { settingsEnabled: boolean; configuredPort: number; + configuredBinaryPath?: string; +} + +function getUserConfiguredBinaryPath( + workspaceDir = process.cwd(), +): string | undefined { + try { + const 
userGemmaSettings = loadSettings(workspaceDir).forScope( + SettingScope.User, + ).settings.experimental?.gemmaModelRouter; + return userGemmaSettings?.binaryPath?.trim() || undefined; + } catch { + return undefined; + } +} + +function parsePortFromHost( + host: string | undefined, + fallbackPort: number, +): number { + if (!host) { + return fallbackPort; + } + + try { + const url = new URL(host); + const port = Number(url.port); + return Number.isFinite(port) && port > 0 ? port : fallbackPort; + } catch { + const match = host.match(/:(\d+)/); + if (!match) { + return fallbackPort; + } + const port = parseInt(match[1], 10); + return Number.isFinite(port) && port > 0 ? port : fallbackPort; + } } export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus { let settingsEnabled = false; let configuredPort = fallbackPort; + const configuredBinaryPath = getUserConfiguredBinaryPath(); try { const settings = loadSettings(process.cwd()); const gemmaSettings = settings.merged.experimental?.gemmaModelRouter; settingsEnabled = gemmaSettings?.enabled === true; - const hostStr = gemmaSettings?.classifier?.host; - if (hostStr) { - const match = hostStr.match(/:(\d+)/); - if (match) { - configuredPort = parseInt(match[1], 10); - } - } + configuredPort = parsePortFromHost( + gemmaSettings?.classifier?.host, + fallbackPort, + ); } catch { // ignore — settings may fail to load outside a workspace } - return { settingsEnabled, configuredPort }; + return { settingsEnabled, configuredPort, configuredBinaryPath }; } export function detectPlatform(): PlatformInfo | null { @@ -58,6 +92,11 @@ export function detectPlatform(): PlatformInfo | null { } export function getBinaryPath(binaryName?: string): string | null { + const configuredBinaryPath = getUserConfiguredBinaryPath(); + if (configuredBinaryPath) { + return configuredBinaryPath; + } + const name = binaryName ?? 
detectPlatform()?.binaryName; if (!name) return null; return path.join(getLiteRtBinDir(), name); @@ -67,8 +106,7 @@ export function getBinaryDownloadUrl(binaryName: string): string { return `${LITERT_RELEASE_BASE_URL}/${LITERT_RELEASE_VERSION}/${binaryName}`; } -export function isBinaryInstalled(): boolean { - const binaryPath = getBinaryPath(); +export function isBinaryInstalled(binaryPath = getBinaryPath()): boolean { if (!binaryPath) return false; return fs.existsSync(binaryPath); } diff --git a/packages/cli/src/commands/gemma/setup.ts b/packages/cli/src/commands/gemma/setup.ts index 002afcbfdbf..5a7034f6ae9 100644 --- a/packages/cli/src/commands/gemma/setup.ts +++ b/packages/cli/src/commands/gemma/setup.ts @@ -6,16 +6,13 @@ import type { CommandModule } from 'yargs'; import fs from 'node:fs'; -import { execSync, spawn as nodeSpawn } from 'node:child_process'; +import path from 'node:path'; +import { execFileSync, spawn as nodeSpawn } from 'node:child_process'; import chalk from 'chalk'; import { debugLogger } from '@google/gemini-cli-core'; import { loadSettings, SettingScope } from '../../config/settings.js'; import { exitCli } from '../utils.js'; -import { - DEFAULT_PORT, - GEMMA_MODEL_NAME, - getLiteRtBinDir, -} from './constants.js'; +import { DEFAULT_PORT, GEMMA_MODEL_NAME } from './constants.js'; import { detectPlatform, getBinaryDownloadUrl, @@ -133,6 +130,9 @@ interface SetupArgs { async function handleSetup(argv: SetupArgs): Promise { const { port, force } = argv; + let settingsUpdated = false; + let serverStarted = false; + let autoStartServer = true; log(''); log(chalk.bold('Gemma Local Model Routing Setup')); @@ -181,7 +181,7 @@ async function handleSetup(argv: SetupArgs): Promise { debugLogger.log(`Downloading from: ${downloadUrl}`); try { - const binDir = getLiteRtBinDir(); + const binDir = path.dirname(binaryPath); fs.mkdirSync(binDir, { recursive: true }); await downloadFile(downloadUrl, binaryPath); log(chalk.green(' ✓ Binary downloaded 
successfully')); @@ -210,7 +210,7 @@ async function handleSetup(argv: SetupArgs): Promise { if (process.platform === 'darwin') { try { - execSync(`xattr -d com.apple.quarantine "${binaryPath}"`, { + execFileSync('xattr', ['-d', 'com.apple.quarantine', binaryPath], { stdio: 'ignore', }); log(chalk.green(' ✓ macOS quarantine attribute removed')); @@ -254,15 +254,16 @@ async function handleSetup(argv: SetupArgs): Promise { const existingGemma = settings.forScope(SettingScope.User).settings.experimental ?.gemmaModelRouter ?? {}; + autoStartServer = existingGemma.autoStartServer ?? true; const newGemmaSettings = { ...existingGemma, enabled: true, - autoStartServer: existingGemma.autoStartServer ?? true, + autoStartServer, classifier: { + ...existingGemma.classifier, host: `http://localhost:${port}`, model: GEMMA_MODEL_NAME, - ...existingGemma.classifier, }, }; @@ -274,6 +275,7 @@ async function handleSetup(argv: SetupArgs): Promise { }); log(chalk.green(' ✓ Settings updated in ~/.gemini/settings.json')); + settingsUpdated = true; } catch (error) { logError( chalk.red( @@ -288,8 +290,8 @@ async function handleSetup(argv: SetupArgs): Promise { if (argv.start) { log(''); log(' Starting LiteRT server...'); - const started = await startServer(binaryPath, port); - if (started) { + serverStarted = await startServer(binaryPath, port); + if (serverStarted) { log(chalk.green(` ✓ Server started on port ${port}`)); } else { log( @@ -300,9 +302,23 @@ async function handleSetup(argv: SetupArgs): Promise { } } + const routingActive = settingsUpdated && serverStarted; + const setupSucceeded = settingsUpdated && (!argv.start || serverStarted); log(''); log(chalk.dim('─'.repeat(40))); - log(chalk.bold.green(' Setup complete! Local model routing is now active.')); + if (routingActive) { + log(chalk.bold.green(' Setup complete! Local model routing is active.')); + } else if (settingsUpdated) { + log( + chalk.bold.green(' Setup complete! 
Local model routing is configured.'), + ); + } else { + log( + chalk.bold.yellow( + ' Setup incomplete. Manual settings changes are still required.', + ), + ); + } log(''); log(' How it works: Every request is classified by the local Gemma model.'); log( @@ -317,15 +333,27 @@ async function handleSetup(argv: SetupArgs): Promise { ); log(' This happens automatically — just use the CLI as usual.'); log(''); - if (!argv.start) { + if (!settingsUpdated) { log( chalk.yellow( - ' Note: Run "gemini gemma start" to start the server, or restart', + ' Fix the settings update above, then rerun "gemini gemma status".', ), ); + log(''); + } else if (!argv.start) { + log(chalk.yellow(' Note: Run "gemini gemma start" to start the server.')); + if (autoStartServer) { + log( + chalk.yellow( + ' Or restart the CLI to auto-start it on the next launch.', + ), + ); + } + log(''); + } else if (!serverStarted) { log( chalk.yellow( - ' the CLI to auto-start it (if autoStartServer is enabled).', + ' Review the server logs and rerun "gemini gemma start" after fixing the issue.', ), ); log(''); @@ -337,7 +365,7 @@ async function handleSetup(argv: SetupArgs): Promise { log(chalk.dim(' /gemma Check status inside a session')); log(''); - return 0; + return setupSucceeded ? 0 : 1; } export const setupCommand: CommandModule = { diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts index 827c3f4fbd6..96a31de218e 100644 --- a/packages/cli/src/commands/gemma/start.ts +++ b/packages/cli/src/commands/gemma/start.ts @@ -81,7 +81,8 @@ export const startCommand: CommandModule = { port = configuredPort; } - if (!isBinaryInstalled()) { + const binaryPath = getBinaryPath(); + if (!binaryPath || !isBinaryInstalled(binaryPath)) { debugLogger.error( chalk.red( 'LiteRT-LM binary not found. 
Run "gemini gemma setup" first.', @@ -100,7 +101,6 @@ export const startCommand: CommandModule = { return; } - const binaryPath = getBinaryPath()!; debugLogger.log(`Starting LiteRT server on port ${port}...`); const started = await startServer(binaryPath, port); diff --git a/packages/cli/src/commands/gemma/status.ts b/packages/cli/src/commands/gemma/status.ts index 77847437cb9..8ce9f006dcb 100644 --- a/packages/cli/src/commands/gemma/status.ts +++ b/packages/cli/src/commands/gemma/status.ts @@ -37,7 +37,7 @@ export async function checkGemmaStatus( const effectivePort = port ?? configuredPort; const binaryPath = getBinaryPath(); - const binaryInstalled = isBinaryInstalled(); + const binaryInstalled = isBinaryInstalled(binaryPath); const modelDownloaded = binaryInstalled && binaryPath ? isModelDownloaded(binaryPath) : false; const serverRunning = await isServerRunning(effectivePort); diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts index 04df366a983..4288150ba5a 100644 --- a/packages/cli/src/config/config.test.ts +++ b/packages/cli/src/config/config.test.ts @@ -338,6 +338,7 @@ describe('parseArguments', () => { { cmd: 'skill list', expected: true }, { cmd: 'hooks migrate', expected: true }, { cmd: 'hook migrate', expected: true }, + { cmd: 'gemma status', expected: true }, { cmd: 'some query', expected: undefined }, { cmd: 'hello world', expected: undefined }, ])( @@ -758,6 +759,12 @@ describe('parseArguments', () => { const argv = await parseArguments(settings); expect(argv.isCommand).toBe(true); }); + + it('should set isCommand to true for gemma command', async () => { + process.argv = ['node', 'script.js', 'gemma', 'status']; + const argv = await parseArguments(createTestMergedSettings()); + expect(argv.isCommand).toBe(true); + }); }); describe('loadCliConfig', () => { @@ -3030,6 +3037,8 @@ describe('loadCliConfig gemmaModelRouter', () => { experimental: { gemmaModelRouter: { enabled: true, + autoStartServer: false, + 
binaryPath: '/custom/lit', classifier: { host: 'http://custom:1234', model: 'custom-gemma', @@ -3040,6 +3049,8 @@ describe('loadCliConfig gemmaModelRouter', () => { const config = await loadCliConfig(settings, 'test-session', argv); expect(config.getGemmaModelRouterEnabled()).toBe(true); const gemmaSettings = config.getGemmaModelRouterSettings(); + expect(gemmaSettings.autoStartServer).toBe(false); + expect(gemmaSettings.binaryPath).toBe('/custom/lit'); expect(gemmaSettings.classifier?.host).toBe('http://custom:1234'); expect(gemmaSettings.classifier?.model).toBe('custom-gemma'); }); @@ -3057,6 +3068,8 @@ describe('loadCliConfig gemmaModelRouter', () => { const config = await loadCliConfig(settings, 'test-session', argv); expect(config.getGemmaModelRouterEnabled()).toBe(true); const gemmaSettings = config.getGemmaModelRouterSettings(); + expect(gemmaSettings.autoStartServer).toBe(true); + expect(gemmaSettings.binaryPath).toBe(''); expect(gemmaSettings.classifier?.host).toBe('http://localhost:9379'); expect(gemmaSettings.classifier?.model).toBe('gemma3-1b-gpu-custom'); }); diff --git a/packages/cli/src/config/settingsSchema.test.ts b/packages/cli/src/config/settingsSchema.test.ts index 27639fa0311..a7ce6cf0159 100644 --- a/packages/cli/src/config/settingsSchema.test.ts +++ b/packages/cli/src/config/settingsSchema.test.ts @@ -471,11 +471,33 @@ describe('SettingsSchema', () => { expect(enabled.category).toBe('Experimental'); expect(enabled.default).toBe(false); expect(enabled.requiresRestart).toBe(true); - expect(enabled.showInDialog).toBe(false); + expect(enabled.showInDialog).toBe(true); expect(enabled.description).toBe( 'Enable the Gemma Model Router (experimental). 
Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.', ); + const autoStartServer = gemmaModelRouter.properties.autoStartServer; + expect(autoStartServer).toBeDefined(); + expect(autoStartServer.type).toBe('boolean'); + expect(autoStartServer.category).toBe('Experimental'); + expect(autoStartServer.default).toBe(true); + expect(autoStartServer.requiresRestart).toBe(true); + expect(autoStartServer.showInDialog).toBe(true); + expect(autoStartServer.description).toBe( + 'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.', + ); + + const binaryPath = gemmaModelRouter.properties.binaryPath; + expect(binaryPath).toBeDefined(); + expect(binaryPath.type).toBe('string'); + expect(binaryPath.category).toBe('Experimental'); + expect(binaryPath.default).toBe(''); + expect(binaryPath.requiresRestart).toBe(true); + expect(binaryPath.showInDialog).toBe(false); + expect(binaryPath.description).toBe( + 'Custom path to the LiteRT-LM binary. 
Leave empty to use the default location (~/.gemini/bin/litert/).', + ); + const classifier = gemmaModelRouter.properties.classifier; expect(classifier).toBeDefined(); expect(classifier.type).toBe('object'); diff --git a/packages/cli/src/services/liteRtServerManager.test.ts b/packages/cli/src/services/liteRtServerManager.test.ts new file mode 100644 index 00000000000..f1af5c800ae --- /dev/null +++ b/packages/cli/src/services/liteRtServerManager.test.ts @@ -0,0 +1,68 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import type { GemmaModelRouterSettings } from '@google/gemini-cli-core'; + +const mockGetBinaryPath = vi.hoisted(() => vi.fn()); +const mockIsServerRunning = vi.hoisted(() => vi.fn()); +const mockStartServer = vi.hoisted(() => vi.fn()); + +vi.mock('../commands/gemma/platform.js', () => ({ + getBinaryPath: mockGetBinaryPath, + isServerRunning: mockIsServerRunning, +})); + +vi.mock('../commands/gemma/start.js', () => ({ + startServer: mockStartServer, +})); + +import { LiteRtServerManager } from './liteRtServerManager.js'; + +describe('LiteRtServerManager', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.spyOn(fs, 'existsSync').mockReturnValue(true); + mockIsServerRunning.mockResolvedValue(false); + mockStartServer.mockResolvedValue(true); + }); + + it('uses the configured custom binary path when auto-starting', async () => { + mockGetBinaryPath.mockReturnValue('/user/lit'); + + const settings: GemmaModelRouterSettings = { + enabled: true, + binaryPath: '/workspace/evil', + classifier: { + host: 'http://localhost:8123', + }, + }; + + await LiteRtServerManager.ensureRunning(settings); + + expect(mockGetBinaryPath).toHaveBeenCalledTimes(1); + expect(fs.existsSync).toHaveBeenCalledWith('/user/lit'); + expect(mockStartServer).toHaveBeenCalledWith('/user/lit', 8123); + }); + + it('falls back to the default binary 
path when no custom path is configured', async () => { + mockGetBinaryPath.mockReturnValue('/default/lit'); + + const settings: GemmaModelRouterSettings = { + enabled: true, + classifier: { + host: 'http://localhost:9379', + }, + }; + + await LiteRtServerManager.ensureRunning(settings); + + expect(mockGetBinaryPath).toHaveBeenCalledTimes(1); + expect(fs.existsSync).toHaveBeenCalledWith('/default/lit'); + expect(mockStartServer).toHaveBeenCalledWith('/default/lit', 9379); + }); +}); diff --git a/packages/cli/src/services/liteRtServerManager.ts b/packages/cli/src/services/liteRtServerManager.ts index 7107d2321ef..e72d321f9d0 100644 --- a/packages/cli/src/services/liteRtServerManager.ts +++ b/packages/cli/src/services/liteRtServerManager.ts @@ -4,13 +4,10 @@ * SPDX-License-Identifier: Apache-2.0 */ +import fs from 'node:fs'; import { debugLogger } from '@google/gemini-cli-core'; import type { GemmaModelRouterSettings } from '@google/gemini-cli-core'; -import { - getBinaryPath, - isBinaryInstalled, - isServerRunning, -} from '../commands/gemma/platform.js'; +import { getBinaryPath, isServerRunning } from '../commands/gemma/platform.js'; import { DEFAULT_PORT } from '../commands/gemma/constants.js'; export class LiteRtServerManager { @@ -19,7 +16,8 @@ export class LiteRtServerManager { ): Promise { if (!gemmaSettings?.enabled) return; if (gemmaSettings.autoStartServer === false) return; - if (!isBinaryInstalled()) { + const binaryPath = getBinaryPath(); + if (!binaryPath || !fs.existsSync(binaryPath)) { debugLogger.log( '[LiteRtServerManager] Binary not installed, skipping auto-start. 
Run "gemini gemma setup".', ); @@ -46,11 +44,6 @@ export class LiteRtServerManager { try { const { startServer } = await import('../commands/gemma/start.js'); - const binaryPath = gemmaSettings.binaryPath || getBinaryPath() || ''; - if (!binaryPath) { - debugLogger.warn('[LiteRtServerManager] Could not resolve binary path'); - return; - } const started = await startServer(binaryPath, port); if (started) { debugLogger.log(`[LiteRtServerManager] Server started on port ${port}`); diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index ab000b2691f..17fab3d8e07 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -1901,6 +1901,8 @@ describe('GemmaModelRouterSettings', () => { const config = new Config(baseParams); const settings = config.getGemmaModelRouterSettings(); expect(settings.enabled).toBe(false); + expect(settings.autoStartServer).toBe(true); + expect(settings.binaryPath).toBe(''); expect(settings.classifier?.host).toBe('http://localhost:9379'); expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom'); }); @@ -1910,6 +1912,8 @@ describe('GemmaModelRouterSettings', () => { ...baseParams, gemmaModelRouter: { enabled: true, + autoStartServer: false, + binaryPath: '/custom/lit', classifier: { host: 'http://custom:1234', model: 'custom-gemma', @@ -1919,6 +1923,8 @@ describe('GemmaModelRouterSettings', () => { const config = new Config(params); const settings = config.getGemmaModelRouterSettings(); expect(settings.enabled).toBe(true); + expect(settings.autoStartServer).toBe(false); + expect(settings.binaryPath).toBe('/custom/lit'); expect(settings.classifier?.host).toBe('http://custom:1234'); expect(settings.classifier?.model).toBe('custom-gemma'); }); @@ -1933,6 +1939,8 @@ describe('GemmaModelRouterSettings', () => { const config = new Config(params); const settings = config.getGemmaModelRouterSettings(); expect(settings.enabled).toBe(true); + 
expect(settings.autoStartServer).toBe(true); + expect(settings.binaryPath).toBe(''); expect(settings.classifier?.host).toBe('http://localhost:9379'); expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom'); }); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 45abdceacce..3e397cb9e64 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -1322,6 +1322,8 @@ export class Config implements McpContext, AgentLoopContext { }; this.gemmaModelRouter = { enabled: params.gemmaModelRouter?.enabled ?? false, + autoStartServer: params.gemmaModelRouter?.autoStartServer ?? true, + binaryPath: params.gemmaModelRouter?.binaryPath ?? '', classifier: { host: params.gemmaModelRouter?.classifier?.host ?? 'http://localhost:9379', From 637af65507c1b774e76c7e59595e427a6314782d Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Thu, 16 Apr 2026 19:44:08 -0700 Subject: [PATCH 18/33] fix(cli): keep gemma logs attached to tail --- packages/cli/src/commands/gemma/logs.test.ts | 129 ++++++++++++++++++- packages/cli/src/commands/gemma/logs.ts | 90 ++++++++----- 2 files changed, 186 insertions(+), 33 deletions(-) diff --git a/packages/cli/src/commands/gemma/logs.test.ts b/packages/cli/src/commands/gemma/logs.test.ts index caa9bc1ec4e..1cf34b77e5a 100644 --- a/packages/cli/src/commands/gemma/logs.test.ts +++ b/packages/cli/src/commands/gemma/logs.test.ts @@ -5,10 +5,54 @@ */ import fs from 'node:fs'; +import type { ChildProcess } from 'node:child_process'; +import { EventEmitter } from 'node:events'; import os from 'node:os'; import path from 'node:path'; -import { afterEach, describe, expect, it } from 'vitest'; -import { readLastLines } from './logs.js'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { spawn } from 'node:child_process'; +import { exitCli } from '../utils.js'; +import { getLogFilePath } from './constants.js'; +import { logsCommand, readLastLines } from 
'./logs.js'; + +vi.mock('@google/gemini-cli-core', async (importOriginal) => { + const { mockCoreDebugLogger } = await import( + '../../test-utils/mockDebugLogger.js' + ); + return mockCoreDebugLogger( + await importOriginal(), + { + stripAnsi: false, + }, + ); +}); + +vi.mock('node:child_process', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + spawn: vi.fn(), + }; +}); + +vi.mock('../utils.js', () => ({ + exitCli: vi.fn(), +})); + +vi.mock('./constants.js', () => ({ + getLogFilePath: vi.fn(), +})); + +function createMockChild(): ChildProcess { + return Object.assign(new EventEmitter(), { + kill: vi.fn(), + }) as unknown as ChildProcess; +} + +async function flushMicrotasks() { + await Promise.resolve(); + await Promise.resolve(); +} describe('readLastLines', () => { const tempFiles: string[] = []; @@ -49,3 +93,84 @@ describe('readLastLines', () => { await expect(readLastLines(filePath, 0)).resolves.toBe(''); }); }); + +describe('logsCommand', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.mocked(getLogFilePath).mockReturnValue('/tmp/gemma.log'); + vi.spyOn(fs.promises, 'access').mockResolvedValue(undefined); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('waits for the tail process to close before exiting in follow mode', async () => { + const child = createMockChild(); + vi.mocked(spawn).mockReturnValue(child); + + let resolved = false; + const handlerPromise = ( + logsCommand.handler as (argv: Record) => Promise + )({}).then(() => { + resolved = true; + }); + + await flushMicrotasks(); + + expect(spawn).toHaveBeenCalledWith( + 'tail', + ['-f', '-n', '20', '/tmp/gemma.log'], + { stdio: 'inherit' }, + ); + expect(resolved).toBe(false); + expect(exitCli).not.toHaveBeenCalled(); + + child.emit('close', 0); + await handlerPromise; + + expect(exitCli).toHaveBeenCalledWith(0); + }); + + it('uses one-shot tail output when follow is disabled', async () => { + const child = createMockChild(); + 
vi.mocked(spawn).mockReturnValue(child); + + const handlerPromise = ( + logsCommand.handler as (argv: Record) => Promise + )({ follow: false }); + + await flushMicrotasks(); + + expect(spawn).toHaveBeenCalledWith('tail', ['-n', '20', '/tmp/gemma.log'], { + stdio: 'inherit', + }); + + child.emit('close', 0); + await handlerPromise; + + expect(exitCli).toHaveBeenCalledWith(0); + }); + + it('follows from the requested line count when both --lines and --follow are set', async () => { + const child = createMockChild(); + vi.mocked(spawn).mockReturnValue(child); + + const handlerPromise = ( + logsCommand.handler as (argv: Record) => Promise + )({ lines: 5, follow: true }); + + await flushMicrotasks(); + + expect(spawn).toHaveBeenCalledWith( + 'tail', + ['-f', '-n', '5', '/tmp/gemma.log'], + { stdio: 'inherit' }, + ); + + child.emit('close', 0); + await handlerPromise; + + expect(exitCli).toHaveBeenCalledWith(0); + }); +}); diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts index 5eb81ea33d4..f2f96b6b08c 100644 --- a/packages/cli/src/commands/gemma/logs.ts +++ b/packages/cli/src/commands/gemma/logs.ts @@ -6,7 +6,7 @@ import type { CommandModule } from 'yargs'; import fs from 'node:fs'; -import { spawn } from 'node:child_process'; +import { spawn, type ChildProcess } from 'node:child_process'; import { debugLogger } from '@google/gemini-cli-core'; import { exitCli } from '../utils.js'; import { getLogFilePath } from './constants.js'; @@ -88,7 +88,41 @@ export async function readLastLines( const isWindows = process.platform === 'win32'; -export const logsCommand: CommandModule = { +interface LogsArgs { + lines?: number; + follow?: boolean; +} + +function waitForChild(child: ChildProcess): Promise { + return new Promise((resolve, reject) => { + child.once('error', reject); + child.once('close', (code) => resolve(code ?? 
1)); + }); +} + +async function runTail(logPath: string, lines: number, follow: boolean) { + const tailArgs = follow + ? ['-f', '-n', String(lines), logPath] + : ['-n', String(lines), logPath]; + const child = spawn('tail', tailArgs, { stdio: 'inherit' }); + + if (!follow) { + return waitForChild(child); + } + + const handleSigint = () => { + child.kill('SIGTERM'); + }; + process.once('SIGINT', handleSigint); + + try { + return await waitForChild(child); + } finally { + process.off('SIGINT', handleSigint); + } +} + +export const logsCommand: CommandModule = { command: 'logs', describe: 'View LiteRT-LM server logs', builder: (yargs) => @@ -101,8 +135,8 @@ export const logsCommand: CommandModule = { .option('follow', { alias: 'f', type: 'boolean', - default: true, - description: 'Follow log output (default when --lines is not set)', + description: + 'Follow log output (defaults to true when --lines is omitted)', }), handler: async (argv) => { const logPath = getLogFilePath(); @@ -118,24 +152,11 @@ export const logsCommand: CommandModule = { return; } - const rawLines = argv['lines']; - const lines = Number.isFinite(rawLines) ? Number(rawLines) : undefined; + const lines = argv.lines; + const follow = argv.follow ?? lines === undefined; + const requestedLines = lines ?? 20; - if (lines !== undefined) { - if (isWindows) { - process.stdout.write(await readLastLines(logPath, lines)); - await exitCli(0); - return; - } - const tailArgs = ['-n', String(lines), logPath]; - const child = spawn('tail', tailArgs, { stdio: 'inherit' }); - child.on('close', async (code) => { - await exitCli(code ?? 0); - }); - return; - } - - if (isWindows) { + if (follow && isWindows) { debugLogger.log( 'Live log following is not supported on Windows. 
Use --lines N to view recent logs.', ); @@ -143,16 +164,23 @@ export const logsCommand: CommandModule = { return; } - debugLogger.log(`Tailing ${logPath} (Ctrl+C to stop)\n`); - const tailArgs = ['-f', '-n', '20', logPath]; - const child = spawn('tail', tailArgs, { stdio: 'inherit' }); - - process.on('SIGINT', () => { - child.kill('SIGTERM'); - }); + if (isWindows) { + process.stdout.write(await readLastLines(logPath, requestedLines)); + await exitCli(0); + return; + } - child.on('close', async (code) => { - await exitCli(code ?? 0); - }); + try { + if (follow) { + debugLogger.log(`Tailing ${logPath} (Ctrl+C to stop)\n`); + } + const exitCode = await runTail(logPath, requestedLines, follow); + await exitCli(exitCode); + } catch (error) { + debugLogger.error( + `Failed to read log output: ${error instanceof Error ? error.message : String(error)}`, + ); + await exitCli(1); + } }, }; From 21bd3bd9bdaf9621df66eeff9dad28d6138e1607 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Thu, 16 Apr 2026 19:44:29 -0700 Subject: [PATCH 19/33] Update packages/cli/src/commands/gemma/platform.ts Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- packages/cli/src/commands/gemma/platform.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts index 0ee360ae63e..044056af182 100644 --- a/packages/cli/src/commands/gemma/platform.ts +++ b/packages/cli/src/commands/gemma/platform.ts @@ -70,12 +70,10 @@ export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus { const configuredBinaryPath = getUserConfiguredBinaryPath(); try { const settings = loadSettings(process.cwd()); - const gemmaSettings = settings.merged.experimental?.gemmaModelRouter; + const gemmaSettings = settings.forScope(SettingScope.User).settings.experimental?.gemmaModelRouter; settingsEnabled = gemmaSettings?.enabled === true; configuredPort = 
parsePortFromHost( gemmaSettings?.classifier?.host, - fallbackPort, - ); } catch { // ignore — settings may fail to load outside a workspace } From 6fed15f626a15a55555f7e52ba3265cce8167ef2 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Thu, 16 Apr 2026 19:44:41 -0700 Subject: [PATCH 20/33] Update packages/cli/src/gemini.tsx Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- packages/cli/src/gemini.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index 1f43419c8da..c10d74744af 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -615,7 +615,7 @@ export async function main() { import('./services/liteRtServerManager.js') .then(({ LiteRtServerManager }) => LiteRtServerManager.ensureRunning( - settings.merged.experimental?.gemmaModelRouter, + settings.forScope(SettingScope.User).settings.experimental?.gemmaModelRouter, ), ) .catch((e) => debugLogger.warn('LiteRT auto-start import failed:', e)); From 403a4c0e8475fb4ddaa30c747d04a7d0f919c2b6 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Thu, 16 Apr 2026 20:02:50 -0700 Subject: [PATCH 21/33] fix(cli): harden gemma setup and stop safety --- packages/cli/src/commands/gemma/constants.ts | 10 ++ .../cli/src/commands/gemma/platform.test.ts | 49 ++++++ packages/cli/src/commands/gemma/platform.ts | 157 +++++++++++++++++- packages/cli/src/commands/gemma/setup.test.ts | 60 +++++++ packages/cli/src/commands/gemma/setup.ts | 75 ++++++++- packages/cli/src/commands/gemma/start.ts | 8 +- packages/cli/src/commands/gemma/stop.test.ts | 112 +++++++++++++ packages/cli/src/commands/gemma/stop.ts | 60 +++++-- 8 files changed, 514 insertions(+), 17 deletions(-) create mode 100644 packages/cli/src/commands/gemma/setup.test.ts create mode 100644 packages/cli/src/commands/gemma/stop.test.ts diff --git a/packages/cli/src/commands/gemma/constants.ts 
b/packages/cli/src/commands/gemma/constants.ts index cab4c1f2b26..de80762a1a5 100644 --- a/packages/cli/src/commands/gemma/constants.ts +++ b/packages/cli/src/commands/gemma/constants.ts @@ -21,6 +21,16 @@ export const PLATFORM_BINARY_MAP: Record = { 'win32-x64': 'lit.windows_x86_64.exe', }; +// SHA-256 hashes for the official LiteRT-LM v0.9.0-alpha03 release binaries. +export const PLATFORM_BINARY_SHA256: Record = { + 'lit.macos_arm64': + '9e826a2634f2e8b220ad0f1e1b5c139e0b47cb172326e3b7d46d31382f49478e', + 'lit.linux_x86_64': + '66601df8a07f08244b188e9fcab0bf4a16562fe76d8d47e49f40273d57541ee8', + 'lit.windows_x86_64.exe': + 'de82d2829d2fb1cbdb318e2d8a78dc2f9659ff14cb11b2894d1f30e0bfde2bf6', +}; + export function getLiteRtBinDir(): string { return path.join(Storage.getGlobalGeminiDir(), 'bin', 'litert'); } diff --git a/packages/cli/src/commands/gemma/platform.test.ts b/packages/cli/src/commands/gemma/platform.test.ts index fb10c026ece..b00549365a9 100644 --- a/packages/cli/src/commands/gemma/platform.test.ts +++ b/packages/cli/src/commands/gemma/platform.test.ts @@ -21,7 +21,9 @@ vi.mock('../../config/settings.js', () => ({ import { getBinaryPath, + isExpectedLiteRtServerCommand, isBinaryInstalled, + readServerProcessInfo, resolveGemmaConfig, } from './platform.js'; @@ -110,4 +112,51 @@ describe('gemma platform helpers', () => { expect(isBinaryInstalled()).toBe(true); expect(fs.existsSync).toHaveBeenCalledWith('/custom/lit'); }); + + it('parses structured server process info from the pid file', () => { + vi.spyOn(fs, 'readFileSync').mockReturnValue( + JSON.stringify({ + pid: 1234, + binaryPath: '/custom/lit', + port: 8123, + }), + ); + + expect(readServerProcessInfo()).toEqual({ + pid: 1234, + binaryPath: '/custom/lit', + port: 8123, + }); + }); + + it('parses legacy pid-only files for backward compatibility', () => { + vi.spyOn(fs, 'readFileSync').mockReturnValue('4321'); + + expect(readServerProcessInfo()).toEqual({ + pid: 4321, + }); + }); + + it('matches only 
the expected LiteRT serve command', () => { + expect( + isExpectedLiteRtServerCommand('/custom/lit serve --port=8123 --verbose', { + binaryPath: '/custom/lit', + port: 8123, + }), + ).toBe(true); + + expect( + isExpectedLiteRtServerCommand('/custom/lit run --port=8123', { + binaryPath: '/custom/lit', + port: 8123, + }), + ).toBe(false); + + expect( + isExpectedLiteRtServerCommand('/custom/lit serve --port=9000', { + binaryPath: '/custom/lit', + port: 8123, + }), + ).toBe(false); + }); }); diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts index 044056af182..953bd700703 100644 --- a/packages/cli/src/commands/gemma/platform.ts +++ b/packages/cli/src/commands/gemma/platform.ts @@ -29,6 +29,12 @@ export interface GemmaConfigStatus { configuredBinaryPath?: string; } +export interface LiteRtServerProcessInfo { + pid: number; + binaryPath?: string; + port?: number; +} + function getUserConfiguredBinaryPath( workspaceDir = process.cwd(), ): string | undefined { @@ -136,17 +142,162 @@ export async function isServerRunning(port: number): Promise { } } -export function readServerPid(): number | null { +function isLiteRtServerProcessInfo( + value: unknown, +): value is LiteRtServerProcessInfo { + if (!value || typeof value !== 'object') { + return false; + } + + const isPositiveInteger = (candidate: unknown): candidate is number => + typeof candidate === 'number' && + Number.isInteger(candidate) && + candidate > 0; + const isNonEmptyString = (candidate: unknown): candidate is string => + typeof candidate === 'string' && candidate.length > 0; + + const pid: unknown = Object.getOwnPropertyDescriptor(value, 'pid')?.value; + if (!isPositiveInteger(pid)) { + return false; + } + + const binaryPath: unknown = Object.getOwnPropertyDescriptor( + value, + 'binaryPath', + )?.value; + if (binaryPath !== undefined && !isNonEmptyString(binaryPath)) { + return false; + } + + const port: unknown = Object.getOwnPropertyDescriptor(value, 
'port')?.value; + if (port !== undefined && !isPositiveInteger(port)) { + return false; + } + + return true; +} + +export function readServerProcessInfo(): LiteRtServerProcessInfo | null { const pidPath = getPidFilePath(); try { const content = fs.readFileSync(pidPath, 'utf-8').trim(); - const pid = parseInt(content, 10); - return isNaN(pid) ? null : pid; + if (!content) { + return null; + } + + if (/^\d+$/.test(content)) { + return { pid: parseInt(content, 10) }; + } + + const parsed = JSON.parse(content) as unknown; + return isLiteRtServerProcessInfo(parsed) ? parsed : null; + } catch { + return null; + } +} + +export function writeServerProcessInfo( + processInfo: LiteRtServerProcessInfo, +): void { + fs.writeFileSync(getPidFilePath(), JSON.stringify(processInfo), 'utf-8'); +} + +export function readServerPid(): number | null { + return readServerProcessInfo()?.pid ?? null; +} + +function normalizeProcessValue(value: string): string { + const normalized = value.replace(/\0/g, ' ').trim(); + if (process.platform === 'win32') { + return normalized.replace(/\\/g, '/').replace(/\s+/g, ' ').toLowerCase(); + } + return normalized.replace(/\s+/g, ' '); +} + +function readProcessCommandLine(pid: number): string | null { + try { + if (process.platform === 'linux') { + const output = fs.readFileSync(`/proc/${pid}/cmdline`, 'utf-8'); + return output.trim() ? 
output : null; + } + + if (process.platform === 'win32') { + const output = execFileSync( + 'powershell.exe', + [ + '-NoProfile', + '-Command', + `(Get-CimInstance Win32_Process -Filter "ProcessId = ${pid}").CommandLine`, + ], + { + encoding: 'utf-8', + timeout: 5000, + }, + ); + return output.trim() || null; + } + + const output = execFileSync('ps', ['-p', String(pid), '-o', 'command='], { + encoding: 'utf-8', + timeout: 5000, + }); + return output.trim() || null; } catch { return null; } } +export function isExpectedLiteRtServerCommand( + commandLine: string, + options: { + binaryPath?: string | null; + port?: number; + }, +): boolean { + const normalizedCommandLine = normalizeProcessValue(commandLine); + if (!normalizedCommandLine) { + return false; + } + + if (!/(^|\s|")serve(\s|$)/.test(normalizedCommandLine)) { + return false; + } + + if ( + options.port !== undefined && + !normalizedCommandLine.includes(`--port=${options.port}`) + ) { + return false; + } + + if (!options.binaryPath) { + return true; + } + + const normalizedBinaryPath = normalizeProcessValue(options.binaryPath); + const normalizedBinaryName = normalizeProcessValue( + path.basename(options.binaryPath), + ); + return ( + normalizedCommandLine.includes(normalizedBinaryPath) || + normalizedCommandLine.includes(normalizedBinaryName) + ); +} + +export function isExpectedLiteRtServerProcess( + pid: number, + options: { + binaryPath?: string | null; + port?: number; + }, +): boolean { + const commandLine = readProcessCommandLine(pid); + if (!commandLine) { + return false; + } + return isExpectedLiteRtServerCommand(commandLine, options); +} + export function isProcessRunning(pid: number): boolean { try { process.kill(pid, 0); diff --git a/packages/cli/src/commands/gemma/setup.test.ts b/packages/cli/src/commands/gemma/setup.test.ts new file mode 100644 index 00000000000..663a5d6e4c3 --- /dev/null +++ b/packages/cli/src/commands/gemma/setup.test.ts @@ -0,0 +1,60 @@ +/** + * @license + * Copyright 2026 
Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { afterEach, describe, expect, it } from 'vitest'; +import { PLATFORM_BINARY_MAP, PLATFORM_BINARY_SHA256 } from './constants.js'; +import { computeFileSha256, verifyFileSha256 } from './setup.js'; + +describe('gemma setup checksum helpers', () => { + const tempFiles: string[] = []; + + afterEach(async () => { + await Promise.all( + tempFiles + .splice(0) + .map((filePath) => fs.promises.rm(filePath, { force: true })), + ); + }); + + it('has a pinned checksum for every supported LiteRT binary', () => { + expect(Object.keys(PLATFORM_BINARY_SHA256).sort()).toEqual( + Object.values(PLATFORM_BINARY_MAP).sort(), + ); + }); + + it('computes the sha256 for a downloaded file', async () => { + const filePath = path.join( + os.tmpdir(), + `gemma-setup-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + tempFiles.push(filePath); + await fs.promises.writeFile(filePath, 'hello world', 'utf-8'); + + await expect(computeFileSha256(filePath)).resolves.toBe( + 'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9', + ); + }); + + it('verifies whether a file matches the expected sha256', async () => { + const filePath = path.join( + os.tmpdir(), + `gemma-setup-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + tempFiles.push(filePath); + await fs.promises.writeFile(filePath, 'hello world', 'utf-8'); + + await expect( + verifyFileSha256( + filePath, + 'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9', + ), + ).resolves.toBe(true); + await expect(verifyFileSha256(filePath, 'deadbeef')).resolves.toBe(false); + }); +}); diff --git a/packages/cli/src/commands/gemma/setup.ts b/packages/cli/src/commands/gemma/setup.ts index 5a7034f6ae9..5346924e6c6 100644 --- a/packages/cli/src/commands/gemma/setup.ts +++ b/packages/cli/src/commands/gemma/setup.ts @@ -5,6 +5,7 @@ */ import type { CommandModule 
} from 'yargs'; +import { createHash } from 'node:crypto'; import fs from 'node:fs'; import path from 'node:path'; import { execFileSync, spawn as nodeSpawn } from 'node:child_process'; @@ -12,7 +13,11 @@ import chalk from 'chalk'; import { debugLogger } from '@google/gemini-cli-core'; import { loadSettings, SettingScope } from '../../config/settings.js'; import { exitCli } from '../utils.js'; -import { DEFAULT_PORT, GEMMA_MODEL_NAME } from './constants.js'; +import { + DEFAULT_PORT, + GEMMA_MODEL_NAME, + PLATFORM_BINARY_SHA256, +} from './constants.js'; import { detectPlatform, getBinaryDownloadUrl, @@ -110,6 +115,29 @@ async function downloadFile(url: string, destPath: string): Promise { fs.renameSync(tmpPath, destPath); } +export async function computeFileSha256(filePath: string): Promise { + const hash = createHash('sha256'); + const fileStream = fs.createReadStream(filePath); + + return new Promise((resolve, reject) => { + fileStream.on('data', (chunk) => { + hash.update(chunk); + }); + fileStream.on('error', reject); + fileStream.on('end', () => { + resolve(hash.digest('hex')); + }); + }); +} + +export async function verifyFileSha256( + filePath: string, + expectedHash: string, +): Promise { + const actualHash = await computeFileSha256(filePath); + return actualHash === expectedHash; +} + function spawnInherited(command: string, args: string[]): Promise { return new Promise((resolve, reject) => { const child = nodeSpawn(command, args, { @@ -195,6 +223,51 @@ async function handleSetup(argv: SetupArgs): Promise { return 1; } + const expectedHash = PLATFORM_BINARY_SHA256[platform.binaryName]; + if (!expectedHash) { + logError( + chalk.red( + ` ✗ No checksum is configured for ${platform.binaryName}. 
Refusing to install the binary.`, + ), + ); + try { + fs.rmSync(binaryPath, { force: true }); + } catch { + // ignore + } + return 1; + } + + try { + const checksumVerified = await verifyFileSha256(binaryPath, expectedHash); + if (!checksumVerified) { + logError( + chalk.red( + ' ✗ Downloaded binary checksum did not match the expected release hash.', + ), + ); + try { + fs.rmSync(binaryPath, { force: true }); + } catch { + // ignore + } + return 1; + } + log(chalk.green(' ✓ Binary checksum verified')); + } catch (error) { + logError( + chalk.red( + ` ✗ Failed to verify binary checksum: ${error instanceof Error ? error.message : String(error)}`, + ), + ); + try { + fs.rmSync(binaryPath, { force: true }); + } catch { + // ignore + } + return 1; + } + if (process.platform !== 'win32') { try { fs.chmodSync(binaryPath, 0o755); diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts index 96a31de218e..badf7b69a5a 100644 --- a/packages/cli/src/commands/gemma/start.ts +++ b/packages/cli/src/commands/gemma/start.ts @@ -23,6 +23,7 @@ import { isBinaryInstalled, isServerRunning, resolveGemmaConfig, + writeServerProcessInfo, } from './platform.js'; export async function startServer( @@ -48,9 +49,12 @@ export async function startServer( stdio: ['ignore', logFd, logFd], }); - const pidPath = getPidFilePath(); if (child.pid) { - fs.writeFileSync(pidPath, String(child.pid), 'utf-8'); + writeServerProcessInfo({ + pid: child.pid, + binaryPath, + port, + }); } child.unref(); diff --git a/packages/cli/src/commands/gemma/stop.test.ts b/packages/cli/src/commands/gemma/stop.test.ts new file mode 100644 index 00000000000..64eaf6d5fcd --- /dev/null +++ b/packages/cli/src/commands/gemma/stop.test.ts @@ -0,0 +1,112 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +const mockGetBinaryPath = 
vi.hoisted(() => vi.fn()); +const mockIsExpectedLiteRtServerProcess = vi.hoisted(() => vi.fn()); +const mockIsProcessRunning = vi.hoisted(() => vi.fn()); +const mockIsServerRunning = vi.hoisted(() => vi.fn()); +const mockReadServerPid = vi.hoisted(() => vi.fn()); +const mockReadServerProcessInfo = vi.hoisted(() => vi.fn()); +const mockResolveGemmaConfig = vi.hoisted(() => vi.fn()); + +vi.mock('@google/gemini-cli-core', async (importOriginal) => { + const { mockCoreDebugLogger } = await import( + '../../test-utils/mockDebugLogger.js' + ); + return mockCoreDebugLogger( + await importOriginal(), + { + stripAnsi: false, + }, + ); +}); + +vi.mock('./constants.js', () => ({ + DEFAULT_PORT: 9379, + getPidFilePath: vi.fn(() => '/tmp/litert-server.pid'), +})); + +vi.mock('./platform.js', () => ({ + getBinaryPath: mockGetBinaryPath, + isExpectedLiteRtServerProcess: mockIsExpectedLiteRtServerProcess, + isProcessRunning: mockIsProcessRunning, + isServerRunning: mockIsServerRunning, + readServerPid: mockReadServerPid, + readServerProcessInfo: mockReadServerProcessInfo, + resolveGemmaConfig: mockResolveGemmaConfig, +})); + +vi.mock('../utils.js', () => ({ + exitCli: vi.fn(), +})); + +import { stopServer } from './stop.js'; + +describe('gemma stop command', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.useFakeTimers(); + mockGetBinaryPath.mockReturnValue('/custom/lit'); + mockResolveGemmaConfig.mockReturnValue({ configuredPort: 9379 }); + }); + + afterEach(() => { + vi.useRealTimers(); + vi.restoreAllMocks(); + }); + + it('refuses to signal a pid that does not match the expected LiteRT server', async () => { + mockReadServerProcessInfo.mockReturnValue({ + pid: 1234, + binaryPath: '/custom/lit', + port: 8123, + }); + mockIsProcessRunning.mockReturnValue(true); + mockIsExpectedLiteRtServerProcess.mockReturnValue(false); + + const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true); + + await expect(stopServer(8123)).resolves.toBe('unexpected-process'); + 
expect(killSpy).not.toHaveBeenCalled(); + }); + + it('stops the verified LiteRT server and removes the pid file', async () => { + mockReadServerProcessInfo.mockReturnValue({ + pid: 1234, + binaryPath: '/custom/lit', + port: 8123, + }); + mockIsProcessRunning.mockReturnValueOnce(true).mockReturnValueOnce(false); + mockIsExpectedLiteRtServerProcess.mockReturnValue(true); + + const unlinkSpy = vi.spyOn(fs, 'unlinkSync').mockImplementation(() => {}); + const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true); + + const stopPromise = stopServer(8123); + await vi.runAllTimersAsync(); + + await expect(stopPromise).resolves.toBe('stopped'); + expect(killSpy).toHaveBeenCalledWith(1234, 'SIGTERM'); + expect(unlinkSpy).toHaveBeenCalledWith('/tmp/litert-server.pid'); + }); + + it('cleans up a stale pid file when the recorded process is no longer running', async () => { + mockReadServerProcessInfo.mockReturnValue({ + pid: 1234, + binaryPath: '/custom/lit', + port: 8123, + }); + mockIsProcessRunning.mockReturnValue(false); + + const unlinkSpy = vi.spyOn(fs, 'unlinkSync').mockImplementation(() => {}); + + await expect(stopServer(8123)).resolves.toBe('not-running'); + expect(unlinkSpy).toHaveBeenCalledWith('/tmp/litert-server.pid'); + }); +}); diff --git a/packages/cli/src/commands/gemma/stop.ts b/packages/cli/src/commands/gemma/stop.ts index 676dc0b667a..a135f4e36d1 100644 --- a/packages/cli/src/commands/gemma/stop.ts +++ b/packages/cli/src/commands/gemma/stop.ts @@ -11,33 +11,54 @@ import { debugLogger } from '@google/gemini-cli-core'; import { exitCli } from '../utils.js'; import { DEFAULT_PORT, getPidFilePath } from './constants.js'; import { - readServerPid, + getBinaryPath, + isExpectedLiteRtServerProcess, isProcessRunning, isServerRunning, + readServerPid, + readServerProcessInfo, resolveGemmaConfig, } from './platform.js'; -export async function stopServer(): Promise { - const pid = readServerPid(); +export type StopServerResult = + | 'stopped' + | 
'not-running' + | 'unexpected-process' + | 'failed'; + +export async function stopServer( + expectedPort?: number, +): Promise { + const processInfo = readServerProcessInfo(); const pidPath = getPidFilePath(); - if (pid === null) { - return false; + if (!processInfo) { + return 'not-running'; } + const { pid } = processInfo; if (!isProcessRunning(pid)) { try { fs.unlinkSync(pidPath); } catch { // ignore } - return false; + return 'not-running'; + } + + const binaryPath = processInfo.binaryPath ?? getBinaryPath(); + const port = processInfo.port ?? expectedPort; + if (!isExpectedLiteRtServerProcess(pid, { binaryPath, port })) { + debugLogger.warn( + `Refusing to stop PID ${pid} because it does not match the expected LiteRT server process.`, + ); + return 'unexpected-process'; } try { process.kill(pid, 'SIGTERM'); } catch { - return false; + return 'failed'; } await new Promise((resolve) => setTimeout(resolve, 1000)); @@ -49,6 +70,9 @@ export async function stopServer(): Promise { // ignore } await new Promise((resolve) => setTimeout(resolve, 500)); + if (isProcessRunning(pid)) { + return 'failed'; + } } try { @@ -57,8 +81,9 @@ export async function stopServer(): Promise { // ignore } - return true; + return 'stopped'; } + export const stopCommand: CommandModule = { command: 'stop', describe: 'Stop the LiteRT-LM server', @@ -78,14 +103,27 @@ export const stopCommand: CommandModule = { port = configuredPort; } - const pid = readServerPid(); + const processInfo = readServerProcessInfo(); + const pid = processInfo?.pid ?? 
readServerPid(); if (pid !== null && isProcessRunning(pid)) { debugLogger.log(`Stopping LiteRT server (PID ${pid})...`); - const stopped = await stopServer(); - if (stopped) { + const result = await stopServer(port); + if (result === 'stopped') { debugLogger.log(chalk.green('LiteRT server stopped.')); await exitCli(0); + } else if (result === 'unexpected-process') { + debugLogger.error( + chalk.red( + `Refusing to stop PID ${pid} because it does not match the expected LiteRT server process.`, + ), + ); + debugLogger.error( + chalk.dim( + 'Remove the stale pid file after verifying the process, or stop the process manually.', + ), + ); + await exitCli(1); } else { debugLogger.error(chalk.red('Failed to stop LiteRT server.')); await exitCli(1); From 078aeb3fa202e2a4890a4720b760786051336044 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Thu, 16 Apr 2026 20:07:21 -0700 Subject: [PATCH 22/33] fix(cli): repair gemma platform config parsing --- packages/cli/src/commands/gemma/platform.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts index 953bd700703..aa7a4c82b69 100644 --- a/packages/cli/src/commands/gemma/platform.ts +++ b/packages/cli/src/commands/gemma/platform.ts @@ -76,10 +76,12 @@ export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus { const configuredBinaryPath = getUserConfiguredBinaryPath(); try { const settings = loadSettings(process.cwd()); - const gemmaSettings = settings.forScope(SettingScope.User).settings.experimental?.gemmaModelRouter; + const gemmaSettings = settings.merged.experimental?.gemmaModelRouter; settingsEnabled = gemmaSettings?.enabled === true; configuredPort = parsePortFromHost( gemmaSettings?.classifier?.host, + fallbackPort, + ); } catch { // ignore — settings may fail to load outside a workspace } From dec7329e40d7fbcab9d3b0fa9a9217f6179604d2 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Thu, 16 Apr 
2026 20:17:46 -0700 Subject: [PATCH 23/33] fix(cli): prettier --- packages/cli/src/gemini.tsx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index c10d74744af..7182fa9b34e 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -615,7 +615,8 @@ export async function main() { import('./services/liteRtServerManager.js') .then(({ LiteRtServerManager }) => LiteRtServerManager.ensureRunning( - settings.forScope(SettingScope.User).settings.experimental?.gemmaModelRouter, + settings.forScope(SettingScope.User).settings.experimental + ?.gemmaModelRouter, ), ) .catch((e) => debugLogger.warn('LiteRT auto-start import failed:', e)); From 8f2a331713739c0d467b2b33aee1312332d42ea7 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Mon, 20 Apr 2026 14:14:25 -0700 Subject: [PATCH 24/33] fix(cli): default autostart to false --- docs/reference/configuration.md | 2 +- schemas/settings.schema.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index d0eb4ef8d36..c3d9ff0f387 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1714,7 +1714,7 @@ their corresponding top-level category object in your `settings.json` file. - **`experimental.gemmaModelRouter.autoStartServer`** (boolean): - **Description:** Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled. 
- - **Default:** `true` + - **Default:** `false` - **Requires restart:** Yes - **`experimental.gemmaModelRouter.binaryPath`** (string): diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index 94912f3a223..d9dd31a4a5a 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -2924,7 +2924,7 @@ "title": "Auto-start LiteRT Server", "description": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.", "markdownDescription": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `true`", - "default": true, + "default": false, "type": "boolean" }, "binaryPath": { From 364eb1c230faf9fa5abc0f6dcfda14595d20d3b4 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Mon, 20 Apr 2026 14:24:04 -0700 Subject: [PATCH 25/33] fix(cli): make server check more robust --- packages/cli/src/commands/gemma/constants.ts | 1 + packages/cli/src/commands/gemma/platform.ts | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/commands/gemma/constants.ts b/packages/cli/src/commands/gemma/constants.ts index de80762a1a5..a37326a0578 100644 --- a/packages/cli/src/commands/gemma/constants.ts +++ b/packages/cli/src/commands/gemma/constants.ts @@ -13,6 +13,7 @@ export const LITERT_RELEASE_BASE_URL = export const GEMMA_MODEL_NAME = 'gemma3-1b-gpu-custom'; export const DEFAULT_PORT = 9379; export const HEALTH_CHECK_TIMEOUT_MS = 5000; +export const LITERT_API_VERSION = 'v1beta'; export const SERVER_START_WAIT_MS = 3000; export const PLATFORM_BINARY_MAP: Record = { diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts index aa7a4c82b69..aa85b8353fe 100644 --- a/packages/cli/src/commands/gemma/platform.ts +++ b/packages/cli/src/commands/gemma/platform.ts @@ -15,6 +15,7 @@ import { getLiteRtBinDir, GEMMA_MODEL_NAME, 
HEALTH_CHECK_TIMEOUT_MS, + LITERT_API_VERSION, getPidFilePath, } from './constants.js'; @@ -136,9 +137,12 @@ export async function isServerRunning(port: number): Promise { () => controller.abort(), HEALTH_CHECK_TIMEOUT_MS, ); - await fetch(`http://localhost:${port}/`, { signal: controller.signal }); + const response = await fetch( + `http://localhost:${port}/${LITERT_API_VERSION}/models/${GEMMA_MODEL_NAME}`, + { signal: controller.signal }, + ); clearTimeout(timeout); - return true; + return response.ok; } catch { return false; } From dac00da10ed7507b680412ba548483615e6370ac Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Mon, 20 Apr 2026 14:37:26 -0700 Subject: [PATCH 26/33] fix(cli): scope gemma settings for security and project isolation --- packages/cli/src/commands/gemma/setup.ts | 56 ++++++++++++++++-------- packages/cli/src/gemini.tsx | 19 +++++--- 2 files changed, 52 insertions(+), 23 deletions(-) diff --git a/packages/cli/src/commands/gemma/setup.ts b/packages/cli/src/commands/gemma/setup.ts index 5346924e6c6..a936462dbff 100644 --- a/packages/cli/src/commands/gemma/setup.ts +++ b/packages/cli/src/commands/gemma/setup.ts @@ -324,30 +324,50 @@ async function handleSetup(argv: SetupArgs): Promise { log(' Configuring settings...'); try { const settings = loadSettings(process.cwd()); - const existingGemma = + + // User scope: security-sensitive settings that must not be overridable + // by workspace configs (prevents arbitrary binary execution). + const existingUserGemma = settings.forScope(SettingScope.User).settings.experimental ?.gemmaModelRouter ?? {}; - autoStartServer = existingGemma.autoStartServer ?? true; - - const newGemmaSettings = { - ...existingGemma, - enabled: true, - autoStartServer, - classifier: { - ...existingGemma.classifier, - host: `http://localhost:${port}`, - model: GEMMA_MODEL_NAME, - }, - }; - - const existingExperimental = + autoStartServer = existingUserGemma.autoStartServer ?? 
true; + const existingUserExperimental = settings.forScope(SettingScope.User).settings.experimental ?? {}; settings.setValue(SettingScope.User, 'experimental', { - ...existingExperimental, - gemmaModelRouter: newGemmaSettings, + ...existingUserExperimental, + gemmaModelRouter: { + autoStartServer, + ...(existingUserGemma.binaryPath !== undefined + ? { binaryPath: existingUserGemma.binaryPath } + : {}), + }, + }); + + // Workspace scope: project-isolated settings so the local model only + // runs for this specific project, saving resources globally. + const existingWorkspaceGemma = + settings.forScope(SettingScope.Workspace).settings.experimental + ?.gemmaModelRouter ?? {}; + const existingWorkspaceExperimental = + settings.forScope(SettingScope.Workspace).settings.experimental ?? {}; + settings.setValue(SettingScope.Workspace, 'experimental', { + ...existingWorkspaceExperimental, + gemmaModelRouter: { + ...existingWorkspaceGemma, + enabled: true, + classifier: { + ...existingWorkspaceGemma.classifier, + host: `http://localhost:${port}`, + model: GEMMA_MODEL_NAME, + }, + }, }); - log(chalk.green(' ✓ Settings updated in ~/.gemini/settings.json')); + log(chalk.green(' ✓ Settings updated')); + log(chalk.dim(' User (~/.gemini/settings.json): autoStartServer')); + log( + chalk.dim(' Workspace (.gemini/settings.json): enabled, classifier'), + ); settingsUpdated = true; } catch (error) { logError( diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index 7182fa9b34e..fb338664a29 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -613,12 +613,21 @@ export async function main() { initAppHandle?.end(); import('./services/liteRtServerManager.js') - .then(({ LiteRtServerManager }) => - LiteRtServerManager.ensureRunning( + .then(({ LiteRtServerManager }) => { + const mergedGemma = settings.merged.experimental?.gemmaModelRouter; + if (!mergedGemma) return; + // Security: binaryPath and autoStartServer must come from user-scoped + // 
settings only to prevent workspace configs from triggering arbitrary + // binary execution. + const userGemma = settings.forScope(SettingScope.User).settings.experimental - ?.gemmaModelRouter, - ), - ) + ?.gemmaModelRouter; + return LiteRtServerManager.ensureRunning({ + ...mergedGemma, + binaryPath: userGemma?.binaryPath, + autoStartServer: userGemma?.autoStartServer, + }); + }) .catch((e) => debugLogger.warn('LiteRT auto-start import failed:', e)); if ( From e17478b3eb86e57c5ed129bfdec173992c682c69 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Mon, 20 Apr 2026 14:40:03 -0700 Subject: [PATCH 27/33] fix(cli): handle missing tail command and log stale PID cleanup --- packages/cli/src/commands/gemma/logs.ts | 22 ++++++++++++++++++---- packages/cli/src/commands/gemma/stop.ts | 3 +++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts index f2f96b6b08c..c7f61f05cb6 100644 --- a/packages/cli/src/commands/gemma/logs.ts +++ b/packages/cli/src/commands/gemma/logs.ts @@ -177,10 +177,24 @@ export const logsCommand: CommandModule = { const exitCode = await runTail(logPath, requestedLines, follow); await exitCli(exitCode); } catch (error) { - debugLogger.error( - `Failed to read log output: ${error instanceof Error ? error.message : String(error)}`, - ); - await exitCli(1); + if ((error as NodeJS.ErrnoException).code === 'ENOENT') { + if (!follow) { + process.stdout.write( + await readLastLines(logPath, requestedLines), + ); + await exitCli(0); + } else { + debugLogger.error( + '"tail" command not found. Use --lines N to view recent logs without tail.', + ); + await exitCli(1); + } + } else { + debugLogger.error( + `Failed to read log output: ${error instanceof Error ? 
error.message : String(error)}`, + ); + await exitCli(1); + } } }, }; diff --git a/packages/cli/src/commands/gemma/stop.ts b/packages/cli/src/commands/gemma/stop.ts index a135f4e36d1..c51269c579c 100644 --- a/packages/cli/src/commands/gemma/stop.ts +++ b/packages/cli/src/commands/gemma/stop.ts @@ -38,6 +38,9 @@ export async function stopServer( const { pid } = processInfo; if (!isProcessRunning(pid)) { + debugLogger.log( + `Stale PID file found (PID ${pid} is not running), removing ${pidPath}`, + ); try { fs.unlinkSync(pidPath); } catch { From 6bad21190e3cdd9277b2fce064181b4c886ccd26 Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Mon, 20 Apr 2026 15:01:13 -0700 Subject: [PATCH 28/33] fix(cli): use generateContent endpoint for server health check --- packages/cli/src/commands/gemma/platform.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts index aa85b8353fe..0fdd6e02e14 100644 --- a/packages/cli/src/commands/gemma/platform.ts +++ b/packages/cli/src/commands/gemma/platform.ts @@ -138,11 +138,13 @@ export async function isServerRunning(port: number): Promise { HEALTH_CHECK_TIMEOUT_MS, ); const response = await fetch( - `http://localhost:${port}/${LITERT_API_VERSION}/models/${GEMMA_MODEL_NAME}`, - { signal: controller.signal }, + `http://localhost:${port}/${LITERT_API_VERSION}/models/${GEMMA_MODEL_NAME}:generateContent`, + { method: 'POST', signal: controller.signal }, ); clearTimeout(timeout); - return response.ok; + // A 400 (bad request) confirms the route exists — the server recognises + // the model endpoint. Only a 404 means "wrong server / wrong model". 
+ return response.status !== 404; } catch { return false; } From d3576a2256583eca7f49b7fb7696790d627cb52f Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Mon, 20 Apr 2026 15:16:59 -0700 Subject: [PATCH 29/33] fix(cli): default autoStartServer to false in schema --- docs/cli/settings.md | 23 ++++++++++++----------- packages/cli/src/config/settingsSchema.ts | 2 +- schemas/settings.schema.json | 2 +- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/docs/cli/settings.md b/docs/cli/settings.md index 7f34365bb00..d9e72f16f6e 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -161,17 +161,18 @@ they appear in the UI. ### Experimental -| UI Label | Setting | Description | Default | -| ---------------------------------------------------- | -------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | -| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` | -| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | -| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | -| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | -| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | -| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. 
| `false` | -| Auto Memory | `experimental.autoMemory` | Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox. | `false` | -| Use the generalist profile to manage agent contexts. | `experimental.generalistProfile` | Suitable for general coding and software development tasks. | `false` | -| Enable Context Management | `experimental.contextManagement` | Enable logic for context management. | `false` | +| UI Label | Setting | Description | Default | +| ---------------------------------------------------- | ----------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` | +| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | +| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | +| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | +| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | +| Enable Gemma Model Router | `experimental.gemmaModelRouter.enabled` | Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim. | `false` | +| Auto-start LiteRT Server | `experimental.gemmaModelRouter.autoStartServer` | Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled. 
| `false` | +| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` | +| Use the generalist profile to manage agent contexts. | `experimental.generalistProfile` | Suitable for general coding and software development tasks. | `false` | +| Enable Context Management | `experimental.contextManagement` | Enable logic for context management. | `false` | ### Skills diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index b7a517141f9..7e7de801328 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -2176,7 +2176,7 @@ const SETTINGS_SCHEMA = { label: 'Auto-start LiteRT Server', category: 'Experimental', requiresRestart: true, - default: true, + default: false, description: 'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.', showInDialog: true, diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index 8dcfbe02d88..d30a6f4b0a3 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -2923,7 +2923,7 @@ "autoStartServer": { "title": "Auto-start LiteRT Server", "description": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.", - "markdownDescription": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `true`", + "markdownDescription": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`", "default": false, "type": "boolean" }, From 2d3e3ab46a7c2ec8c439db6972eaf8a20a8948ef Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Mon, 20 Apr 2026 15:25:07 -0700 
Subject: [PATCH 30/33] fix(cli): fix unsafe type assertion in logs error handler --- packages/cli/src/commands/gemma/logs.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts index c7f61f05cb6..ed81ac4a4e4 100644 --- a/packages/cli/src/commands/gemma/logs.ts +++ b/packages/cli/src/commands/gemma/logs.ts @@ -177,11 +177,13 @@ export const logsCommand: CommandModule = { const exitCode = await runTail(logPath, requestedLines, follow); await exitCli(exitCode); } catch (error) { - if ((error as NodeJS.ErrnoException).code === 'ENOENT') { + if ( + error instanceof Error && + 'code' in error && + error.code === 'ENOENT' + ) { if (!follow) { - process.stdout.write( - await readLastLines(logPath, requestedLines), - ); + process.stdout.write(await readLastLines(logPath, requestedLines)); await exitCli(0); } else { debugLogger.error( From 14e60b3fe0fb5f998ed45a65b5146503de0b28bc Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Mon, 20 Apr 2026 15:49:11 -0700 Subject: [PATCH 31/33] format --- packages/cli/src/gemini.tsx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index fb338664a29..6e257270d77 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -619,9 +619,8 @@ export async function main() { // Security: binaryPath and autoStartServer must come from user-scoped // settings only to prevent workspace configs from triggering arbitrary // binary execution. 
- const userGemma = - settings.forScope(SettingScope.User).settings.experimental - ?.gemmaModelRouter; + const userGemma = settings.forScope(SettingScope.User).settings + .experimental?.gemmaModelRouter; return LiteRtServerManager.ensureRunning({ ...mergedGemma, binaryPath: userGemma?.binaryPath, From b894f16e3b0d91b0b7b4d734e0577868eb668bac Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Mon, 20 Apr 2026 15:57:53 -0700 Subject: [PATCH 32/33] fix(chore): update docs --- docs/cli/settings.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/cli/settings.md b/docs/cli/settings.md index d9e72f16f6e..fbe556a3705 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -171,6 +171,7 @@ they appear in the UI. | Enable Gemma Model Router | `experimental.gemmaModelRouter.enabled` | Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim. | `false` | | Auto-start LiteRT Server | `experimental.gemmaModelRouter.autoStartServer` | Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled. | `false` | | Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` | +| Auto Memory | `experimental.autoMemory` | Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox. | `false` | | Use the generalist profile to manage agent contexts. | `experimental.generalistProfile` | Suitable for general coding and software development tasks. | `false` | | Enable Context Management | `experimental.contextManagement` | Enable logic for context management. 
| `false` | From 8ff63304aabd33779767cca7de7b00da5f453e5f Mon Sep 17 00:00:00 2001 From: Samee Zahid Date: Mon, 20 Apr 2026 16:37:51 -0700 Subject: [PATCH 33/33] fix(cli): update tests for autoStartServer default and cross-platform logs --- packages/cli/src/commands/gemma/logs.test.ts | 10 ++++++++++ packages/cli/src/commands/gemma/logs.ts | 6 ++---- packages/cli/src/config/config.test.ts | 2 +- packages/cli/src/config/settingsSchema.test.ts | 2 +- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/packages/cli/src/commands/gemma/logs.test.ts b/packages/cli/src/commands/gemma/logs.test.ts index 1cf34b77e5a..49ab8d43c68 100644 --- a/packages/cli/src/commands/gemma/logs.test.ts +++ b/packages/cli/src/commands/gemma/logs.test.ts @@ -95,13 +95,23 @@ describe('readLastLines', () => { }); describe('logsCommand', () => { + const originalPlatform = process.platform; + beforeEach(() => { vi.clearAllMocks(); + Object.defineProperty(process, 'platform', { + value: 'linux', + configurable: true, + }); vi.mocked(getLogFilePath).mockReturnValue('/tmp/gemma.log'); vi.spyOn(fs.promises, 'access').mockResolvedValue(undefined); }); afterEach(() => { + Object.defineProperty(process, 'platform', { + value: originalPlatform, + configurable: true, + }); vi.restoreAllMocks(); }); diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts index ed81ac4a4e4..023b8e6352a 100644 --- a/packages/cli/src/commands/gemma/logs.ts +++ b/packages/cli/src/commands/gemma/logs.ts @@ -86,8 +86,6 @@ export async function readLastLines( } } -const isWindows = process.platform === 'win32'; - interface LogsArgs { lines?: number; follow?: boolean; @@ -156,7 +154,7 @@ export const logsCommand: CommandModule = { const follow = argv.follow ?? lines === undefined; const requestedLines = lines ?? 20; - if (follow && isWindows) { + if (follow && process.platform === 'win32') { debugLogger.log( 'Live log following is not supported on Windows. 
Use --lines N to view recent logs.', ); @@ -164,7 +162,7 @@ export const logsCommand: CommandModule = { return; } - if (isWindows) { + if (process.platform === 'win32') { process.stdout.write(await readLastLines(logPath, requestedLines)); await exitCli(0); return; diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts index 4288150ba5a..180f4617491 100644 --- a/packages/cli/src/config/config.test.ts +++ b/packages/cli/src/config/config.test.ts @@ -3068,7 +3068,7 @@ describe('loadCliConfig gemmaModelRouter', () => { const config = await loadCliConfig(settings, 'test-session', argv); expect(config.getGemmaModelRouterEnabled()).toBe(true); const gemmaSettings = config.getGemmaModelRouterSettings(); - expect(gemmaSettings.autoStartServer).toBe(true); + expect(gemmaSettings.autoStartServer).toBe(false); expect(gemmaSettings.binaryPath).toBe(''); expect(gemmaSettings.classifier?.host).toBe('http://localhost:9379'); expect(gemmaSettings.classifier?.model).toBe('gemma3-1b-gpu-custom'); diff --git a/packages/cli/src/config/settingsSchema.test.ts b/packages/cli/src/config/settingsSchema.test.ts index a7ce6cf0159..81e5f32ff09 100644 --- a/packages/cli/src/config/settingsSchema.test.ts +++ b/packages/cli/src/config/settingsSchema.test.ts @@ -480,7 +480,7 @@ describe('SettingsSchema', () => { expect(autoStartServer).toBeDefined(); expect(autoStartServer.type).toBe('boolean'); expect(autoStartServer.category).toBe('Experimental'); - expect(autoStartServer.default).toBe(true); + expect(autoStartServer.default).toBe(false); expect(autoStartServer.requiresRestart).toBe(true); expect(autoStartServer.showInDialog).toBe(true); expect(autoStartServer.description).toBe(