From ff8467943a6a8c81cd1c7721f3aafdfe128ab553 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Thu, 26 Feb 2026 11:56:39 -0800
Subject: [PATCH 01/33] docs: add codebase understanding guide

---
 docs/codebase_understanding.md | 138 +++++++++++++++++++++++++++++++++
 1 file changed, 138 insertions(+)
 create mode 100644 docs/codebase_understanding.md

diff --git a/docs/codebase_understanding.md b/docs/codebase_understanding.md
new file mode 100644
index 00000000000..de70a1b9dbc
--- /dev/null
+++ b/docs/codebase_understanding.md
@@ -0,0 +1,138 @@
+# Codebase understanding
+
+This document provides a detailed overview of the Gemini CLI architecture, its
+core components, and how they interact to provide an agentic terminal
+experience.
+
+## Repository overview
+
+Gemini CLI is structured as a monorepo using npm workspaces. The codebase is
+divided into several specialized packages that separate the user interface from
+the agentic orchestration logic.
+
+### Core packages
+
+- **`packages/cli`**: Contains the terminal user interface (TUI) implemented
+  with React and Ink. It handles terminal-specific logic like keybindings,
+  mouse events, and layout rendering.
+- **`packages/core`**: The central engine of the application. It is UI-agnostic
+  and manages the Gemini API communication, tool orchestration, conversation
+  history, and policy enforcement.
+- **`packages/devtools`**: Provides a developer-focused inspector (similar to
+  Chrome DevTools) for monitoring network traffic and console logs in real-time.
+- **`packages/sdk`**: A library for building extensions and custom tools that
+  integrate with Gemini CLI.
+- **`packages/vscode-ide-companion`**: A VS Code extension that connects the
+  editor state to the CLI, enabling the agent to read open files and cursor
+  positions.
+
+## Application lifecycle
+
+The application follows a structured startup and execution flow to ensure
+security and environment consistency.
+
+### Startup and sandboxing
+
+When you launch Gemini CLI, the entry point in `packages/cli/src/gemini.tsx`
+manages several initialization steps:
+
+1.  **Configuration loading**: Loads user and workspace settings, parsing
+    command-line arguments.
+2.  **Authentication**: Validates credentials and refreshes OAuth tokens.
+3.  **Sandboxing**: If configured, the application relaunches itself in a
+    restricted child process using a "sandbox" environment to isolate tool
+    execution.
+4.  **Mode selection**: Determines whether to start the interactive TUI or run
+    in non-interactive mode based on input and terminal state.
+
+### Interactive vs. non-interactive modes
+
+- **Interactive mode**: Renders the TUI using Ink. The state is managed via
+  React contexts (Settings, Mouse, Keypress, Terminal) and a central
+  `AppContainer`.
+- **Non-interactive mode**: Executes a single prompt or command. It uses a
+  focused loop in `packages/cli/src/nonInteractiveCli.ts` that continues until
+  the agent completes its task or requires user intervention that cannot be
+  provided.
+
+## Agent orchestration
+
+The orchestration of the agent's behavior happens primarily within
+`packages/core/src/core`.
+
+### GeminiClient
+
+The `GeminiClient` is the primary interface for the rest of the application. It
+coordinates:
+
+- **Session management**: Initializing, resuming, and persisting chat sessions.
+- **Model routing**: Deciding which Gemini model to use based on the task and
+  configuration.
+- **Context compression**: Summarizing long histories using the
+  `ChatCompressionService` to stay within context window limits.
+- **IDE integration**: Injecting editor context (open files, selections) into
+  the prompt.
+
+### GeminiChat and Turn
+
+- **`GeminiChat`**: Manages the low-level API communication. It handles
+  streaming responses, retries for transient network errors, and records the
+  conversation history.
+- **`Turn`**: Represents a single agentic exchange. A turn may involve multiple
+  API calls if the model decides to use tools. It yields events for content,
+  thoughts, and tool requests.
+
+## Tool system and scheduler
+
+The tool system allows the agent to interact with the external world. It is
+built on a secure, policy-driven framework.
+
+### Tool registry
+
+The `ToolRegistry` in `packages/core/src/tools` maintains a list of all
+available tools. It supports several types:
+
+- **Built-in tools**: Native TypeScript implementations for file system
+  operations, shell commands, and web fetching.
+- **Discovered tools**: Local scripts or commands identified in the project
+  root.
+- **MCP tools**: Tools provided by external servers via the Model Context
+  Protocol.
+
+### Scheduler
+
+The `Scheduler` in `packages/core/src/scheduler` manages the lifecycle of a
+tool call:
+
+1.  **Validation**: Ensures the tool exists and the arguments match the schema.
+2.  **Policy check**: Consults the Policy Engine to determine if the tool is
+    allowed to run automatically, requires user confirmation, or is denied.
+3.  **Confirmation**: If required, it pauses execution and uses the
+    `MessageBus` to request user approval through the UI.
+4.  **Execution**: Runs the tool and captures the output, including live
+    updates for long-running processes.
+5.  **Feedback**: Sends the tool result back to the model to continue the
+    agentic loop.
+
+## UI architecture
+
+The UI is built with React components rendered to the terminal via Ink. Key
+design patterns include:
+
+- **Providers**: Global state like settings, theme, and terminal size is
+  provided through React Contexts to avoid prop drilling.
+- **Console patching**: Standard `console.log` calls are intercepted and
+  redirected to the TUI's debug console or the `devtools` server.
+- **Event-driven updates**: The UI listens to `coreEvents` from the orchestrator
+  to update its state (e.g., streaming text, tool progress, or errors).
+
+## Testing and quality
+
+The project maintains high standards through several testing tiers:
+
+- **Unit tests**: Located alongside the source code (e.g., `*.test.ts`), using
+  Vitest.
+- **Integration tests**: E2E tests in the `integration-tests/` directory that
+  run the compiled CLI against mocked and real API endpoints.
+- **Evals**: Specialized evaluation scripts in `evals/` that measure the
+  agent's performance on specific tasks like tool use and codebase navigation.

From 760054e4821d0c48d3f2757c46e20861cef22f4f Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Thu, 26 Feb 2026 12:01:52 -0800
Subject: [PATCH 02/33] docs: expand codebase understanding guide with
 technical depth

---
 docs/codebase_understanding.md | 279 +++++++++++++++++----------------
 1 file changed, 143 insertions(+), 136 deletions(-)

diff --git a/docs/codebase_understanding.md b/docs/codebase_understanding.md
index de70a1b9dbc..f6c60c0f665 100644
--- a/docs/codebase_understanding.md
+++ b/docs/codebase_understanding.md
@@ -1,138 +1,145 @@
 # Codebase understanding
 
-This document provides a detailed overview of the Gemini CLI architecture, its
-core components, and how they interact to provide an agentic terminal
-experience.
-
-## Repository overview
-
-Gemini CLI is structured as a monorepo using npm workspaces. The codebase is
-divided into several specialized packages that separate the user interface from
-the agentic orchestration logic.
-
-### Core packages
-
-- **`packages/cli`**: Contains the terminal user interface (TUI) implemented
-  with React and Ink. It handles terminal-specific logic like keybindings,
-  mouse events, and layout rendering.
-- **`packages/core`**: The central engine of the application. It is UI-agnostic
-  and manages the Gemini API communication, tool orchestration, conversation
-  history, and policy enforcement.
-- **`packages/devtools`**: Provides a developer-focused inspector (similar to
-  Chrome DevTools) for monitoring network traffic and console logs in real-time.
-- **`packages/sdk`**: A library for building extensions and custom tools that
-  integrate with Gemini CLI.
-- **`packages/vscode-ide-companion`**: A VS Code extension that connects the
-  editor state to the CLI, enabling the agent to read open files and cursor
-  positions.
-
-## Application lifecycle
-
-The application follows a structured startup and execution flow to ensure
-security and environment consistency.
-
-### Startup and sandboxing
-
-When you launch Gemini CLI, the entry point in `packages/cli/src/gemini.tsx`
-manages several initialization steps:
-
-1.  **Configuration loading**: Loads user and workspace settings, parsing
-    command-line arguments.
-2.  **Authentication**: Validates credentials and refreshes OAuth tokens.
-3.  **Sandboxing**: If configured, the application relaunches itself in a
-    restricted child process using a "sandbox" environment to isolate tool
-    execution.
-4.  **Mode selection**: Determines whether to start the interactive TUI or run
-    in non-interactive mode based on input and terminal state.
-
-### Interactive vs. non-interactive modes
-
-- **Interactive mode**: Renders the TUI using Ink. The state is managed via
-  React contexts (Settings, Mouse, Keypress, Terminal) and a central
-  `AppContainer`.
-- **Non-interactive mode**: Executes a single prompt or command. It uses a
-  focused loop in `packages/cli/src/nonInteractiveCli.ts` that continues until
-  the agent completes its task or requires user intervention that cannot be
-  provided.
-
-## Agent orchestration
-
-The orchestration of the agent's behavior happens primarily within
-`packages/core/src/core`.
-
-### GeminiClient
-
-The `GeminiClient` is the primary interface for the rest of the application. It
-coordinates:
-
-- **Session management**: Initializing, resuming, and persisting chat sessions.
-- **Model routing**: Deciding which Gemini model to use based on the task and
-  configuration.
-- **Context compression**: Summarizing long histories using the
-  `ChatCompressionService` to stay within context window limits.
-- **IDE integration**: Injecting editor context (open files, selections) into
-  the prompt.
-
-### GeminiChat and Turn
-
-- **`GeminiChat`**: Manages the low-level API communication. It handles
-  streaming responses, retries for transient network errors, and records the
-  conversation history.
-- **`Turn`**: Represents a single agentic exchange. A turn may involve multiple
-  API calls if the model decides to use tools. It yields events for content,
-  thoughts, and tool requests.
-
-## Tool system and scheduler
-
-The tool system allows the agent to interact with the external world. It is
-built on a secure, policy-driven framework.
-
-### Tool registry
-
-The `ToolRegistry` in `packages/core/src/tools` maintains a list of all
-available tools. It supports several types:
-
-- **Built-in tools**: Native TypeScript implementations for file system
-  operations, shell commands, and web fetching.
-- **Discovered tools**: Local scripts or commands identified in the project
-  root.
-- **MCP tools**: Tools provided by external servers via the Model Context
-  Protocol.
-
-### Scheduler
-
-The `Scheduler` in `packages/core/src/scheduler` manages the lifecycle of a
-tool call:
-
-1.  **Validation**: Ensures the tool exists and the arguments match the schema.
-2.  **Policy check**: Consults the Policy Engine to determine if the tool is
-    allowed to run automatically, requires user confirmation, or is denied.
-3.  **Confirmation**: If required, it pauses execution and uses the
-    `MessageBus` to request user approval through the UI.
-4.  **Execution**: Runs the tool and captures the output, including live
-    updates for long-running processes.
-5.  **Feedback**: Sends the tool result back to the model to continue the
-    agentic loop.
-
-## UI architecture
-
-The UI is built with React components rendered to the terminal via Ink. Key
-design patterns include:
-
-- **Providers**: Global state like settings, theme, and terminal size is
-  provided through React Contexts to avoid prop drilling.
-- **Console patching**: Standard `console.log` calls are intercepted and
-  redirected to the TUI's debug console or the `devtools` server.
-- **Event-driven updates**: The UI listens to `coreEvents` from the orchestrator
-  to update its state (e.g., streaming text, tool progress, or errors).
-
-## Testing and quality
-
-The project maintains high standards through several testing tiers:
-
-- **Unit tests**: Located alongside the source code (e.g., `*.test.ts`), using
-  Vitest.
-- **Integration tests**: E2E tests in the `integration-tests/` directory that
-  run the compiled CLI against mocked and real API endpoints.
-- **Evals**: Specialized evaluation scripts in `evals/` that measure the
-  agent's performance on specific tasks like tool use and codebase navigation.
+This document provides an in-depth technical overview of the Gemini CLI
+architecture. It is intended for developers who want to understand the system's
+inner workings, from startup to advanced agentic orchestration.
+
+## Repository structure
+
+Gemini CLI is a monorepo managed with npm workspaces. It strictly separates
+concerns across packages:
+
+- **`packages/cli`**: The terminal user interface (TUI) layer. Built with React
+  and Ink, it handles user interaction, rendering, and terminal state.
+- **`packages/core`**: The engine containing all business logic. It is entirely
+  UI-agnostic and manages the agent's lifecycle, Gemini API interactions, and
+  tool systems.
+- **`packages/devtools`**: A suite for inspection. It provides a Chrome-like
+  Network and Console inspector for real-time debugging.
+- **`packages/sdk`**: A library for building third-party extensions.
+- **`packages/vscode-ide-companion`**: Bridges the editor and CLI, providing
+  real-time IDE context to the agent.
+
+---
+
+## 1. Application lifecycle
+
+### Startup and initialization
+The entry point is `packages/cli/src/gemini.tsx`. The startup sequence involves:
+1.  **Standard I/O patching**: The CLI patches `process.stdout` and
+    `process.stderr` to capture all output, ensuring it can be redirected to the
+    TUI or debug logs without garbling the terminal display.
+2.  **Sandboxing and relaunch**: If `advanced.sandbox` is enabled, the CLI
+    re-launches itself in a restricted environment. It also uses a relaunch
+    mechanism to automatically configure Node.js memory limits (e.g.,
+    `--max-old-space-size`).
+3.  **Authentication**: Credentials are validated early. The CLI supports
+    multiple auth types, including API Keys, OAuth2, and Vertex AI.
+
+### Execution modes
+The CLI operates in two distinct modes:
+- **Interactive (TUI)**: Uses the `render` function from Ink to start a
+  persistent React application in the terminal.
+- **Non-interactive (CLI)**: A streamlined execution loop in
+  `nonInteractiveCli.ts` that runs until the agent completes its task,
+  supporting piped input and output redirection.
+
+---
+
+## 2. Model routing engine
+
+The `ModelRouterService` (`packages/core/src/routing`) is responsible for
+selecting the most appropriate model for every request.
+
+### Composite strategy
+The router uses a "Composite Strategy" that evaluates multiple sub-strategies in
+priority order:
+1.  **Fallback**: Switches models if a quota error or API failure occurs.
+2.  **Override**: Respects user-specified model overrides (e.g., `--model`).
+3.  **Approval Mode**: Selects specialized models for `Plan Mode`.
+4.  **Classifier**: A lightweight LLM call that analyzes the user's request
+    against a rubric (Strategic Planning, Complexity, Ambiguity) to choose
+    between a "Pro" (complex) or "Flash" (simple) model.
+5.  **Numerical Classifier**: A deterministic classifier based on token counts
+    and history depth.
+
+---
+
+## 3. Intelligent context management
+
+Managing the model's context window is critical for long-running sessions. This
+is handled by two primary services in `packages/core/src/services`:
+
+### ChatCompressionService
+When history exceeds a threshold (default 50% of the context window), the
+compression service triggers:
+1.  **Split point detection**: It identifies a safe point in history to begin
+    summarization, ensuring recent turns remain in high-fidelity.
+2.  **State snapshot generation**: The LLM generates a `<state_snapshot>`—a
+    structured summary of established constraints, technical details, and
+    progress.
+3.  **The "Probe" (Self-Correction)**: A second model call "probes" the generated
+    summary against the original history to ensure no critical constraints or
+    paths were omitted, correcting the summary if necessary.
+
+### ToolOutputMaskingService
+To prevent bulky tool outputs (like long log files) from clogging the context,
+this service detects large `functionResponse` blocks and replaces them with
+concise summaries or pointers to temporary files, preserving the model's ability
+to reason about the data without consuming thousands of tokens.
+
+---
+
+## 4. Advanced tool execution
+
+Tool execution is orchestrated by the `Scheduler`
+(`packages/core/src/scheduler`), which operates as an event-driven state
+machine.
+
+### State management
+Every tool call moves through a structured lifecycle managed by the
+`SchedulerStateManager`:
+`Validating` → `AwaitingApproval` → `Scheduled` → `Executing` → `Success`/`Error`
+
+### Key features
+- **Policy Engine**: A granular system that determines if a tool is safe to run.
+  Policies can be "Always", "Ask", or "Never" based on the tool name, arguments,
+  or folder location.
+- **Tail Calls**: If a tool's output requires immediate follow-up (like a shell
+  command that produced a specific error code), the scheduler can "tail call"
+  another tool (e.g., a "fixer" or "retry") without ending the current turn.
+- **Parallel execution**: The scheduler can execute multiple non-conflicting
+  read-only tools in parallel while enforcing sequential execution for
+  modifying tools.
+
+---
+
+## 5. UI architecture
+
+The `packages/cli/src/ui` directory implements a sophisticated React-based
+terminal interface.
+
+### Rendering and layout
+- **Ink**: Provides React components for terminal output (`Box`, `Text`).
+- **AppContainer**: The root component that coordinates the display of multiple
+  screens (Chat, Debug Console, Settings, Auth).
+- **ConsolePatcher**: Intercepts `console.log` and redirects them to the
+  internal "Debug Console" accessible via `ctrl+d`.
+
+### State providers
+Global state is managed through specialized providers:
+- **`KeypressProvider`**: Captures and routes terminal keyboard events,
+  supporting complex shortcuts and Vim-style navigation.
+- **`TerminalProvider`**: Tracks the terminal size and window state using a
+  custom `ResizeObserver`.
+- **`VimModeProvider`**: Enables Vim-like keybindings for navigating through
+  conversation history and multi-line input fields.
+
+## Testing and quality assurance
+
+The repo employs a three-tier testing strategy:
+1.  **Unit tests**: Fast, isolated tests for core logic (Vitest).
+2.  **Integration tests**: Verify full system flows, including mock Gemini API
+    responses and real file system operations.
+3.  **Evals**: Performance benchmarks in `evals/` that measure the agent's
+    reasoning accuracy and tool-use efficiency over time.

From f5b60c7f6768bdbba85ebafed8e9c6b7dba658b4 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Thu, 26 Feb 2026 12:02:49 -0800
Subject: [PATCH 03/33] docs: finalize codebase understanding guide with
 advanced technical details

---
 docs/codebase_understanding.md | 224 +++++++++++++++++----------------
 1 file changed, 114 insertions(+), 110 deletions(-)

diff --git a/docs/codebase_understanding.md b/docs/codebase_understanding.md
index f6c60c0f665..34a2ee3c942 100644
--- a/docs/codebase_understanding.md
+++ b/docs/codebase_understanding.md
@@ -1,145 +1,149 @@
 # Codebase understanding
 
-This document provides an in-depth technical overview of the Gemini CLI
-architecture. It is intended for developers who want to understand the system's
-inner workings, from startup to advanced agentic orchestration.
-
-## Repository structure
-
-Gemini CLI is a monorepo managed with npm workspaces. It strictly separates
-concerns across packages:
-
-- **`packages/cli`**: The terminal user interface (TUI) layer. Built with React
-  and Ink, it handles user interaction, rendering, and terminal state.
-- **`packages/core`**: The engine containing all business logic. It is entirely
-  UI-agnostic and manages the agent's lifecycle, Gemini API interactions, and
-  tool systems.
-- **`packages/devtools`**: A suite for inspection. It provides a Chrome-like
-  Network and Console inspector for real-time debugging.
-- **`packages/sdk`**: A library for building third-party extensions.
-- **`packages/vscode-ide-companion`**: Bridges the editor and CLI, providing
-  real-time IDE context to the agent.
+This document provides a deep-dive technical overview of the Gemini CLI
+architecture. It is designed for developers who need to understand the
+system's inner workings, from startup to advanced autonomous behaviors.
+
+## Repository architecture
+
+Gemini CLI is a monorepo structured to maintain a strict separation between
+the user interface and the agent's core reasoning logic.
+
+- **`packages/cli`**: The Terminal User Interface (TUI). Built with React and
+  Ink, it manages the interactive terminal experience, including keyboard
+  protocols, rendering, and terminal state management.
+- **`packages/core`**: The UI-agnostic engine. It contains the primary
+  orchestration logic, model routing, tool systems, policy enforcement, and
+  Gemini API communication.
+- **`packages/devtools`**: A suite for real-time inspection of network traffic,
+  console logs, and session activity.
+- **`packages/sdk`**: A library for developers to build third-party tools and
+  extensions.
+- **`packages/vscode-ide-companion`**: A specialized bridge that feeds real-time
+  editor state (open files, active selections, cursor positions) to the agent.
 
 ---
 
 ## 1. Application lifecycle
 
 ### Startup and initialization
-The entry point is `packages/cli/src/gemini.tsx`. The startup sequence involves:
-1.  **Standard I/O patching**: The CLI patches `process.stdout` and
-    `process.stderr` to capture all output, ensuring it can be redirected to the
-    TUI or debug logs without garbling the terminal display.
-2.  **Sandboxing and relaunch**: If `advanced.sandbox` is enabled, the CLI
-    re-launches itself in a restricted environment. It also uses a relaunch
-    mechanism to automatically configure Node.js memory limits (e.g.,
-    `--max-old-space-size`).
-3.  **Authentication**: Credentials are validated early. The CLI supports
-    multiple auth types, including API Keys, OAuth2, and Vertex AI.
-
-### Execution modes
-The CLI operates in two distinct modes:
-- **Interactive (TUI)**: Uses the `render` function from Ink to start a
-  persistent React application in the terminal.
-- **Non-interactive (CLI)**: A streamlined execution loop in
-  `nonInteractiveCli.ts` that runs until the agent completes its task,
-  supporting piped input and output redirection.
+The entry point is `packages/cli/src/gemini.tsx`. The startup sequence is
+designed for security and resilience:
+
+1.  **I/O redirection**: Standard output streams (`stdout`, `stderr`) are
+    patched to capture all logs and errors. This allows the CLI to redirect
+    diagnostic information to the TUI's debug console or a remote DevTools server
+    without corrupting the user's terminal interface.
+2.  **Memory-aware relaunch**: The CLI checks the host system's total memory.
+    If it detects that Node.js's default heap limit is insufficient for complex
+    codebase analysis, it relaunches itself using the
+    `--max-old-space-size` flag, targeting approximately 50% of system memory.
+3.  **Sandboxing**: If configured, the CLI launches a restricted "sandbox"
+    environment (using Docker, Podman, or a localized process) to isolate the
+    agent's autonomous actions from the host system.
+4.  **Mode selection (interactive TUI vs. non-interactive CLI)**:
+    - **Interactive mode**: Initializes the Ink renderer, starting a persistent
+      React application that manages terminal state via providers.
+    - **Non-interactive mode**: Executes a streamlined loop in
+      `nonInteractiveCli.ts`, designed for single prompts or piped input/output
+      redirection.
 
 ---
 
-## 2. Model routing engine
+## 2. Model routing and selection
 
-The `ModelRouterService` (`packages/core/src/routing`) is responsible for
-selecting the most appropriate model for every request.
+The `ModelRouterService` (`packages/core/src/routing`) implements a
+"Composite Strategy" to select the optimal model for every request.
 
-### Composite strategy
-The router uses a "Composite Strategy" that evaluates multiple sub-strategies in
-priority order:
-1.  **Fallback**: Switches models if a quota error or API failure occurs.
-2.  **Override**: Respects user-specified model overrides (e.g., `--model`).
-3.  **Approval Mode**: Selects specialized models for `Plan Mode`.
-4.  **Classifier**: A lightweight LLM call that analyzes the user's request
-    against a rubric (Strategic Planning, Complexity, Ambiguity) to choose
-    between a "Pro" (complex) or "Flash" (simple) model.
-5.  **Numerical Classifier**: A deterministic classifier based on token counts
-    and history depth.
+### Routing strategies
+- **classifier**: Uses a lightweight LLM call to categorize the complexity of a
+  task based on a rubric (Strategic Planning, Multi-step Coordination,
+  Ambiguity). It chooses between a "Pro" model (for complex reasoning) and a
+  "Flash" model (for simple operations).
+- **approvalMode**: Selects specialized models (like `gemini-2.0-flash-lite`)
+  when the agent is in specific modes like `Plan Mode`.
+- **numericalClassifier**: A deterministic strategy that selects models based
+  on the number of tokens in the conversation or the length of the history.
+- **fallback**: Automatically switches models if the primary model encounters
+  quota limits (HTTP 429) or transient API failures.
 
 ---
 
 ## 3. Intelligent context management
 
-Managing the model's context window is critical for long-running sessions. This
-is handled by two primary services in `packages/core/src/services`:
+The agent maintains deep project awareness while staying within token limits
+through several services in `packages/core/src/services`:
 
 ### ChatCompressionService
-When history exceeds a threshold (default 50% of the context window), the
-compression service triggers:
-1.  **Split point detection**: It identifies a safe point in history to begin
-    summarization, ensuring recent turns remain in high-fidelity.
-2.  **State snapshot generation**: The LLM generates a `<state_snapshot>`—a
-    structured summary of established constraints, technical details, and
-    progress.
-3.  **The "Probe" (Self-Correction)**: A second model call "probes" the generated
-    summary against the original history to ensure no critical constraints or
-    paths were omitted, correcting the summary if necessary.
+Compression runs when the history exceeds 50% of the model's context window:
+1.  **State snapshots**: The agent generates a structured `<state_snapshot>`
+    representing the cumulative knowledge of the session (constraints, progress,
+    paths).
+2.  **The "Probe" (Self-Correction)**: A second LLM pass compares the summary
+    against the original history to ensure no critical technical details or
+    user-defined constraints were lost, correcting the summary before purging
+    the history.
 
 ### ToolOutputMaskingService
-To prevent bulky tool outputs (like long log files) from clogging the context,
-this service detects large `functionResponse` blocks and replaces them with
-concise summaries or pointers to temporary files, preserving the model's ability
-to reason about the data without consuming thousands of tokens.
+Prevents bulky data (like large shell outputs or file reads) from clogging the
+context window. It replaces large `functionResponse` blocks with concise
+summaries and persists the full data to temporary files, allowing the agent to
+refer to the full data only when necessary.
 
 ---
 
-## 4. Advanced tool execution
+## 4. Advanced tool execution and scheduling
 
-Tool execution is orchestrated by the `Scheduler`
-(`packages/core/src/scheduler`), which operates as an event-driven state
-machine.
+The `Scheduler` (`packages/core/src/scheduler`) is an event-driven state
+machine that manages the lifecycle of autonomous actions.
 
-### State management
-Every tool call moves through a structured lifecycle managed by the
-`SchedulerStateManager`:
+### Lifecycle states
 `Validating` → `AwaitingApproval` → `Scheduled` → `Executing` → `Success`/`Error`
 
 ### Key features
-- **Policy Engine**: A granular system that determines if a tool is safe to run.
-  Policies can be "Always", "Ask", or "Never" based on the tool name, arguments,
-  or folder location.
-- **Tail Calls**: If a tool's output requires immediate follow-up (like a shell
-  command that produced a specific error code), the scheduler can "tail call"
-  another tool (e.g., a "fixer" or "retry") without ending the current turn.
-- **Parallel execution**: The scheduler can execute multiple non-conflicting
-  read-only tools in parallel while enforcing sequential execution for
-  modifying tools.
+- **Policy Engine**: A granular system that evaluates tools based on security
+  policies (e.g., "Allow read-only tools", "Ask for shell commands"). It can be
+  configured at the project or user level.
+- **Tail calls**: Allows a tool to "link" to another action. For example, a
+  shell command that produces an error can automatically trigger a "diagnostic"
+  tool without returning control to the main model.
+- **Parallelism**: The scheduler executes independent read-only tools in
+  parallel while enforcing sequential execution for tools that modify the
+  environment.
+- **MCP integration**: Dynamically loads tools from Model Context Protocol
+  servers, integrating them seamlessly into the same policy and scheduler
+  framework.
 
 ---
 
-## 5. UI architecture
-
-The `packages/cli/src/ui` directory implements a sophisticated React-based
-terminal interface.
-
-### Rendering and layout
-- **Ink**: Provides React components for terminal output (`Box`, `Text`).
-- **AppContainer**: The root component that coordinates the display of multiple
-  screens (Chat, Debug Console, Settings, Auth).
-- **ConsolePatcher**: Intercepts `console.log` and redirects them to the
-  internal "Debug Console" accessible via `ctrl+d`.
-
-### State providers
-Global state is managed through specialized providers:
-- **`KeypressProvider`**: Captures and routes terminal keyboard events,
-  supporting complex shortcuts and Vim-style navigation.
-- **`TerminalProvider`**: Tracks the terminal size and window state using a
-  custom `ResizeObserver`.
-- **`VimModeProvider`**: Enables Vim-like keybindings for navigating through
-  conversation history and multi-line input fields.
-
-## Testing and quality assurance
-
-The repo employs a three-tier testing strategy:
-1.  **Unit tests**: Fast, isolated tests for core logic (Vitest).
-2.  **Integration tests**: Verify full system flows, including mock Gemini API
-    responses and real file system operations.
+## 5. UI and terminal integration
+
+The `packages/cli/src/ui` directory implements a sophisticated React-based TUI.
+
+### Keyboard and protocols
+- **KeypressProvider**: Manages terminal input, supporting complex key
+  combinations and shortcuts.
+- **Kitty keyboard protocol**: Detects terminals that support the Kitty protocol
+  to enable advanced features, such as distinguishing `ctrl+enter` from `enter`.
+- **Vim mode**: A dedicated provider that enables Vim-like navigation (hjkl,
+  words, search) for both conversation history and input fields.
+
+### Layout and rendering
+- **ResizeObserver**: A custom implementation that watches the terminal size
+  to ensure components (like multi-column layouts or wide tables) adapt
+  instantly.
+- **ConsolePatcher**: Intercepts `console.log`, `console.warn`, and
+  `console.error`, routing them to the internal debug console (toggled with
+  `ctrl+d`) or the external DevTools server.
+
+---
+
+## 6. Testing and validation
+
+Gemini CLI uses a tiered testing strategy to ensure reliability:
+1.  **Unit tests**: Located alongside the source (`*.test.ts`), providing fast
+    coverage for core logic.
+2.  **Integration tests**: Located in `integration-tests/`, running the
+    full CLI against mock and real Gemini API endpoints.
 3.  **Evals**: Performance benchmarks in `evals/` that measure the agent's
     reasoning accuracy and tool-use efficiency over time.

From 0f59d48108cc7fecec15476f7fcc6734da96ab1c Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Thu, 26 Feb 2026 12:06:50 -0800
Subject: [PATCH 04/33] docs: add codebase understanding from antigravity

---
 docs/codebase_understanding_antigrav.md | 101 ++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 docs/codebase_understanding_antigrav.md

diff --git a/docs/codebase_understanding_antigrav.md b/docs/codebase_understanding_antigrav.md
new file mode 100644
index 00000000000..60d5dbd2058
--- /dev/null
+++ b/docs/codebase_understanding_antigrav.md
@@ -0,0 +1,101 @@
+# Gemini CLI - Codebase Understanding
+
+Gemini CLI is an open-source AI agent designed to let you interact with Google's
+Gemini models directly from your terminal. It's built as a **TypeScript
+monorepo** (using npm workspaces) and relies heavily on **Node.js**, **React**,
+and **Ink** (a library that lets you build terminal UIs using React components).
+
+Here is a high-level walkthrough of the repository to help you understand how
+all the pieces fit together.
+
+## 1. High-Level Architecture (The `packages/` Directory)
+
+The project is split into several focused packages to maintain a clean
+separation of concerns:
+
+- **`packages/cli`** (The Frontend)
+  - This is the user-facing terminal UI.
+  - It uses React + Ink. This means the terminal layout, styling, and
+    interactions are managed like a modern web app (with hooks, contexts, and
+    components).
+  - It handles all the terminal-specific logic like key bindings, processing
+    mouse/keyboard events, and rendering the chat stream or tool progress
+    indicators.
+- **`packages/core`** (The Brain/Backend)
+  - This is where the actual "agentic" logic lives. It is entirely UI-agnostic.
+  - Contains the core looping mechanism that communicates with the Gemini API,
+    maintains conversation history, compresses context, and evaluates whether
+    the agent needs to invoke a tool.
+  - Houses the **Tool Registry** (file system tools, shell runner, web tools)
+    and the **Policy Engine** (deciding if a tool is safe to run automatically
+    or needs your permission).
+- **`packages/devtools`**
+  - A Chrome DevTools-like web server that runs locally! If you enable
+    `general.devtools` in your settings, you can inspect network requests, agent
+    thoughts, and console logs in a local browser, just like you would for a web
+    app.
+- **`packages/vscode-ide-companion`**
+  - A VS Code extension that pairs dynamically with the CLI. It allows the
+    terminal agent to "read" your active editor state, seamlessly pulling
+    context on exactly what files or lines of code you currently have
+    highlighted in VS Code.
+- **`packages/sdk`**
+  - Provides libraries and types so people can build custom MCP (Model Context
+    Protocol) extensions or tools for the CLI.
+- **`packages/a2a-server`**
+  - An experimental Agent-to-Agent server, hinting at future capabilities for
+    having different agents talk to each other.
+
+## 2. The Core Application Lifecycle
+
+When you type `gemini` in your terminal, here's roughly what happens under the
+hood:
+
+1.  **Bootstrapping (`packages/cli/src/gemini.tsx`)**: The CLI loads user
+    configurations, parses command-line arguments, checks authentication, and
+    verifies if it needs to launch itself in a controlled "sandbox" environment
+    (using Docker/Podman to isolate dangerous shell tools).
+2.  **Mode Resolution**: It determines if you are piping data in or running a
+    single command (`nonInteractiveCli.ts`), or if you are firing up the chat
+    TUI (Terminal User Interface).
+3.  **The Agent Loop (`packages/core/src/core/`)**:
+    - **`GeminiClient`**: The main orchestrator. It manages sessions and
+      compresses chat histories using `ChatCompressionService` so you don't
+      breach token limits.
+    - **`GeminiChat` & `Turn`**: For every prompt you send, a `Turn` is created.
+      This represents one "exchange" where the model might think, respond, and
+      realize it needs to search your codebase. It streams these responses
+      back in real time.
+
+## 3. The Tool System & Execution
+
+The most powerful aspect of this CLI is its ability to interact with your
+environment.
+
+- In `packages/core/src/tools/`, there are native TypeScript implementations for
+  operations (like reading files, searching directories, or running tests).
+- When Gemini asks to use a tool, the **Scheduler**
+  (`packages/core/src/scheduler/`) intercepts the request.
+- It runs the request through the **Policy Engine**
+  (`packages/core/src/policy/`). Some commands (like `rm -rf`) are flagged and
+  routed to a **Confirmation Bus**, which pauses execution and asks you in the
+  UI: _"Do you want to allow this command?"_
+- Once approved (or auto-approved), it executes the tool, captures standard
+  output/error, and pipes that text back to Gemini to continue its thought
+  process.
+
+## 4. Code Quality, Building, and Testing
+
+- **Bundling & Running**: The project uses `esbuild` to compile everything very
+  quickly. During development, you can use `npm run start` or `npm run debug`
+  (which attaches a Node.js inspector).
+- **Testing (`vitest`)**: Testing is extremely rigorous here.
+  - _Unit Tests:_ `npm run test` handles basic component functionality.
+  - _Integration Tests:_ `npm run test:e2e` simulates an actual sandbox,
+    mocking/hitting models to make sure the CLI interacts realistically.
+  - _Evals (`evals/`):_ Standalone performance benchmarks that evaluate how
+    well the agent navigates codebases and uses its tools
+    autonomously.
+- **`npm run preflight`**: Before a PR is pushed, this massive script runs
+  formatting (Prettier), linting (ESLint), type checking (TypeScript), unit
+  testing, and building, ensuring nothing breaks the main branch.

From f7d43070a113c54b7d0a026d4d28efc3c0f909cb Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Fri, 10 Apr 2026 13:31:49 -0700
Subject: [PATCH 05/33] feat(cli): add `gemini gemma` command for streamlined
 local model setup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the manual 6-step Gemma local routing setup with a single
`gemini gemma setup` command that downloads the LiteRT-LM binary,
pulls the model, configures settings, and starts the server.

New commands:
- `gemini gemma setup` — one-command install with progress bar
- `gemini gemma start/stop` — server lifecycle management
- `gemini gemma status` — diagnostic health check
- `/gemma` slash command — in-session status with Ink UI component

Also adds auto-start: the LiteRT server starts automatically on CLI
launch when the Gemma router is enabled (configurable via
autoStartServer setting).
---
 packages/cli/src/commands/gemma.ts            |  34 ++
 packages/cli/src/commands/gemma/constants.ts  |  51 +++
 packages/cli/src/commands/gemma/platform.ts   | 121 +++++
 packages/cli/src/commands/gemma/setup.ts      | 414 ++++++++++++++++++
 packages/cli/src/commands/gemma/start.ts      | 122 ++++++
 packages/cli/src/commands/gemma/status.ts     | 181 ++++++++
 packages/cli/src/commands/gemma/stop.ts       | 115 +++++
 packages/cli/src/config/config.ts             |   3 +
 packages/cli/src/config/settingsSchema.ts     |  20 +
 packages/cli/src/gemini.tsx                   |  10 +
 .../cli/src/services/BuiltinCommandLoader.ts  |   2 +
 .../cli/src/services/liteRtServerManager.ts   |  88 ++++
 .../cli/src/ui/commands/gemmaStatusCommand.ts |  41 ++
 .../src/ui/components/HistoryItemDisplay.tsx  |   4 +
 .../src/ui/components/views/GemmaStatus.tsx   | 126 ++++++
 packages/cli/src/ui/types.ts                  |  15 +
 packages/core/src/config/config.ts            |   2 +
 17 files changed, 1349 insertions(+)
 create mode 100644 packages/cli/src/commands/gemma.ts
 create mode 100644 packages/cli/src/commands/gemma/constants.ts
 create mode 100644 packages/cli/src/commands/gemma/platform.ts
 create mode 100644 packages/cli/src/commands/gemma/setup.ts
 create mode 100644 packages/cli/src/commands/gemma/start.ts
 create mode 100644 packages/cli/src/commands/gemma/status.ts
 create mode 100644 packages/cli/src/commands/gemma/stop.ts
 create mode 100644 packages/cli/src/services/liteRtServerManager.ts
 create mode 100644 packages/cli/src/ui/commands/gemmaStatusCommand.ts
 create mode 100644 packages/cli/src/ui/components/views/GemmaStatus.tsx

diff --git a/packages/cli/src/commands/gemma.ts b/packages/cli/src/commands/gemma.ts
new file mode 100644
index 00000000000..1d2a28c23ed
--- /dev/null
+++ b/packages/cli/src/commands/gemma.ts
@@ -0,0 +1,34 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { CommandModule, Argv } from 'yargs';
+import { initializeOutputListenersAndFlush } from '../gemini.js';
+import { defer } from '../deferred.js';
+import { setupCommand } from './gemma/setup.js';
+import { startCommand } from './gemma/start.js';
+import { stopCommand } from './gemma/stop.js';
+import { statusCommand } from './gemma/status.js';
+
+export const gemmaCommand: CommandModule = {
+  command: 'gemma',
+  describe: 'Manage local Gemma model routing',
+  builder: (yargs: Argv) =>
+    yargs
+      .middleware((argv) => {
+        initializeOutputListenersAndFlush();
+        argv['isCommand'] = true;
+      })
+      .command(defer(setupCommand, 'gemma'))
+      .command(defer(startCommand, 'gemma'))
+      .command(defer(stopCommand, 'gemma'))
+      .command(defer(statusCommand, 'gemma'))
+      .demandCommand(1, 'You need at least one command before continuing.')
+      .version(false),
+  handler: () => {
+    // yargs will automatically show help if no subcommand is provided
+    // thanks to demandCommand(1) in the builder.
+  },
+};
diff --git a/packages/cli/src/commands/gemma/constants.ts b/packages/cli/src/commands/gemma/constants.ts
new file mode 100644
index 00000000000..76203a8a8f4
--- /dev/null
+++ b/packages/cli/src/commands/gemma/constants.ts
@@ -0,0 +1,51 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import path from 'node:path';
+import { Storage } from '@google/gemini-cli-core';
+
+/** LiteRT-LM release version to download. */
+export const LITERT_RELEASE_VERSION = 'v0.9.0-alpha03';
+
+/** Base URL for LiteRT-LM GitHub releases. */
+export const LITERT_RELEASE_BASE_URL =
+  'https://github.com/google-ai-edge/LiteRT-LM/releases/download';
+
+/** The only tested and supported model for local routing. */
+export const GEMMA_MODEL_NAME = 'gemma3-1b-gpu-custom';
+
+/** Default port for the LiteRT-LM server. */
+export const DEFAULT_PORT = 9379;
+
+/** Server health check timeout in milliseconds. */
+export const HEALTH_CHECK_TIMEOUT_MS = 5000;
+
+/** Delay before checking if server started successfully. */
+export const SERVER_START_WAIT_MS = 3000;
+
+/**
+ * Maps `${process.platform}-${process.arch}` to the LiteRT-LM binary filename.
+ */
+export const PLATFORM_BINARY_MAP: Record<string, string> = {
+  'darwin-arm64': 'lit.macos_arm64',
+  'linux-x64': 'lit.linux_x86_64',
+  'win32-x64': 'lit.windows_x86_64.exe',
+};
+
+/** Directory where the LiteRT-LM binary is installed. */
+export function getLiteRtBinDir(): string {
+  return path.join(Storage.getGlobalGeminiDir(), 'bin', 'litert');
+}
+
+/** Path to the PID file for the background LiteRT server. */
+export function getPidFilePath(): string {
+  return path.join(Storage.getGlobalTempDir(), 'litert-server.pid');
+}
+
+/** Path to the log file for the background LiteRT server. */
+export function getLogFilePath(): string {
+  return path.join(Storage.getGlobalTempDir(), 'litert-server.log');
+}
diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts
new file mode 100644
index 00000000000..e39d99e557f
--- /dev/null
+++ b/packages/cli/src/commands/gemma/platform.ts
@@ -0,0 +1,121 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import fs from 'node:fs';
+import path from 'node:path';
+import { execFileSync } from 'node:child_process';
+import {
+  PLATFORM_BINARY_MAP,
+  LITERT_RELEASE_BASE_URL,
+  LITERT_RELEASE_VERSION,
+  getLiteRtBinDir,
+  GEMMA_MODEL_NAME,
+  HEALTH_CHECK_TIMEOUT_MS,
+  getPidFilePath,
+} from './constants.js';
+
+export interface PlatformInfo {
+  key: string;
+  binaryName: string;
+}
+
+/**
+ * Detects the current platform and resolves the corresponding LiteRT-LM binary name.
+ * Returns null if the platform is unsupported.
+ */
+export function detectPlatform(): PlatformInfo | null {
+  const key = `${process.platform}-${process.arch}`;
+  const binaryName = PLATFORM_BINARY_MAP[key];
+  if (!binaryName) {
+    return null;
+  }
+  return { key, binaryName };
+}
+
+/** Returns the full local path to the LiteRT-LM binary. */
+export function getBinaryPath(binaryName?: string): string | null {
+  const name = binaryName ?? detectPlatform()?.binaryName;
+  if (!name) return null;
+  return path.join(getLiteRtBinDir(), name);
+}
+
+/** Returns the GitHub release download URL for the binary. */
+export function getBinaryDownloadUrl(binaryName: string): string {
+  return `${LITERT_RELEASE_BASE_URL}/${LITERT_RELEASE_VERSION}/${binaryName}`;
+}
+
+/** Checks if the LiteRT-LM binary exists on disk. */
+export function isBinaryInstalled(): boolean {
+  const binaryPath = getBinaryPath();
+  if (!binaryPath) return false;
+  return fs.existsSync(binaryPath);
+}
+
+/**
+ * Checks if the Gemma model has been downloaded by running `lit list`
+ * and looking for the model name in stdout.
+ */
+export function isModelDownloaded(binaryPath: string): boolean {
+  try {
+    const output = execFileSync(binaryPath, ['list'], {
+      encoding: 'utf-8',
+      timeout: 10000,
+    });
+    return output.includes(GEMMA_MODEL_NAME);
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Checks if a LiteRT-LM server is running and responding on the given port.
+ * Any HTTP response counts as running; network errors and timeouts do not.
+ */
+export async function isServerRunning(port: number): Promise<boolean> {
+  const controller = new AbortController();
+  const timeout = setTimeout(
+    () => controller.abort(),
+    HEALTH_CHECK_TIMEOUT_MS,
+  );
+  try {
+    await fetch(`http://localhost:${port}/`, { signal: controller.signal });
+    // Any response (even an error page) means the server is up.
+    return true;
+  } catch {
+    return false;
+  } finally {
+    // Runs on failure too, so a refused connection leaves no pending timer.
+    clearTimeout(timeout);
+  }
+}
+
+/**
+ * Reads the PID from the PID file, if it exists.
+ * Returns null if the file is missing or is not a positive decimal integer.
+ */
+export function readServerPid(): number | null {
+  try {
+    const content = fs.readFileSync(getPidFilePath(), 'utf-8').trim();
+    // parseInt would accept "12abc" or "-1"; only a positive integer is a PID.
+    const pid = /^\d+$/.test(content) ? Number(content) : NaN;
+    return Number.isSafeInteger(pid) && pid > 0 ? pid : null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Checks if a process with the given PID is still running.
+ */
+export function isProcessRunning(pid: number): boolean {
+  try {
+    // Sending signal 0 checks if the process exists without actually signaling it.
+    process.kill(pid, 0);
+    return true;
+  } catch {
+    return false;
+  }
+}
diff --git a/packages/cli/src/commands/gemma/setup.ts b/packages/cli/src/commands/gemma/setup.ts
new file mode 100644
index 00000000000..7648122783d
--- /dev/null
+++ b/packages/cli/src/commands/gemma/setup.ts
@@ -0,0 +1,414 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { CommandModule } from 'yargs';
+import fs from 'node:fs';
+import { execSync, spawn as nodeSpawn } from 'node:child_process';
+import chalk from 'chalk';
+import { debugLogger } from '@google/gemini-cli-core';
+import { loadSettings, SettingScope } from '../../config/settings.js';
+import { exitCli } from '../utils.js';
+import {
+  DEFAULT_PORT,
+  GEMMA_MODEL_NAME,
+  getLiteRtBinDir,
+} from './constants.js';
+import {
+  detectPlatform,
+  getBinaryDownloadUrl,
+  getBinaryPath,
+  isBinaryInstalled,
+  isModelDownloaded,
+} from './platform.js';
+import { startServer } from './start.js';
+import readline from 'node:readline';
+
+const log = (msg: string) => debugLogger.log(msg);
+const logError = (msg: string) => debugLogger.error(msg);
+
+/**
+ * Prompts the user for a yes/no confirmation.
+ * Returns true if the user answers 'y' or 'yes'.
+ */
+async function promptYesNo(question: string): Promise<boolean> {
+  const rl = readline.createInterface({
+    input: process.stdin,
+    output: process.stdout,
+  });
+  return new Promise((resolve) => {
+    rl.question(`${question} (y/N): `, (answer) => {
+      rl.close();
+      resolve(
+        answer.trim().toLowerCase() === 'y' ||
+          answer.trim().toLowerCase() === 'yes',
+      );
+    });
+  });
+}
+
+/** Formats a byte count into a human-readable string (e.g. "12.3 MB"). */
+function formatBytes(bytes: number): string {
+  if (bytes < 1024) return `${bytes} B`;
+  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
+  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
+}
+
+/** Renders a single-line progress bar to stderr (overwriting in place). */
+function renderProgress(downloaded: number, total: number | null): void {
+  const barWidth = 30;
+  if (total && total > 0) {
+    const pct = Math.min(downloaded / total, 1);
+    const filled = Math.round(barWidth * pct);
+    const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled);
+    const pctStr = (pct * 100).toFixed(0).padStart(3);
+    process.stderr.write(
+      `\r  [${bar}] ${pctStr}% ${formatBytes(downloaded)} / ${formatBytes(total)}`,
+    );
+  } else {
+    process.stderr.write(`\r  Downloaded ${formatBytes(downloaded)}`);
+  }
+}
+
+/**
+ * Downloads a file from a URL to a local path with a progress bar.
+ * Uses a temporary `.downloading` suffix for safety against interrupted downloads.
+ */
+async function downloadFile(url: string, destPath: string): Promise<void> {
+  const tmpPath = destPath + '.downloading';
+
+  // Clean up any previous interrupted download.
+  if (fs.existsSync(tmpPath)) {
+    fs.unlinkSync(tmpPath);
+  }
+
+  const response = await fetch(url, { redirect: 'follow' });
+  if (!response.ok) {
+    throw new Error(
+      `Download failed: HTTP ${response.status} ${response.statusText}`,
+    );
+  }
+  if (!response.body) {
+    throw new Error('Download failed: No response body');
+  }
+
+  const contentLength = response.headers.get('content-length');
+  const totalBytes = contentLength ? parseInt(contentLength, 10) : null;
+  let downloadedBytes = 0;
+
+  const fileStream = fs.createWriteStream(tmpPath);
+  const reader = response.body.getReader();
+
+  try {
+    for (;;) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      fileStream.write(value);
+      downloadedBytes += value.byteLength;
+      renderProgress(downloadedBytes, totalBytes);
+    }
+  } finally {
+    fileStream.end();
+    // Clear the progress line.
+    process.stderr.write('\r' + ' '.repeat(80) + '\r');
+  }
+
+  // Wait for the file to finish flushing.
+  await new Promise<void>((resolve, reject) => {
+    fileStream.on('finish', resolve);
+    fileStream.on('error', reject);
+  });
+
+  // Atomic rename after successful download.
+  fs.renameSync(tmpPath, destPath);
+}
+
+/**
+ * Spawns a child process and returns a promise that resolves with the exit code.
+ * Inherits stdio so the user sees all output (progress, terms acceptance, etc.).
+ */
+function spawnInherited(command: string, args: string[]): Promise<number> {
+  return new Promise((resolve, reject) => {
+    const child = nodeSpawn(command, args, {
+      stdio: 'inherit',
+    });
+    child.on('close', (code) => resolve(code ?? 1));
+    child.on('error', reject);
+  });
+}
+
+interface SetupArgs {
+  port: number;
+  skipModel: boolean;
+  start: boolean;
+  force: boolean;
+  consent: boolean;
+}
+
+async function handleSetup(argv: SetupArgs): Promise<void> {
+  const { port, force } = argv;
+
+  log('');
+  log(chalk.bold('Gemma Local Model Routing Setup'));
+  log(chalk.dim('─'.repeat(40)));
+  log('');
+
+  // Step 1: Platform detection
+  const platform = detectPlatform();
+  if (!platform) {
+    logError(
+      chalk.red(`Unsupported platform: ${process.platform}-${process.arch}`),
+    );
+    logError(
+      'LiteRT-LM binaries are available for: macOS (ARM64), Linux (x86_64), Windows (x86_64)',
+    );
+    await exitCli(1);
+    return;
+  }
+  log(chalk.dim(`  Platform: ${platform.key} → ${platform.binaryName}`));
+
+  // Step 2: Consent
+  if (!argv.consent) {
+    log('');
+    log('This will download and install the LiteRT-LM runtime and the');
+    log(
+      `Gemma model (${GEMMA_MODEL_NAME}, ~1 GB). By proceeding, you agree to the`,
+    );
+    log('Gemma Terms of Use: https://ai.google.dev/gemma/terms');
+    log('');
+
+    const accepted = await promptYesNo('Do you want to continue?');
+    if (!accepted) {
+      log('Setup cancelled.');
+      await exitCli(0);
+      return;
+    }
+  }
+
+  // Step 3: Download binary
+  const binaryPath = getBinaryPath(platform.binaryName)!;
+  const alreadyInstalled = isBinaryInstalled();
+
+  if (alreadyInstalled && !force) {
+    log('');
+    log(chalk.green('  ✓ LiteRT-LM binary already installed at:'));
+    log(chalk.dim(`    ${binaryPath}`));
+  } else {
+    log('');
+    log('  Downloading LiteRT-LM binary...');
+    const downloadUrl = getBinaryDownloadUrl(platform.binaryName);
+    debugLogger.log(`Downloading from: ${downloadUrl}`);
+
+    try {
+      const binDir = getLiteRtBinDir();
+      fs.mkdirSync(binDir, { recursive: true });
+      await downloadFile(downloadUrl, binaryPath);
+      log(chalk.green('  ✓ Binary downloaded successfully'));
+    } catch (error) {
+      logError(
+        chalk.red(
+          `  ✗ Failed to download binary: ${error instanceof Error ? error.message : String(error)}`,
+        ),
+      );
+      logError('  Check your internet connection and try again.');
+      await exitCli(1);
+      return;
+    }
+
+    // Step 4: Make executable and handle macOS gatekeeper
+    if (process.platform !== 'win32') {
+      try {
+        fs.chmodSync(binaryPath, 0o755);
+      } catch (error) {
+        logError(
+          chalk.red(
+            `  ✗ Failed to set executable permission: ${error instanceof Error ? error.message : String(error)}`,
+          ),
+        );
+        await exitCli(1);
+        return;
+      }
+    }
+
+    if (process.platform === 'darwin') {
+      try {
+        execSync(`xattr -d com.apple.quarantine "${binaryPath}"`, {
+          stdio: 'ignore',
+        });
+        log(chalk.green('  ✓ macOS quarantine attribute removed'));
+      } catch {
+        // This is expected to fail if the attribute doesn't exist.
+        debugLogger.log(
+          'xattr quarantine removal not needed or failed (non-fatal)',
+        );
+      }
+    }
+  }
+
+  // Step 5: Pull the model
+  if (!argv.skipModel) {
+    const modelAlreadyDownloaded = isModelDownloaded(binaryPath);
+    if (modelAlreadyDownloaded && !force) {
+      log('');
+      log(chalk.green(`  ✓ Model ${GEMMA_MODEL_NAME} already downloaded`));
+    } else {
+      log('');
+      log(`  Downloading model ${GEMMA_MODEL_NAME}...`);
+      log(chalk.dim('  You may be prompted to accept the Gemma Terms of Use.'));
+      log('');
+
+      const exitCode = await spawnInherited(binaryPath, [
+        'pull',
+        GEMMA_MODEL_NAME,
+      ]);
+      if (exitCode !== 0) {
+        logError('');
+        logError(
+          chalk.red(`  ✗ Model download failed (exit code ${exitCode})`),
+        );
+        await exitCli(1);
+        return;
+      }
+      log('');
+      log(chalk.green(`  ✓ Model ${GEMMA_MODEL_NAME} downloaded`));
+    }
+  }
+
+  // Step 6: Configure settings
+  log('');
+  log('  Configuring settings...');
+  try {
+    const settings = loadSettings(process.cwd());
+    const existingGemma =
+      settings.forScope(SettingScope.User).settings.experimental
+        ?.gemmaModelRouter ?? {};
+
+    const newGemmaSettings = {
+      ...existingGemma,
+      enabled: true,
+      autoStartServer: existingGemma.autoStartServer ?? true,
+      classifier: {
+        host: `http://localhost:${port}`,
+        model: GEMMA_MODEL_NAME,
+        ...existingGemma.classifier,
+      },
+    };
+
+    // Read existing experimental settings to avoid overwriting them.
+    const existingExperimental =
+      settings.forScope(SettingScope.User).settings.experimental ?? {};
+    settings.setValue(SettingScope.User, 'experimental', {
+      ...existingExperimental,
+      gemmaModelRouter: newGemmaSettings,
+    });
+
+    log(chalk.green('  ✓ Settings updated in ~/.gemini/settings.json'));
+  } catch (error) {
+    logError(
+      chalk.red(
+        `  ✗ Failed to update settings: ${error instanceof Error ? error.message : String(error)}`,
+      ),
+    );
+    logError(
+      '  You can manually add the configuration to ~/.gemini/settings.json',
+    );
+  }
+
+  // Step 7: Start server (if requested)
+  if (argv.start) {
+    log('');
+    log('  Starting LiteRT server...');
+    const started = await startServer(binaryPath, port);
+    if (started) {
+      log(chalk.green(`  ✓ Server started on port ${port}`));
+    } else {
+      log(
+        chalk.yellow(
+          `  ! Server may not have started correctly. Check: gemini gemma status`,
+        ),
+      );
+    }
+  }
+
+  // Step 8: Summary
+  log('');
+  log(chalk.dim('─'.repeat(40)));
+  log(chalk.bold.green('  Setup complete! Local model routing is now active.'));
+  log('');
+  log('  How it works: Every request is classified by the local Gemma model.');
+  log(
+    '  Simple tasks (file reads, quick edits) route to ' +
+      chalk.cyan('Flash') +
+      ' for speed.',
+  );
+  log(
+    '  Complex tasks (debugging, architecture) route to ' +
+      chalk.cyan('Pro') +
+      ' for quality.',
+  );
+  log('  This happens automatically — just use the CLI as usual.');
+  log('');
+  if (!argv.start) {
+    log(
+      chalk.yellow(
+        '  Note: Run "gemini gemma start" to start the server, or restart',
+      ),
+    );
+    log(
+      chalk.yellow(
+        '  the CLI to auto-start it (if autoStartServer is enabled).',
+      ),
+    );
+    log('');
+  }
+  log('  Useful commands:');
+  log(chalk.dim('    gemini gemma status   Check routing status'));
+  log(chalk.dim('    gemini gemma start    Start the LiteRT server'));
+  log(chalk.dim('    gemini gemma stop     Stop the LiteRT server'));
+  log(chalk.dim('    /gemma               Check status inside a session'));
+  log('');
+}
+
+export const setupCommand: CommandModule = {
+  command: 'setup',
+  describe: 'Download and configure Gemma local model routing',
+  builder: (yargs) =>
+    yargs
+      .option('port', {
+        type: 'number',
+        default: DEFAULT_PORT,
+        description: 'Port for the LiteRT server',
+      })
+      .option('skip-model', {
+        type: 'boolean',
+        default: false,
+        description: 'Skip model download (binary only)',
+      })
+      .option('start', {
+        type: 'boolean',
+        default: true,
+        description: 'Start the server after setup',
+      })
+      .option('force', {
+        type: 'boolean',
+        default: false,
+        description: 'Re-download binary and model even if already present',
+      })
+      .option('consent', {
+        type: 'boolean',
+        default: false,
+        description: 'Skip interactive consent prompt (implies acceptance)',
+      }),
+  handler: async (argv) => {
+    await handleSetup({
+      port: Number(argv['port']),
+      skipModel: Boolean(argv['skipModel']),
+      start: Boolean(argv['start']),
+      force: Boolean(argv['force']),
+      consent: Boolean(argv['consent']),
+    });
+    await exitCli(0);
+  },
+};
diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts
new file mode 100644
index 00000000000..8918daa9faf
--- /dev/null
+++ b/packages/cli/src/commands/gemma/start.ts
@@ -0,0 +1,122 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { CommandModule } from 'yargs';
+import fs from 'node:fs';
+import { spawn } from 'node:child_process';
+import chalk from 'chalk';
+import { debugLogger } from '@google/gemini-cli-core';
+import { exitCli } from '../utils.js';
+import {
+  DEFAULT_PORT,
+  getPidFilePath,
+  getLogFilePath,
+  getLiteRtBinDir,
+  SERVER_START_WAIT_MS,
+} from './constants.js';
+import {
+  getBinaryPath,
+  isBinaryInstalled,
+  isServerRunning,
+} from './platform.js';
+
+/**
+ * Starts the LiteRT-LM server as a detached background process.
+ * Returns true if the server responds on `port` (or was already running).
+ *
+ * This function is also used by `setup.ts` to start the server after installation.
+ */
+export async function startServer(
+  binaryPath: string,
+  port: number,
+): Promise<boolean> {
+  const alreadyRunning = await isServerRunning(port);
+  if (alreadyRunning) {
+    debugLogger.log(`LiteRT server already running on port ${port}`);
+    return true;
+  }
+
+  const logPath = getLogFilePath();
+  fs.mkdirSync(getLiteRtBinDir(), { recursive: true });
+  // Ensure the temp dir for the log and PID files exists. Both separators are
+  // matched so the file name is also stripped from Windows paths.
+  const tmpDir = getPidFilePath().replace(/[\\/][^\\/]+$/, '');
+  fs.mkdirSync(tmpDir, { recursive: true });
+
+  const logFd = fs.openSync(logPath, 'a');
+
+  try {
+    const child = spawn(binaryPath, ['serve', `--port=${port}`, '--verbose'], {
+      detached: true,
+      stdio: ['ignore', logFd, logFd],
+    });
+    // A failed spawn emits 'error'; with no listener that would crash the CLI.
+    // The health check below still reports the failure to the caller.
+    child.on('error', (err) => debugLogger.error(String(err)));
+
+    if (child.pid) {
+      fs.writeFileSync(getPidFilePath(), String(child.pid), 'utf-8');
+    }
+
+    // Detach the child so it survives after the CLI exits.
+    child.unref();
+  } finally {
+    fs.closeSync(logFd);
+  }
+
+  // Wait briefly and verify the server is responding.
+  await new Promise((resolve) => setTimeout(resolve, SERVER_START_WAIT_MS));
+  return isServerRunning(port);
+}
+
+export const startCommand: CommandModule = {
+  command: 'start',
+  describe: 'Start the LiteRT-LM server',
+  builder: (yargs) =>
+    yargs.option('port', {
+      type: 'number',
+      default: DEFAULT_PORT,
+      description: 'Port for the LiteRT server',
+    }),
+  handler: async (argv) => {
+    const port = Number(argv['port']);
+
+    if (!isBinaryInstalled()) {
+      debugLogger.error(
+        chalk.red(
+          'LiteRT-LM binary not found. Run "gemini gemma setup" first.',
+        ),
+      );
+      await exitCli(1);
+      return;
+    }
+
+    const alreadyRunning = await isServerRunning(port);
+    if (alreadyRunning) {
+      debugLogger.log(
+        chalk.green(`LiteRT server is already running on port ${port}.`),
+      );
+      await exitCli(0);
+      return;
+    }
+
+    const binaryPath = getBinaryPath()!;
+    debugLogger.log(`Starting LiteRT server on port ${port}...`);
+
+    const started = await startServer(binaryPath, port);
+    if (started) {
+      debugLogger.log(chalk.green(`LiteRT server started on port ${port}.`));
+      debugLogger.log(chalk.dim(`Logs: ${getLogFilePath()}`));
+      await exitCli(0);
+    } else {
+      debugLogger.error(
+        chalk.red('Server may not have started correctly. Check logs:'),
+      );
+      debugLogger.error(chalk.dim(`  ${getLogFilePath()}`));
+      await exitCli(1);
+    }
+  },
+};
diff --git a/packages/cli/src/commands/gemma/status.ts b/packages/cli/src/commands/gemma/status.ts
new file mode 100644
index 00000000000..4a265dd9445
--- /dev/null
+++ b/packages/cli/src/commands/gemma/status.ts
@@ -0,0 +1,181 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { CommandModule } from 'yargs';
+import chalk from 'chalk';
+import { loadSettings } from '../../config/settings.js';
+import { DEFAULT_PORT, GEMMA_MODEL_NAME } from './constants.js';
+import {
+  detectPlatform,
+  getBinaryPath,
+  isBinaryInstalled,
+  isModelDownloaded,
+  isServerRunning,
+  readServerPid,
+  isProcessRunning,
+} from './platform.js';
+import { exitCli } from '../utils.js';
+
+export interface GemmaStatusResult {
+  binaryInstalled: boolean;
+  binaryPath: string | null;
+  modelDownloaded: boolean;
+  serverRunning: boolean;
+  serverPid: number | null;
+  settingsEnabled: boolean;
+  port: number;
+  allPassing: boolean;
+}
+
+/**
+ * Runs all diagnostic checks (binary, model, server, settings) for `port`
+ * (DEFAULT_PORT when omitted) and returns a structured status result.
+ * Shared by `gemini gemma status` and the in-session `/gemma` slash command.
+ */
+export async function checkGemmaStatus(
+  port?: number,
+): Promise<GemmaStatusResult> {
+  const effectivePort = port ?? DEFAULT_PORT;
+  const binaryPath = getBinaryPath();
+  const binaryInstalled = isBinaryInstalled();
+  const modelDownloaded =
+    binaryInstalled && binaryPath ? isModelDownloaded(binaryPath) : false;
+  const serverRunning = await isServerRunning(effectivePort);
+  const pid = readServerPid();
+  const serverPid = pid && isProcessRunning(pid) ? pid : null;
+
+  let settingsEnabled = false;
+  try {
+    const settings = loadSettings(process.cwd());
+    const gemmaSettings = settings.merged.experimental?.gemmaModelRouter;
+    settingsEnabled = gemmaSettings?.enabled === true;
+  } catch {
+    // Settings may fail to load in some contexts; treat as not enabled.
+  }
+
+  const allPassing =
+    binaryInstalled && modelDownloaded && serverRunning && settingsEnabled;
+
+  return {
+    binaryInstalled,
+    binaryPath,
+    modelDownloaded,
+    serverRunning,
+    serverPid,
+    settingsEnabled,
+    port: effectivePort,
+    allPassing,
+  };
+}
+
+/** Formats the status result into a human-readable string. */
+export function formatGemmaStatus(status: GemmaStatusResult): string {
+  const check = (ok: boolean) => (ok ? chalk.green('✓') : chalk.red('✗'));
+
+  const lines: string[] = [
+    '',
+    chalk.bold('Gemma Local Model Routing Status'),
+    chalk.dim('─'.repeat(40)),
+    '',
+  ];
+
+  // Binary
+  if (status.binaryInstalled) {
+    lines.push(`  Binary:    ${check(true)} Installed (${status.binaryPath})`);
+  } else {
+    const platform = detectPlatform();
+    if (platform) {
+      lines.push(`  Binary:    ${check(false)} Not installed`);
+      lines.push(chalk.dim(`             Run: gemini gemma setup`));
+    } else {
+      lines.push(
+        `  Binary:    ${check(false)} Unsupported platform (${process.platform}-${process.arch})`,
+      );
+    }
+  }
+
+  // Model
+  if (status.modelDownloaded) {
+    lines.push(`  Model:     ${check(true)} ${GEMMA_MODEL_NAME} downloaded`);
+  } else {
+    lines.push(`  Model:     ${check(false)} ${GEMMA_MODEL_NAME} not found`);
+    if (status.binaryInstalled) {
+      lines.push(
+        chalk.dim(
+          `             Run: ${status.binaryPath} pull ${GEMMA_MODEL_NAME}`,
+        ),
+      );
+    } else {
+      lines.push(chalk.dim(`             Run: gemini gemma setup`));
+    }
+  }
+
+  // Server
+  if (status.serverRunning) {
+    const pidInfo = status.serverPid ? ` (PID ${status.serverPid})` : '';
+    lines.push(
+      `  Server:    ${check(true)} Running on port ${status.port}${pidInfo}`,
+    );
+  } else {
+    lines.push(
+      `  Server:    ${check(false)} Not running on port ${status.port}`,
+    );
+    lines.push(chalk.dim(`             Run: gemini gemma start`));
+  }
+
+  // Settings
+  if (status.settingsEnabled) {
+    lines.push(`  Settings:  ${check(true)} Enabled in settings.json`);
+  } else {
+    lines.push(`  Settings:  ${check(false)} Not enabled in settings.json`);
+    lines.push(
+      chalk.dim(
+        `             Run: gemini gemma setup (auto-configures settings)`,
+      ),
+    );
+  }
+
+  lines.push('');
+
+  if (status.allPassing) {
+    lines.push(chalk.green('  Routing is active — no action needed.'));
+    lines.push('');
+    lines.push(
+      chalk.dim(
+        '  Simple requests → Flash (fast) | Complex requests → Pro (powerful)',
+      ),
+    );
+    lines.push(chalk.dim('  This happens automatically on every request.'));
+  } else {
+    lines.push(
+      chalk.yellow(
+        '  Some checks failed. Run "gemini gemma setup" for guided installation.',
+      ),
+    );
+  }
+
+  lines.push('');
+  return lines.join('\n');
+}
+
+export const statusCommand: CommandModule = {
+  command: 'status',
+  describe: 'Check Gemma local model routing status',
+  builder: (yargs) =>
+    yargs.option('port', {
+      type: 'number',
+      default: DEFAULT_PORT,
+      description: 'Port to check for the LiteRT server',
+    }),
+  handler: async (argv) => {
+    const port = Number(argv['port']);
+    const status = await checkGemmaStatus(port);
+    const output = formatGemmaStatus(status);
+    // Use process.stdout directly for consistent output in non-interactive mode.
+    process.stdout.write(output);
+    await exitCli(status.allPassing ? 0 : 1);
+  },
+};
diff --git a/packages/cli/src/commands/gemma/stop.ts b/packages/cli/src/commands/gemma/stop.ts
new file mode 100644
index 00000000000..15db60eaa8e
--- /dev/null
+++ b/packages/cli/src/commands/gemma/stop.ts
@@ -0,0 +1,115 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { CommandModule } from 'yargs';
+import fs from 'node:fs';
+import chalk from 'chalk';
+import { debugLogger } from '@google/gemini-cli-core';
+import { exitCli } from '../utils.js';
+import { DEFAULT_PORT, getPidFilePath } from './constants.js';
+import {
+  readServerPid,
+  isProcessRunning,
+  isServerRunning,
+} from './platform.js';
+
+/**
+ * Stops the LiteRT-LM server from the PID file (SIGTERM, then SIGKILL after
+ * 1s). Returns true only if the process is confirmed gone afterwards.
+ */
+export async function stopServer(): Promise<boolean> {
+  const pid = readServerPid();
+  const pidPath = getPidFilePath();
+  // Best-effort removal of the PID file; a missing file is not an error.
+  const removePidFile = () => {
+    try {
+      fs.unlinkSync(pidPath);
+    } catch {
+      // Ignore cleanup errors.
+    }
+  };
+
+  if (pid === null) {
+    return false;
+  }
+
+  if (!isProcessRunning(pid)) {
+    // PID file exists but process is gone — clean up stale file.
+    removePidFile();
+    return false;
+  }
+
+  try {
+    process.kill(pid, 'SIGTERM');
+  } catch {
+    return false;
+  }
+
+  // Wait briefly for graceful shutdown.
+  await new Promise((resolve) => setTimeout(resolve, 1000));
+
+  // If still running, escalate to SIGKILL.
+  if (isProcessRunning(pid)) {
+    try {
+      process.kill(pid, 'SIGKILL');
+    } catch {
+      // Process may have exited between the check and the kill.
+    }
+    await new Promise((resolve) => setTimeout(resolve, 500));
+  }
+
+  // Only report success (and drop the PID file) once the process is gone.
+  const stopped = !isProcessRunning(pid);
+  if (stopped) removePidFile();
+  return stopped;
+}
+
+export const stopCommand: CommandModule = {
+  command: 'stop',
+  describe: 'Stop the LiteRT-LM server',
+  builder: (yargs) =>
+    yargs.option('port', {
+      type: 'number',
+      default: DEFAULT_PORT,
+      description: 'Port the server is running on',
+    }),
+  handler: async (argv) => {
+    const port = Number(argv['port']);
+    const pid = readServerPid();
+
+    if (pid !== null && isProcessRunning(pid)) {
+      debugLogger.log(`Stopping LiteRT server (PID ${pid})...`);
+      const stopped = await stopServer();
+      if (stopped) {
+        debugLogger.log(chalk.green('LiteRT server stopped.'));
+        await exitCli(0);
+      } else {
+        debugLogger.error(chalk.red('Failed to stop LiteRT server.'));
+        await exitCli(1);
+      }
+      return;
+    }
+
+    // No PID file or process not running — check if something else is on the port.
+    const running = await isServerRunning(port);
+    if (running) {
+      debugLogger.log(
+        chalk.yellow(
+          `A server is responding on port ${port}, but it was not started by "gemini gemma start".`,
+        ),
+      );
+      debugLogger.log(
+        chalk.dim(
+          'If you started it manually, stop it from the terminal where it is running.',
+        ),
+      );
+      await exitCli(1);
+    } else {
+      debugLogger.log('No LiteRT server is currently running.');
+      await exitCli(0);
+    }
+  },
+};
diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts
index 4e7e1db6f2c..17c3ffb89a6 100755
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -13,6 +13,7 @@ import { mcpCommand } from '../commands/mcp.js';
 import { extensionsCommand } from '../commands/extensions.js';
 import { skillsCommand } from '../commands/skills.js';
 import { hooksCommand } from '../commands/hooks.js';
+import { gemmaCommand } from '../commands/gemma.js';
 import {
   setGeminiMdFilename as setServerGeminiMdFilename,
   getCurrentGeminiMdFilename,
@@ -181,6 +182,7 @@ export async function parseArguments(
         extensionsCommand,
         skillsCommand,
         hooksCommand,
+        gemmaCommand,
       ];
 
       const subcommands = commandModules.flatMap((mod) => {
@@ -260,6 +262,7 @@ export async function parseArguments(
   yargsInstance.command(extensionsCommand);
   yargsInstance.command(skillsCommand);
   yargsInstance.command(hooksCommand);
+  yargsInstance.command(gemmaCommand);
 
   yargsInstance
     .command('$0 [query..]', 'Launch Gemini CLI', (yargsInstance) =>
diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts
index fcfd604e3a7..6f946eb963d 100644
--- a/packages/cli/src/config/settingsSchema.ts
+++ b/packages/cli/src/config/settingsSchema.ts
@@ -2146,6 +2146,26 @@ const SETTINGS_SCHEMA = {
               'Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.',
             showInDialog: false,
           },
+          autoStartServer: {
+            type: 'boolean',
+            label: 'Auto-start LiteRT Server',
+            category: 'Experimental',
+            requiresRestart: true,
+            default: true,
+            description:
+              'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.',
+            showInDialog: false,
+          },
+          binaryPath: {
+            type: 'string',
+            label: 'LiteRT Binary Path',
+            category: 'Experimental',
+            requiresRestart: true,
+            default: '',
+            description:
+              'Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).',
+            showInDialog: false,
+          },
           classifier: {
             type: 'object',
             label: 'Classifier',
diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx
index eedfcc950ad..899be0bad57 100644
--- a/packages/cli/src/gemini.tsx
+++ b/packages/cli/src/gemini.tsx
@@ -612,6 +612,16 @@ export async function main() {
     const initializationResult = await initializeApp(config, settings);
     initAppHandle?.end();
 
+    // Auto-start the LiteRT-LM server for Gemma local routing if configured.
+    // This is fire-and-forget — failures are logged but never block startup.
+    import('./services/liteRtServerManager.js')
+      .then(({ LiteRtServerManager }) =>
+        LiteRtServerManager.ensureRunning(
+          settings.merged.experimental?.gemmaModelRouter,
+        ),
+      )
+      .catch((e) => debugLogger.warn('LiteRT auto-start import failed:', e));
+
     if (
       settings.merged.security.auth.selectedType ===
         AuthType.LOGIN_WITH_GOOGLE &&
diff --git a/packages/cli/src/services/BuiltinCommandLoader.ts b/packages/cli/src/services/BuiltinCommandLoader.ts
index c1cbd5621e8..94b5986eb33 100644
--- a/packages/cli/src/services/BuiltinCommandLoader.ts
+++ b/packages/cli/src/services/BuiltinCommandLoader.ts
@@ -61,6 +61,7 @@ import { vimCommand } from '../ui/commands/vimCommand.js';
 import { setupGithubCommand } from '../ui/commands/setupGithubCommand.js';
 import { terminalSetupCommand } from '../ui/commands/terminalSetupCommand.js';
 import { upgradeCommand } from '../ui/commands/upgradeCommand.js';
+import { gemmaStatusCommand } from '../ui/commands/gemmaStatusCommand.js';
 
 /**
  * Loads the core, hard-coded slash commands that are an integral part
@@ -221,6 +222,7 @@ export class BuiltinCommandLoader implements ICommandLoader {
           : [skillsCommand]
         : []),
       settingsCommand,
+      gemmaStatusCommand,
       tasksCommand,
       vimCommand,
       setupGithubCommand,
diff --git a/packages/cli/src/services/liteRtServerManager.ts b/packages/cli/src/services/liteRtServerManager.ts
new file mode 100644
index 00000000000..1f0de35fe3a
--- /dev/null
+++ b/packages/cli/src/services/liteRtServerManager.ts
@@ -0,0 +1,88 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { debugLogger } from '@google/gemini-cli-core';
+import {
+  getBinaryPath,
+  isBinaryInstalled,
+  isServerRunning,
+} from '../commands/gemma/platform.js';
+import { DEFAULT_PORT } from '../commands/gemma/constants.js';
+
+// Use a local interface that includes the new fields, since the core
+// package's compiled types may not include them until rebuilt.
+interface GemmaSettings {
+  enabled?: boolean;
+  autoStartServer?: boolean;
+  binaryPath?: string;
+  classifier?: { host?: string; model?: string };
+}
+
+/**
+ * Manages the LiteRT-LM server lifecycle for auto-start during CLI startup.
+ *
+ * When the Gemma model router is enabled and `autoStartServer` is not
+ * `false`, this starts the server unless it already runs on the port, using a
+ * configured `binaryPath` over the default install. The server is a detached
+ * daemon that persists across CLI sessions — it is NOT stopped on CLI exit.
+ */
+export class LiteRtServerManager {
+  /**
+   * Ensures the LiteRT-LM server is running if the settings call for it.
+   * This is fire-and-forget: failures are logged but never block startup.
+   */
+  static async ensureRunning(
+    gemmaSettings: GemmaSettings | undefined,
+  ): Promise<void> {
+    if (!gemmaSettings?.enabled) return;
+    if (gemmaSettings.autoStartServer === false) return;
+    // A custom binaryPath must not be vetoed by the default-install check.
+    if (!gemmaSettings.binaryPath && !isBinaryInstalled()) {
+      debugLogger.log(
+        '[LiteRtServerManager] Binary not installed, skipping auto-start. Run "gemini gemma setup".',
+      );
+      return;
+    }
+
+    const port =
+      parseInt(
+        gemmaSettings.classifier?.host?.match(/:(\d+)/)?.[1] ?? '',
+        10,
+      ) || DEFAULT_PORT;
+
+    const running = await isServerRunning(port);
+    if (running) {
+      debugLogger.log(
+        `[LiteRtServerManager] Server already running on port ${port}`,
+      );
+      return;
+    }
+
+    debugLogger.log(
+      `[LiteRtServerManager] Auto-starting LiteRT server on port ${port}...`,
+    );
+
+    try {
+      // Dynamic import avoids circular dependencies with the start command.
+      const { startServer } = await import('../commands/gemma/start.js');
+      const binaryPath = gemmaSettings.binaryPath || getBinaryPath() || '';
+      if (!binaryPath) {
+        debugLogger.warn('[LiteRtServerManager] Could not resolve binary path');
+        return;
+      }
+      const started = await startServer(binaryPath, port);
+      if (started) {
+        debugLogger.log(`[LiteRtServerManager] Server started on port ${port}`);
+      } else {
+        debugLogger.warn(
+          `[LiteRtServerManager] Server may not have started correctly on port ${port}`,
+        );
+      }
+    } catch (error) {
+      debugLogger.warn('[LiteRtServerManager] Auto-start failed:', error);
+    }
+  }
+}
diff --git a/packages/cli/src/ui/commands/gemmaStatusCommand.ts b/packages/cli/src/ui/commands/gemmaStatusCommand.ts
new file mode 100644
index 00000000000..2c581b31a1e
--- /dev/null
+++ b/packages/cli/src/ui/commands/gemmaStatusCommand.ts
@@ -0,0 +1,41 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { CommandKind, type SlashCommand } from './types.js';
+import { MessageType, type HistoryItemGemmaStatus } from '../types.js';
+import { checkGemmaStatus } from '../../commands/gemma/status.js';
+import { GEMMA_MODEL_NAME } from '../../commands/gemma/constants.js';
+
+export const gemmaStatusCommand: SlashCommand = {
+  name: 'gemma',
+  description: 'Check local Gemma model routing status',
+  kind: CommandKind.BUILT_IN,
+  autoExecute: true,
+  isSafeConcurrent: true,
+  action: async (context) => {
+    const port =
+      parseInt(
+        context.services.settings.merged.experimental?.gemmaModelRouter?.classifier?.host?.match(
+          /:(\d+)/,
+        )?.[1] ?? '',
+        10,
+      ) || undefined;
+    const status = await checkGemmaStatus(port);
+    const item: Omit<HistoryItemGemmaStatus, 'id'> = {
+      type: MessageType.GEMMA_STATUS,
+      binaryInstalled: status.binaryInstalled,
+      binaryPath: status.binaryPath,
+      modelName: GEMMA_MODEL_NAME,
+      modelDownloaded: status.modelDownloaded,
+      serverRunning: status.serverRunning,
+      serverPid: status.serverPid,
+      serverPort: status.port,
+      settingsEnabled: status.settingsEnabled,
+      allPassing: status.allPassing,
+    };
+    context.ui.addItem(item);
+  },
+};
diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.tsx
index 0ceb70f8d72..5f384612867 100644
--- a/packages/cli/src/ui/components/HistoryItemDisplay.tsx
+++ b/packages/cli/src/ui/components/HistoryItemDisplay.tsx
@@ -32,6 +32,7 @@ import { ToolsList } from './views/ToolsList.js';
 import { SkillsList } from './views/SkillsList.js';
 import { AgentsStatus } from './views/AgentsStatus.js';
 import { McpStatus } from './views/McpStatus.js';
+import { GemmaStatus } from './views/GemmaStatus.js';
 import { ChatList } from './views/ChatList.js';
 import { ModelMessage } from './messages/ModelMessage.js';
 import { ThinkingMessage } from './messages/ThinkingMessage.js';
@@ -242,6 +243,9 @@ export const HistoryItemDisplay: React.FC<HistoryItemDisplayProps> = ({
       {itemForDisplay.type === 'mcp_status' && (
         <McpStatus {...itemForDisplay} serverStatus={getMCPServerStatus} />
       )}
+      {itemForDisplay.type === 'gemma_status' && (
+        <GemmaStatus {...itemForDisplay} />
+      )}
       {itemForDisplay.type === 'chat_list' && (
         <ChatList chats={itemForDisplay.chats} />
       )}
diff --git a/packages/cli/src/ui/components/views/GemmaStatus.tsx b/packages/cli/src/ui/components/views/GemmaStatus.tsx
new file mode 100644
index 00000000000..b9c20142d46
--- /dev/null
+++ b/packages/cli/src/ui/components/views/GemmaStatus.tsx
@@ -0,0 +1,126 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { Box, Text } from 'ink';
+import type React from 'react';
+import { theme } from '../../semantic-colors.js';
+import type { HistoryItemGemmaStatus } from '../../types.js';
+
+type GemmaStatusProps = Omit<HistoryItemGemmaStatus, 'id' | 'type'>;
+
+const StatusDot: React.FC<{ ok: boolean }> = ({ ok }) => (
+  <Text color={ok ? theme.status.success : theme.status.error}>
+    {ok ? '\u25CF' : '\u25CB'}
+  </Text>
+);
+
+export const GemmaStatus: React.FC<GemmaStatusProps> = ({
+  binaryInstalled,
+  binaryPath,
+  modelName,
+  modelDownloaded,
+  serverRunning,
+  serverPid,
+  serverPort,
+  settingsEnabled,
+  allPassing,
+}) => (
+  <Box flexDirection="column">
+    <Text bold>Gemma Local Model Routing</Text>
+    <Box height={1} />
+
+    {/* Binary */}
+    <Box>
+      <StatusDot ok={binaryInstalled} />
+      <Text>
+        {' '}
+        <Text bold>Binary: </Text>
+        {binaryInstalled ? (
+          <Text color={theme.text.secondary}>{binaryPath}</Text>
+        ) : (
+          <Text color={theme.status.error}>Not installed</Text>
+        )}
+      </Text>
+    </Box>
+
+    {/* Model */}
+    <Box>
+      <StatusDot ok={modelDownloaded} />
+      <Text>
+        {' '}
+        <Text bold>Model: </Text>
+        {modelDownloaded ? (
+          <Text>{modelName}</Text>
+        ) : (
+          <Text color={theme.status.error}>{modelName} not found</Text>
+        )}
+      </Text>
+    </Box>
+
+    {/* Server */}
+    <Box>
+      <StatusDot ok={serverRunning} />
+      <Text>
+        {' '}
+        <Text bold>Server: </Text>
+        {serverRunning ? (
+          <Text>
+            port {serverPort}
+            {serverPid ? (
+              <Text color={theme.text.secondary}> (PID {serverPid})</Text>
+            ) : null}
+          </Text>
+        ) : (
+          <Text color={theme.status.error}>
+            not running on port {serverPort}
+          </Text>
+        )}
+      </Text>
+    </Box>
+
+    {/* Settings */}
+    <Box>
+      <StatusDot ok={settingsEnabled} />
+      <Text>
+        {' '}
+        <Text bold>Settings: </Text>
+        {settingsEnabled ? (
+          <Text>enabled</Text>
+        ) : (
+          <Text color={theme.status.error}>not enabled</Text>
+        )}
+      </Text>
+    </Box>
+
+    {/* Active For */}
+    <Box marginTop={1}>
+      <Text bold>Active for: </Text>
+      {allPassing ? (
+        <Text color={theme.status.success}>[routing]</Text>
+      ) : (
+        <Text color={theme.text.secondary}>none</Text>
+      )}
+    </Box>
+
+    {/* Summary */}
+    <Box marginTop={1}>
+      {allPassing ? (
+        <Box flexDirection="column">
+          <Text color={theme.text.secondary}>
+            Simple requests route to Flash, complex requests to Pro.
+          </Text>
+          <Text color={theme.text.secondary}>
+            This happens automatically on every request.
+          </Text>
+        </Box>
+      ) : (
+        <Text color={theme.status.warning}>
+          Run &quot;gemini gemma setup&quot; to install and configure.
+        </Text>
+      )}
+    </Box>
+  </Box>
+);
diff --git a/packages/cli/src/ui/types.ts b/packages/cli/src/ui/types.ts
index 6fbc3151d8e..bd9d4f893a0 100644
--- a/packages/cli/src/ui/types.ts
+++ b/packages/cli/src/ui/types.ts
@@ -353,6 +353,19 @@ export interface JsonMcpResource {
   description?: string;
 }
 
+export type HistoryItemGemmaStatus = HistoryItemBase & {
+  type: 'gemma_status';
+  binaryInstalled: boolean;
+  binaryPath: string | null;
+  modelName: string;
+  modelDownloaded: boolean;
+  serverRunning: boolean;
+  serverPid: number | null;
+  serverPort: number;
+  settingsEnabled: boolean;
+  allPassing: boolean;
+};
+
 export type HistoryItemMcpStatus = HistoryItemBase & {
   type: 'mcp_status';
   servers: Record<string, MCPServerConfig>;
@@ -402,6 +415,7 @@ export type HistoryItemWithoutId =
   | HistoryItemSkillsList
   | HistoryItemAgentsList
   | HistoryItemMcpStatus
+  | HistoryItemGemmaStatus
   | HistoryItemChatList
   | HistoryItemThinking
   | HistoryItemHint
@@ -428,6 +442,7 @@ export enum MessageType {
   SKILLS_LIST = 'skills_list',
   AGENTS_LIST = 'agents_list',
   MCP_STATUS = 'mcp_status',
+  GEMMA_STATUS = 'gemma_status',
   CHAT_LIST = 'chat_list',
   HINT = 'hint',
 }
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 5e8507eba4d..8b96272d51e 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -217,6 +217,8 @@ export interface OutputSettings {
 
 export interface GemmaModelRouterSettings {
   enabled?: boolean;
+  autoStartServer?: boolean;
+  binaryPath?: string;
   classifier?: {
     host?: string;
     model?: string;

From 8f023b56f7856860b425d1977719a56697472b5a Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Fri, 10 Apr 2026 13:40:22 -0700
Subject: [PATCH 06/33] feat(cli): add `gemini gemma logs` command to view
 LiteRT server logs

Tails the LiteRT-LM server log file for live visibility into routing
classification requests. Supports --lines N for last N lines, or
follow mode (default) for live streaming.
---
 packages/cli/src/commands/gemma.ts      |  2 +
 packages/cli/src/commands/gemma/logs.ts | 68 +++++++++++++++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 packages/cli/src/commands/gemma/logs.ts

diff --git a/packages/cli/src/commands/gemma.ts b/packages/cli/src/commands/gemma.ts
index 1d2a28c23ed..d7b88f06374 100644
--- a/packages/cli/src/commands/gemma.ts
+++ b/packages/cli/src/commands/gemma.ts
@@ -11,6 +11,7 @@ import { setupCommand } from './gemma/setup.js';
 import { startCommand } from './gemma/start.js';
 import { stopCommand } from './gemma/stop.js';
 import { statusCommand } from './gemma/status.js';
+import { logsCommand } from './gemma/logs.js';
 
 export const gemmaCommand: CommandModule = {
   command: 'gemma',
@@ -25,6 +26,7 @@ export const gemmaCommand: CommandModule = {
       .command(defer(startCommand, 'gemma'))
       .command(defer(stopCommand, 'gemma'))
       .command(defer(statusCommand, 'gemma'))
+      .command(defer(logsCommand, 'gemma'))
       .demandCommand(1, 'You need at least one command before continuing.')
       .version(false),
   handler: () => {
diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts
new file mode 100644
index 00000000000..4420e7adb9d
--- /dev/null
+++ b/packages/cli/src/commands/gemma/logs.ts
@@ -0,0 +1,68 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { CommandModule } from 'yargs';
+import fs from 'node:fs';
+import { spawn } from 'node:child_process';
+import { debugLogger } from '@google/gemini-cli-core';
+import { exitCli } from '../utils.js';
+import { getLogFilePath } from './constants.js';
+
+export const logsCommand: CommandModule = {
+  command: 'logs',
+  describe: 'View LiteRT-LM server logs',
+  builder: (yargs) =>
+    yargs
+      .option('lines', {
+        alias: 'n',
+        type: 'number',
+        description: 'Show the last N lines and exit (omit to follow live)',
+      })
+      .option('follow', {
+        alias: 'f',
+        type: 'boolean',
+        default: true,
+        description: 'Follow log output (default when --lines is not set)',
+      }),
+  // Prints the log via `tail`; resolves only after `tail` exits or fails.
+  handler: async (argv) => {
+    const logPath = getLogFilePath();
+
+    if (!fs.existsSync(logPath)) {
+      debugLogger.log(`No log file found at ${logPath}`);
+      debugLogger.log(
+        'Is the LiteRT server running? Start it with: gemini gemma start',
+      );
+      await exitCli(1);
+      return;
+    }
+
+    const rawLines = argv['lines'];
+    const lines = Number.isFinite(rawLines) ? Number(rawLines) : undefined;
+    // With --lines: print the last N lines and exit. Otherwise follow live.
+    const follow = lines === undefined;
+    const tailArgs = follow
+      ? ['-f', '-n', '20', logPath]
+      : ['-n', String(lines), logPath];
+    if (follow) debugLogger.log(`Tailing ${logPath} (Ctrl+C to stop)\n`);
+
+    const child = spawn('tail', tailArgs, { stdio: 'inherit' });
+    const stopChild = () => child.kill('SIGTERM');
+    if (follow) process.on('SIGINT', stopChild);
+
+    // 'error' fires when `tail` cannot be spawned (e.g. not on PATH); without
+    // a listener Node would throw it as an uncaught exception.
+    const exitCode = await new Promise<number>((resolve) => {
+      child.once('error', (err) => {
+        debugLogger.error(`Failed to run "tail": ${err.message}`);
+        resolve(1);
+      });
+      child.once('close', (code) => resolve(code ?? 0));
+    });
+    process.off('SIGINT', stopChild);
+    await exitCli(exitCode);
+  },
+};

From 82e87c40d62c68a244b63b804a75c368f8292733 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Fri, 10 Apr 2026 13:51:52 -0700
Subject: [PATCH 07/33] docs: add gemma setup quick-start guide

---
 docs/core/gemma-setup-guide.md | 65 ++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 docs/core/gemma-setup-guide.md

diff --git a/docs/core/gemma-setup-guide.md b/docs/core/gemma-setup-guide.md
new file mode 100644
index 00000000000..05f3b104ff6
--- /dev/null
+++ b/docs/core/gemma-setup-guide.md
@@ -0,0 +1,65 @@
+# `gemini gemma` — Local Model Routing Setup
+
+## What is this?
+
+Routes simple requests to Flash and complex requests to Pro using a local Gemma
+3 1B model running on your machine. This saves cloud API costs by replacing the
+cloud classifier round-trip with a fast (~100 ms) local inference step.
+
+## Quick Start
+
+```bash
+# One command does everything: downloads runtime, pulls model, configures settings, starts server
+gemini gemma setup
+```
+
+You'll be prompted to accept the Gemma Terms of Use. The model is ~1 GB.
+
+After setup, **just use the CLI normally** — routing happens automatically on
+every request.
+
+## Commands
+
+| Command               | What it does                                                   |
+| --------------------- | -------------------------------------------------------------- |
+| `gemini gemma setup`  | Full install (binary + model + settings + server start)        |
+| `gemini gemma status` | Health check — shows what's installed and running              |
+| `gemini gemma start`  | Start the LiteRT server (auto-starts on CLI launch by default) |
+| `gemini gemma stop`   | Stop the LiteRT server                                         |
+| `gemini gemma logs`   | Tail the server logs to see routing requests live              |
+| `/gemma`              | In-session status check (type it inside the CLI)               |
+
+## Verifying it works
+
+1. Run `gemini gemma status` — all checks should show green
+2. Open two terminals:
+   - Terminal 1: `gemini gemma logs` (watch for incoming requests)
+   - Terminal 2: use the CLI normally
+3. You should see classification requests appear in the logs as you interact
+   with the CLI
+4. The `/gemma` slash command inside a session shows a quick status panel
+
+## Setup flags
+
+```bash
+gemini gemma setup --port 8080      # custom port
+gemini gemma setup --no-start       # don't start server after install
+gemini gemma setup --force          # re-download everything
+gemini gemma setup --skip-model     # binary only, skip the 1GB model download
+```
+
+## How it works under the hood
+
+- Local Gemma classifies each request as "simple" or "complex" (~100ms)
+- Simple → Flash, Complex → Pro
+- If the local server is down, the CLI silently falls back to the cloud
+  classifier — no errors, no disruption
+
+## Disabling
+
+`gemini gemma stop` only stops the server until the next CLI launch (it
+auto-starts by default). To disable routing, set `enabled: false` in settings:
+
+```json
+{ "experimental": { "gemmaModelRouter": { "enabled": false } } }
+```

From e0f043a6732bc1b03432cc196b00a4369a5dabea Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Mon, 13 Apr 2026 12:35:10 -0700
Subject: [PATCH 08/33] fix(core): set apiVersion to empty string for LiteRT-LM
 client

The Google GenAI SDK defaults to 'v1beta' as the API version prefix,
producing URLs like /v1beta/models/...:generateContent. The LiteRT-LM
server does not use a versioned API path, causing 404 responses.
Setting apiVersion to '' removes the prefix.
---
 packages/core/src/core/localLiteRtLmClient.ts | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/packages/core/src/core/localLiteRtLmClient.ts b/packages/core/src/core/localLiteRtLmClient.ts
index 798dcb57656..3aa111eb3d5 100644
--- a/packages/core/src/core/localLiteRtLmClient.ts
+++ b/packages/core/src/core/localLiteRtLmClient.ts
@@ -27,6 +27,9 @@ export class LocalLiteRtLmClient {
       apiKey: 'no-api-key-needed',
       httpOptions: {
         baseUrl: this.host,
+        // The LiteRT-LM server does not use a versioned API path prefix.
+        // The SDK defaults to 'v1beta' which produces 404s against LiteRT-LM.
+        apiVersion: '',
         // If the LiteRT-LM server is started but the wrong port is set, there will be a lengthy TCP timeout (here fixed to be 10 seconds).
         // If the LiteRT-LM server is not started, there will be an immediate connection refusal.
         // If the LiteRT-LM server is started and the model is unsupported or not downloaded, the server will return an error immediately.

From ab8dc2dd01fc85953657bbb1e5ff88e3175356d0 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Mon, 13 Apr 2026 12:42:07 -0700
Subject: [PATCH 09/33] Revert "fix(core): set apiVersion to empty string for
 LiteRT-LM client"

This reverts commit e0f043a6732bc1b03432cc196b00a4369a5dabea.
---
 packages/core/src/core/localLiteRtLmClient.ts | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/packages/core/src/core/localLiteRtLmClient.ts b/packages/core/src/core/localLiteRtLmClient.ts
index 3aa111eb3d5..798dcb57656 100644
--- a/packages/core/src/core/localLiteRtLmClient.ts
+++ b/packages/core/src/core/localLiteRtLmClient.ts
@@ -27,9 +27,6 @@ export class LocalLiteRtLmClient {
       apiKey: 'no-api-key-needed',
       httpOptions: {
         baseUrl: this.host,
-        // The LiteRT-LM server does not use a versioned API path prefix.
-        // The SDK defaults to 'v1beta' which produces 404s against LiteRT-LM.
-        apiVersion: '',
         // If the LiteRT-LM server is started but the wrong port is set, there will be a lengthy TCP timeout (here fixed to be 10 seconds).
         // If the LiteRT-LM server is not started, there will be an immediate connection refusal.
         // If the LiteRT-LM server is started and the model is unsupported or not downloaded, the server will return an error immediately.

From d0dd169cccba35bb27ba4f25807bd551835bebe1 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Mon, 13 Apr 2026 12:51:42 -0700
Subject: [PATCH 10/33] feat(cli): show gemma router settings in /settings
 dialog

---
 packages/cli/src/config/settingsSchema.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts
index 6f946eb963d..f37faad78dd 100644
--- a/packages/cli/src/config/settingsSchema.ts
+++ b/packages/cli/src/config/settingsSchema.ts
@@ -2144,7 +2144,7 @@ const SETTINGS_SCHEMA = {
             default: false,
             description:
               'Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.',
-            showInDialog: false,
+            showInDialog: true,
           },
           autoStartServer: {
             type: 'boolean',
@@ -2154,7 +2154,7 @@ const SETTINGS_SCHEMA = {
             default: true,
             description:
               'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.',
-            showInDialog: false,
+            showInDialog: true,
           },
           binaryPath: {
             type: 'string',

From 8d3ac5213ce28c1066ed70a00d53f43b8c63abd9 Mon Sep 17 00:00:00 2001
From: Abhijit Balaji <abhijitbalaji@google.com>
Date: Mon, 13 Apr 2026 14:39:15 -0700
Subject: [PATCH 11/33] fix(gemma): resolve 404 errors and improve port
 resolution (#25340)

---
 packages/cli/src/commands/gemma/platform.ts   | 29 +++++++++++++++++++
 packages/cli/src/commands/gemma/start.ts      | 12 ++++++--
 packages/cli/src/commands/gemma/status.ts     | 21 +++++---------
 packages/cli/src/commands/gemma/stop.ts       | 16 +++++++---
 .../core/src/core/localLiteRtLmClient.test.ts | 10 +++++++
 packages/core/src/core/localLiteRtLmClient.ts |  2 ++
 6 files changed, 71 insertions(+), 19 deletions(-)

diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts
index e39d99e557f..ef5c5288a65 100644
--- a/packages/cli/src/commands/gemma/platform.ts
+++ b/packages/cli/src/commands/gemma/platform.ts
@@ -4,6 +4,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+import { loadSettings } from '../../config/settings.js';
 import fs from 'node:fs';
 import path from 'node:path';
 import { execFileSync } from 'node:child_process';
@@ -22,6 +23,34 @@ export interface PlatformInfo {
   binaryName: string;
 }
 
+export interface GemmaConfigStatus {
+  settingsEnabled: boolean;
+  configuredPort: number;
+}
+
+/**
+ * Resolves the Gemma configuration from the workspace settings.
+ */
+export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus {
+  let settingsEnabled = false;
+  let configuredPort = fallbackPort;
+  try {
+    const settings = loadSettings(process.cwd());
+    const gemmaSettings = settings.merged.experimental?.gemmaModelRouter;
+    settingsEnabled = gemmaSettings?.enabled === true;
+    const hostStr = gemmaSettings?.classifier?.host;
+    if (hostStr) {
+      const match = hostStr.match(/:(\d+)/);
+      if (match) {
+        configuredPort = parseInt(match[1], 10);
+      }
+    }
+  } catch {
+    // Settings may fail to load in some contexts; treat as not enabled.
+  }
+  return { settingsEnabled, configuredPort };
+}
+
 /**
  * Detects the current platform and resolves the corresponding LiteRT-LM binary name.
  * Returns null if the platform is unsupported.
diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts
index 8918daa9faf..02b1bd0e4a6 100644
--- a/packages/cli/src/commands/gemma/start.ts
+++ b/packages/cli/src/commands/gemma/start.ts
@@ -21,6 +21,7 @@ import {
   getBinaryPath,
   isBinaryInstalled,
   isServerRunning,
+  resolveGemmaConfig,
 } from './platform.js';
 
 /**
@@ -78,11 +79,18 @@ export const startCommand: CommandModule = {
   builder: (yargs) =>
     yargs.option('port', {
       type: 'number',
-      default: DEFAULT_PORT,
       description: 'Port for the LiteRT server',
     }),
   handler: async (argv) => {
-    const port = Number(argv['port']);
+    let port: number | undefined;
+    if (argv['port'] !== undefined) {
+      port = Number(argv['port']);
+    }
+
+    if (!port) {
+      const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
+      port = configuredPort;
+    }
 
     if (!isBinaryInstalled()) {
       debugLogger.error(
diff --git a/packages/cli/src/commands/gemma/status.ts b/packages/cli/src/commands/gemma/status.ts
index 4a265dd9445..1e061d7bc57 100644
--- a/packages/cli/src/commands/gemma/status.ts
+++ b/packages/cli/src/commands/gemma/status.ts
@@ -6,7 +6,6 @@
 
 import type { CommandModule } from 'yargs';
 import chalk from 'chalk';
-import { loadSettings } from '../../config/settings.js';
 import { DEFAULT_PORT, GEMMA_MODEL_NAME } from './constants.js';
 import {
   detectPlatform,
@@ -16,6 +15,7 @@ import {
   isServerRunning,
   readServerPid,
   isProcessRunning,
+  resolveGemmaConfig,
 } from './platform.js';
 import { exitCli } from '../utils.js';
 
@@ -38,7 +38,9 @@ export interface GemmaStatusResult {
 export async function checkGemmaStatus(
   port?: number,
 ): Promise<GemmaStatusResult> {
-  const effectivePort = port ?? DEFAULT_PORT;
+  const { settingsEnabled, configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
+
+  const effectivePort = port ?? configuredPort;
   const binaryPath = getBinaryPath();
   const binaryInstalled = isBinaryInstalled();
   const modelDownloaded =
@@ -47,15 +49,6 @@ export async function checkGemmaStatus(
   const pid = readServerPid();
   const serverPid = pid && isProcessRunning(pid) ? pid : null;
 
-  let settingsEnabled = false;
-  try {
-    const settings = loadSettings(process.cwd());
-    const gemmaSettings = settings.merged.experimental?.gemmaModelRouter;
-    settingsEnabled = gemmaSettings?.enabled === true;
-  } catch {
-    // Settings may fail to load in some contexts; treat as not enabled.
-  }
-
   const allPassing =
     binaryInstalled && modelDownloaded && serverRunning && settingsEnabled;
 
@@ -167,11 +160,13 @@ export const statusCommand: CommandModule = {
   builder: (yargs) =>
     yargs.option('port', {
       type: 'number',
-      default: DEFAULT_PORT,
       description: 'Port to check for the LiteRT server',
     }),
   handler: async (argv) => {
-    const port = Number(argv['port']);
+    let port: number | undefined;
+    if (argv['port'] !== undefined) {
+      port = Number(argv['port']);
+    }
     const status = await checkGemmaStatus(port);
     const output = formatGemmaStatus(status);
     // Use process.stdout directly for consistent output in non-interactive mode.
diff --git a/packages/cli/src/commands/gemma/stop.ts b/packages/cli/src/commands/gemma/stop.ts
index 15db60eaa8e..409989e33c3 100644
--- a/packages/cli/src/commands/gemma/stop.ts
+++ b/packages/cli/src/commands/gemma/stop.ts
@@ -14,6 +14,7 @@ import {
   readServerPid,
   isProcessRunning,
   isServerRunning,
+  resolveGemmaConfig,
 } from './platform.js';
 
 /**
@@ -66,18 +67,25 @@ export async function stopServer(): Promise<boolean> {
 
   return true;
 }
-
 export const stopCommand: CommandModule = {
   command: 'stop',
   describe: 'Stop the LiteRT-LM server',
   builder: (yargs) =>
     yargs.option('port', {
       type: 'number',
-      default: DEFAULT_PORT,
-      description: 'Port the server is running on',
+      description: 'Port where the LiteRT server is running',
     }),
   handler: async (argv) => {
-    const port = Number(argv['port']);
+    let port: number | undefined;
+    if (argv['port'] !== undefined) {
+      port = Number(argv['port']);
+    }
+
+    if (!port) {
+      const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
+      port = configuredPort;
+    }
+
     const pid = readServerPid();
 
     if (pid !== null && isProcessRunning(pid)) {
diff --git a/packages/core/src/core/localLiteRtLmClient.test.ts b/packages/core/src/core/localLiteRtLmClient.test.ts
index c4398b5b9c1..6c64143ec3d 100644
--- a/packages/core/src/core/localLiteRtLmClient.test.ts
+++ b/packages/core/src/core/localLiteRtLmClient.test.ts
@@ -7,6 +7,8 @@
 import { describe, it, expect, vi, beforeEach } from 'vitest';
 import { LocalLiteRtLmClient } from './localLiteRtLmClient.js';
 import type { Config } from '../config/config.js';
+import { GoogleGenAI } from '@google/genai';
+
 const mockGenerateContent = vi.fn();
 
 vi.mock('@google/genai', () => {
@@ -44,6 +46,14 @@ describe('LocalLiteRtLmClient', () => {
     const result = await client.generateJson([], 'test-instruction');
 
     expect(result).toEqual({ key: 'value' });
+    expect(GoogleGenAI).toHaveBeenCalledWith(
+      expect.objectContaining({
+        apiVersion: 'v1beta',
+        httpOptions: expect.objectContaining({
+          baseUrl: 'http://test-host:1234',
+        }),
+      }),
+    );
     expect(mockGenerateContent).toHaveBeenCalledWith(
       expect.objectContaining({
         model: 'gemma:latest',
diff --git a/packages/core/src/core/localLiteRtLmClient.ts b/packages/core/src/core/localLiteRtLmClient.ts
index 798dcb57656..82fa44e87b9 100644
--- a/packages/core/src/core/localLiteRtLmClient.ts
+++ b/packages/core/src/core/localLiteRtLmClient.ts
@@ -25,6 +25,8 @@ export class LocalLiteRtLmClient {
     this.client = new GoogleGenAI({
       // The LiteRT-LM server does not require an API key, but the SDK requires one to be set even for local endpoints. This is a dummy value and is not used for authentication.
       apiKey: 'no-api-key-needed',
+      apiVersion: 'v1beta',
+      vertexai: false,
       httpOptions: {
         baseUrl: this.host,
         // If the LiteRT-LM server is started but the wrong port is set, there will be a lengthy TCP timeout (here fixed to be 10 seconds).

From c0117b4484fd5321b16a59dc1964fbfeac39bc41 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameez@google.com>
Date: Wed, 15 Apr 2026 15:00:48 -0700
Subject: [PATCH 12/33] chore(docs): regenerate settings schema and docs

---
 docs/codebase_understanding.md  | 58 +++++++++++++++++++--------------
 docs/reference/configuration.md | 12 +++++++
 schemas/settings.schema.json    | 14 ++++++++
 3 files changed, 60 insertions(+), 24 deletions(-)

diff --git a/docs/codebase_understanding.md b/docs/codebase_understanding.md
index 34a2ee3c942..29c9bf692ab 100644
--- a/docs/codebase_understanding.md
+++ b/docs/codebase_understanding.md
@@ -1,13 +1,13 @@
 # Codebase understanding
 
 This document provides a deep-dive technical overview of the Gemini CLI
-architecture. It is designed for developers who need to understand the
-system's inner workings, from startup to advanced autonomous behaviors.
+architecture. It is designed for developers who need to understand the system's
+inner workings, from startup to advanced autonomous behaviors.
 
 ## Repository architecture
 
-Gemini CLI is a monorepo structured to maintain a strict separation between
-the user interface and the agent's core reasoning logic.
+Gemini CLI is a monorepo structured to maintain a strict separation between the
+user interface and the agent's core reasoning logic.
 
 - **`packages/cli`**: The Terminal User Interface (TUI). Built with React and
   Ink, it manages the interactive terminal experience, including keyboard
@@ -27,17 +27,18 @@ the user interface and the agent's core reasoning logic.
 ## 1. Application lifecycle
 
 ### Startup and initialization
+
 The entry point is `packages/cli/src/gemini.tsx`. The startup sequence is
 designed for security and resilience:
 
 1.  **I/O redirection**: Standard output streams (`stdout`, `stderr`) are
     patched to capture all logs and errors. This allows the CLI to redirect
-    diagnostic information to the TUI's debug console or a remote DevTools server
-    without corrupting the user's terminal interface.
-2.  **Memory-aware relaunch**: The CLI checks the host system's total memory.
-    If it detects that Node.js's default heap limit is insufficient for complex
-    codebase analysis, it re-launches itself using the
-    `--max-old-space-size` flag, targeting approximately 50% of system memory.
+    diagnostic information to the TUI's debug console or a remote DevTools
+    server without corrupting the user's terminal interface.
+2.  **Memory-aware relaunch**: The CLI checks the host system's total memory. If
+    it detects that Node.js's default heap limit is insufficient for complex
+    codebase analysis, it re-launches itself using the `--max-old-space-size`
+    flag, targeting approximately 50% of system memory.
 3.  **Sandboxing**: If configured, the CLI launches a restricted "sandbox"
     environment (using Docker, Podman, or a localized process) to isolate the
     agent's autonomous actions from the host system.
@@ -52,18 +53,19 @@ designed for security and resilience:
 
 ## 2. Model routing and selection
 
-The `ModelRouterService` (`packages/core/src/routing`) implements a
-"Composite Strategy" to select the optimal model for every request.
+The `ModelRouterService` (`packages/core/src/routing`) implements a "Composite
+Strategy" to select the optimal model for every request.
 
 ### Routing strategies
+
 - **classifier**: Uses a lightweight LLM call to categorize the complexity of a
   task based on a rubric (Strategic Planning, Multi-step Coordination,
   Ambiguity). It chooses between a "Pro" model (for complex reasoning) and a
   "Flash" model (for simple operations).
 - **approvalMode**: Selects specialized models (like `gemini-2.0-flash-lite`)
   when the agent is in specific modes like `Plan Mode`.
-- **numericalClassifier**: A deterministic strategy that selects models based
-  on the number of tokens in the conversation or the length of the history.
+- **numericalClassifier**: A deterministic strategy that selects models based on
+  the number of tokens in the conversation or the length of the history.
 - **fallback**: Automatically switches models if the primary model encounters
   quota limits (429) or transient API failures.
 
@@ -75,7 +77,9 @@ The agent maintains deep project awareness while staying within token limits
 through several services in `packages/core/src/services`:
 
 ### ChatCompressionService
+
 Triggered when the history exceeds 50% of the model's context window:
+
 1.  **State snapshots**: The agent generates a structured `<state_snapshot>`
     representing the cumulative knowledge of the session (constraints, progress,
     paths).
@@ -85,6 +89,7 @@ Triggered when the history exceeds 50% of the model's context window:
     the history.
 
 ### ToolOutputMaskingService
+
 Prevents bulky data (like large shell outputs or file reads) from clogging the
 context window. It replaces large `functionResponse` blocks with concise
 summaries and persists the full data to temporary files, allowing the agent to
@@ -94,13 +99,16 @@ refer to the full data only when necessary.
 
 ## 4. Advanced tool execution and scheduling
 
-The `Scheduler` (`packages/core/src/scheduler`) is an event-driven state
-machine that manages the lifecycle of autonomous actions.
+The `Scheduler` (`packages/core/src/scheduler`) is an event-driven state machine
+that manages the lifecycle of autonomous actions.
 
 ### Lifecycle states
-`Validating` → `AwaitingApproval` → `Scheduled` → `Executing` → `Success`/`Error`
+
+`Validating` → `AwaitingApproval` → `Scheduled` → `Executing` →
+`Success`/`Error`
 
 ### Key features
+
 - **Policy Engine**: A granular system that evaluates tools based on security
   policies (e.g., "Allow read-only tools", "Ask for shell commands"). It can be
   configured at the project or user level.
@@ -121,17 +129,18 @@ machine that manages the lifecycle of autonomous actions.
 The `packages/cli/src/ui` directory implements a sophisticated React-based TUI.
 
 ### Keyboard and protocols
+
 - **KeypressProvider**: Manages terminal input, supporting complex key
   combinations and shortcuts.
-- **Kitty keyboard protocol**: Detects terminals that support the Kitty
-  protocol to enable advanced features like detecting `ctrl+enter` vs `enter`.
+- **Kitty keyboard protocol**: Detects terminals that support the Kitty protocol
+  to enable advanced features like detecting `ctrl+enter` vs `enter`.
 - **Vim mode**: A dedicated provider that enables Vim-like navigation (hjkl,
   words, search) for both conversation history and input fields.
 
 ### Layout and rendering
-- **ResizeObserver**: A custom implementation that watches the terminal size
-  to ensure components (like multi-column layouts or wide tables) adapt
-  instantly.
+
+- **ResizeObserver**: A custom implementation that watches the terminal size to
+  ensure components (like multi-column layouts or wide tables) adapt instantly.
 - **ConsolePatcher**: Intercepts `console.log`, `console.warn`, and
   `console.error`, routing them to the internal debug console (toggled with
   `ctrl+d`) or the external DevTools server.
@@ -141,9 +150,10 @@ The `packages/cli/src/ui` directory implements a sophisticated React-based TUI.
 ## 6. Testing and validation
 
 Gemini CLI uses a tiered testing strategy to ensure reliability:
+
 1.  **Unit tests**: Located alongside the source (`*.test.ts`), providing fast
     coverage for core logic.
-2.  **Integration tests**: Located in `integration-tests/`, running the
-    full CLI against mock and real Gemini API endpoints.
+2.  **Integration tests**: Located in `integration-tests/`, running the full CLI
+    against mock and real Gemini API endpoints.
 3.  **Evals**: Performance benchmarks in `evals/` that measure the agent's
     reasoning accuracy and tool-use efficiency over time.
diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md
index 05368f20fe6..b6c64a80b37 100644
--- a/docs/reference/configuration.md
+++ b/docs/reference/configuration.md
@@ -1701,6 +1701,18 @@ their corresponding top-level category object in your `settings.json` file.
   - **Default:** `false`
   - **Requires restart:** Yes
 
+- **`experimental.gemmaModelRouter.autoStartServer`** (boolean):
+  - **Description:** Automatically start the LiteRT-LM server when Gemini CLI
+    starts and the Gemma router is enabled.
+  - **Default:** `true`
+  - **Requires restart:** Yes
+
+- **`experimental.gemmaModelRouter.binaryPath`** (string):
+  - **Description:** Custom path to the LiteRT-LM binary. Leave empty to use the
+    default location (~/.gemini/bin/litert/).
+  - **Default:** `""`
+  - **Requires restart:** Yes
+
 - **`experimental.gemmaModelRouter.classifier.host`** (string):
   - **Description:** The host of the classifier.
   - **Default:** `"http://localhost:9379"`
diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json
index 98bc786410d..d01ec0dc1df 100644
--- a/schemas/settings.schema.json
+++ b/schemas/settings.schema.json
@@ -2905,6 +2905,20 @@
               "default": false,
               "type": "boolean"
             },
+            "autoStartServer": {
+              "title": "Auto-start LiteRT Server",
+              "description": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.",
+              "markdownDescription": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `true`",
+              "default": true,
+              "type": "boolean"
+            },
+            "binaryPath": {
+              "title": "LiteRT Binary Path",
+              "description": "Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).",
+              "markdownDescription": "Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: ``",
+              "default": "",
+              "type": "string"
+            },
             "classifier": {
               "title": "Classifier",
               "description": "Classifier configuration.",

From 509060e74ec470ce42d107df1896efd3e4a64fb7 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameez@google.com>
Date: Wed, 15 Apr 2026 16:27:36 -0700
Subject: [PATCH 13/33] docs: remove codebase understanding guides

---
 docs/codebase_understanding.md          | 159 ------------------------
 docs/codebase_understanding_antigrav.md | 101 ---------------
 2 files changed, 260 deletions(-)
 delete mode 100644 docs/codebase_understanding.md
 delete mode 100644 docs/codebase_understanding_antigrav.md

diff --git a/docs/codebase_understanding.md b/docs/codebase_understanding.md
deleted file mode 100644
index 29c9bf692ab..00000000000
--- a/docs/codebase_understanding.md
+++ /dev/null
@@ -1,159 +0,0 @@
-# Codebase understanding
-
-This document provides a deep-dive technical overview of the Gemini CLI
-architecture. It is designed for developers who need to understand the system's
-inner workings, from startup to advanced autonomous behaviors.
-
-## Repository architecture
-
-Gemini CLI is a monorepo structured to maintain a strict separation between the
-user interface and the agent's core reasoning logic.
-
-- **`packages/cli`**: The Terminal User Interface (TUI). Built with React and
-  Ink, it manages the interactive terminal experience, including keyboard
-  protocols, rendering, and terminal state management.
-- **`packages/core`**: The UI-agnostic engine. It contains the primary
-  orchestration logic, model routing, tool systems, policy enforcement, and
-  Gemini API communication.
-- **`packages/devtools`**: A suite for real-time inspection of network traffic,
-  console logs, and session activity.
-- **`packages/sdk`**: A library for developers to build third-party tools and
-  extensions.
-- **`packages/vscode-ide-companion`**: A specialized bridge that feeds real-time
-  editor state (open files, active selections, cursor positions) to the agent.
-
----
-
-## 1. Application lifecycle
-
-### Startup and initialization
-
-The entry point is `packages/cli/src/gemini.tsx`. The startup sequence is
-designed for security and resilience:
-
-1.  **I/O redirection**: Standard output streams (`stdout`, `stderr`) are
-    patched to capture all logs and errors. This allows the CLI to redirect
-    diagnostic information to the TUI's debug console or a remote DevTools
-    server without corrupting the user's terminal interface.
-2.  **Memory-aware relaunch**: The CLI checks the host system's total memory. If
-    it detects that Node.js's default heap limit is insufficient for complex
-    codebase analysis, it re-launches itself using the `--max-old-space-size`
-    flag, targeting approximately 50% of system memory.
-3.  **Sandboxing**: If configured, the CLI launches a restricted "sandbox"
-    environment (using Docker, Podman, or a localized process) to isolate the
-    agent's autonomous actions from the host system.
-4.  **Interactive (TUI) vs. Non-interactive (CLI)**:
-    - **Interactive mode**: Initializes the Ink renderer, starting a persistent
-      React application that manages terminal state via providers.
-    - **Non-interactive mode**: Executes a streamlined loop in
-      `nonInteractiveCli.ts`, designed for single prompts or piped input/output
-      redirection.
-
----
-
-## 2. Model routing and selection
-
-The `ModelRouterService` (`packages/core/src/routing`) implements a "Composite
-Strategy" to select the optimal model for every request.
-
-### Routing strategies
-
-- **classifier**: Uses a lightweight LLM call to categorize the complexity of a
-  task based on a rubric (Strategic Planning, Multi-step Coordination,
-  Ambiguity). It chooses between a "Pro" model (for complex reasoning) and a
-  "Flash" model (for simple operations).
-- **approvalMode**: Selects specialized models (like `gemini-2.0-flash-lite`)
-  when the agent is in specific modes like `Plan Mode`.
-- **numericalClassifier**: A deterministic strategy that selects models based on
-  the number of tokens in the conversation or the length of the history.
-- **fallback**: Automatically switches models if the primary model encounters
-  quota limits (429) or transient API failures.
-
----
-
-## 3. Intelligent context management
-
-The agent maintains deep project awareness while staying within token limits
-through several services in `packages/core/src/services`:
-
-### ChatCompressionService
-
-Triggered when the history exceeds 50% of the model's context window:
-
-1.  **State snapshots**: The agent generates a structured `<state_snapshot>`
-    representing the cumulative knowledge of the session (constraints, progress,
-    paths).
-2.  **The "Probe" (Self-Correction)**: A second LLM pass compares the summary
-    against the original history to ensure no critical technical details or
-    user-defined constraints were lost, correcting the summary before purging
-    the history.
-
-### ToolOutputMaskingService
-
-Prevents bulky data (like large shell outputs or file reads) from clogging the
-context window. It replaces large `functionResponse` blocks with concise
-summaries and persists the full data to temporary files, allowing the agent to
-refer to the full data only when necessary.
-
----
-
-## 4. Advanced tool execution and scheduling
-
-The `Scheduler` (`packages/core/src/scheduler`) is an event-driven state machine
-that manages the lifecycle of autonomous actions.
-
-### Lifecycle states
-
-`Validating` → `AwaitingApproval` → `Scheduled` → `Executing` →
-`Success`/`Error`
-
-### Key features
-
-- **Policy Engine**: A granular system that evaluates tools based on security
-  policies (e.g., "Allow read-only tools", "Ask for shell commands"). It can be
-  configured at the project or user level.
-- **Tail calls**: Allows a tool to "link" to another action. For example, a
-  shell command that produces an error can automatically trigger a "diagnostic"
-  tool without returning control to the main model.
-- **Parallelism**: The scheduler executes independent read-only tools in
-  parallel while enforcing sequential execution for tools that modify the
-  environment.
-- **MCP integration**: Dynamically loads tools from Model Context Protocol
-  servers, integrating them seamlessly into the same policy and scheduler
-  framework.
-
----
-
-## 5. UI and terminal integration
-
-The `packages/cli/src/ui` directory implements a sophisticated React-based TUI.
-
-### Keyboard and protocols
-
-- **KeypressProvider**: Manages terminal input, supporting complex key
-  combinations and shortcuts.
-- **Kitty keyboard protocol**: Detects terminals that support the Kitty protocol
-  to enable advanced features like detecting `ctrl+enter` vs `enter`.
-- **Vim mode**: A dedicated provider that enables Vim-like navigation (hjkl,
-  words, search) for both conversation history and input fields.
-
-### Layout and rendering
-
-- **ResizeObserver**: A custom implementation that watches the terminal size to
-  ensure components (like multi-column layouts or wide tables) adapt instantly.
-- **ConsolePatcher**: Intercepts `console.log`, `console.warn`, and
-  `console.error`, routing them to the internal debug console (toggled with
-  `ctrl+d`) or the external DevTools server.
-
----
-
-## 6. Testing and validation
-
-Gemini CLI uses a tiered testing strategy to ensure reliability:
-
-1.  **Unit tests**: Located alongside the source (`*.test.ts`), providing fast
-    coverage for core logic.
-2.  **Integration tests**: Located in `integration-tests/`, running the full CLI
-    against mock and real Gemini API endpoints.
-3.  **Evals**: Performance benchmarks in `evals/` that measure the agent's
-    reasoning accuracy and tool-use efficiency over time.
diff --git a/docs/codebase_understanding_antigrav.md b/docs/codebase_understanding_antigrav.md
deleted file mode 100644
index 60d5dbd2058..00000000000
--- a/docs/codebase_understanding_antigrav.md
+++ /dev/null
@@ -1,101 +0,0 @@
-# Gemini CLI - Codebase Understanding
-
-Gemini CLI is an open-source AI agent designed to let you interact with Google's
-Gemini models directly from your terminal. It's built as a **TypeScript
-monorepo** (using npm workspaces) and relies heavily on **Node.js**, **React**,
-and **Ink** (a library that lets you build terminal UIs using React components).
-
-Here is a high-level walkthrough of the repository to help you understand how
-all the pieces fit together.
-
-## 1. High-Level Architecture (The `packages/` Directory)
-
-The project is split into several focused packages to maintain a clean
-separation of concerns:
-
-- **`packages/cli`** (The Frontend)
-  - This is the user-facing terminal UI.
-  - It uses React + Ink. This means the terminal layout, styling, and
-    interactions are managed like a modern web app (with hooks, contexts, and
-    components).
-  - It handles all the terminal-specific logic like key bindings, processing
-    mouse/keyboard events, and rendering the chat stream or tool progress
-    indicators.
-- **`packages/core`** (The Brain/Backend)
-  - This is where the actual "agentic" logic lives. It is entirely UI-agnostic.
-  - Contains the core looping mechanism that communicates with the Gemini API,
-    maintains conversation history, compresses context, and evaluates whether
-    the agent needs to invoke a tool.
-  - Houses the **Tool Registry** (file system tools, shell runner, web tools)
-    and the **Policy Engine** (deciding if a tool is safe to run automatically
-    or needs your permission).
-- **`packages/devtools`**
-  - A Chrome DevTools-like web server that runs locally! If you enable
-    `general.devtools` in your settings, you can inspect network requests, agent
-    thoughts, and console logs in a local browser, just like you would for a web
-    app.
-- **`packages/vscode-ide-companion`**
-  - A VS Code extension that pairs dynamically with the CLI. It allows the
-    terminal agent to "read" your active editor state, seamlessly pulling
-    context on exactly what files or lines of code you currently have
-    highlighted in VS Code.
-- **`packages/sdk`**
-  - Provides libraries and types so people can build custom MCP (Model Context
-    Protocol) extensions or tools for the CLI.
-- **`packages/a2a-server`**
-  - An experimental Agent-to-Agent server, hinting at future capabilities for
-    having different agents talk to each other.
-
-## 2. The Core Application Lifecycle
-
-When you type `gemini` in your terminal, here's roughly what happens under the
-hood:
-
-1.  **Bootstrapping (`packages/cli/src/gemini.tsx`)**: The CLI loads user
-    configurations, parses command-line arguments, checks authentication, and
-    verifies if it needs to launch itself in a controlled "sandbox" environment
-    (using Docker/Podman to isolate dangerous shell tools).
-2.  **Mode Resolution**: It determines if you are piping data in or running a
-    single command (`nonInteractiveCli.ts`), or if you are firing up the chat
-    TUI (Terminal User Interface).
-3.  **The Agent Loop (`packages/core/src/core/`)**:
-    - **`GeminiClient`**: The main orchestrator. It manages sessions and
-      compresses chat histories using `ChatCompressionService` so you don't
-      breach token limits.
-    - **`GeminiChat` & `Turn`**: For every prompt you send, a `Turn` is created.
-      This represents one "exchange" where the model might think, respond, and
-      realize it needs to search your codebase. It streams these requests back
-      in real-time.
-
-## 3. The Tool System & Execution
-
-The most powerful aspect of this CLI is its ability to interact with your
-environment.
-
-- In `packages/core/src/tools/`, there are native TypeScript implementations for
-  operations (like reading files, searching directories, or running tests).
-- When Gemini asks to use a tool, the **Scheduler**
-  (`packages/core/src/scheduler/`) intercepts the request.
-- It runs the request through the **Policy Engine**
-  (`packages/core/src/policy/`). Some commands (like `rm -rf`) are flagged and
-  routed to a **Confirmation Bus**, which pauses execution and asks you in the
-  UI: _"Do you want to allow this command?"_
-- Once approved (or auto-approved), it executes the tool, captures standard
-  output/error, and pipes that text back to Gemini to continue its thought
-  process.
-
-## 4. Code Quality, Building, and Testing
-
-- **Bundling & Running**: The project uses `esbuild` to compile everything very
-  quickly. During development, you can use `npm run start` or `npm run debug`
-  (which attaches a Node.js inspector).
-- **Testing (`vitest`)**: Testing is extremely rigorous here.
-  - _Unit Tests:_ `npm run test` handles basic component functionality.
-  - _Integration Tests:_ `npm run test:e2e` simulates an actual sandbox,
-    mocking/hitting models to make sure the CLI interacts realistically.
-  - _Evals (`evals/`):_ Standalone performance benchmarks where they evaluate
-    how smart the CLI is at navigating codebases or using its tools
-    autonomously.
-- **`npm run preflight`**: Before a PR is pushed, this massive script runs
-  formatting (Prettier), linting (ESLint), type checking (TypeScript), unit
-  testing, and building, ensuring nothing breaks the main branch.

From 3b2243dcde29cc8053f1b07e78867773d2704188 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Thu, 16 Apr 2026 18:12:31 -0700
Subject: [PATCH 14/33] fix(cli): correctness and cross-platform fixes for
 gemma commands

---
 packages/cli/src/commands/gemma/logs.ts       | 29 +++++++++++++++++++
 packages/cli/src/commands/gemma/platform.ts   |  6 ++--
 packages/cli/src/commands/gemma/setup.ts      | 28 +++++++++---------
 packages/cli/src/commands/gemma/start.ts      |  3 +-
 .../cli/src/services/liteRtServerManager.ts   | 12 ++------
 5 files changed, 49 insertions(+), 29 deletions(-)

diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts
index 4420e7adb9d..473f3557fdc 100644
--- a/packages/cli/src/commands/gemma/logs.ts
+++ b/packages/cli/src/commands/gemma/logs.ts
@@ -11,6 +11,22 @@ import { debugLogger } from '@google/gemini-cli-core';
 import { exitCli } from '../utils.js';
 import { getLogFilePath } from './constants.js';
 
+/**
+ * Reads the last N lines from a file using Node.js APIs.
+ * Used as a cross-platform fallback when `tail` is unavailable (Windows).
+ */
+function readLastLines(filePath: string, count: number): string {
+  const content = fs.readFileSync(filePath, 'utf-8');
+  const lines = content.split('\n');
+  // If the file ends with a newline, the last element is empty — skip it.
+  if (lines.length > 0 && lines[lines.length - 1] === '') {
+    lines.pop();
+  }
+  return lines.slice(-count).join('\n') + '\n';
+}
+
+const isWindows = process.platform === 'win32';
+
 export const logsCommand: CommandModule = {
   command: 'logs',
   describe: 'View LiteRT-LM server logs',
@@ -43,6 +59,11 @@ export const logsCommand: CommandModule = {
     const lines = Number.isFinite(rawLines) ? Number(rawLines) : undefined;
 
     if (lines !== undefined) {
+      if (isWindows) {
+        process.stdout.write(readLastLines(logPath, lines));
+        await exitCli(0);
+        return;
+      }
       // Show last N lines and exit.
       const tailArgs = ['-n', String(lines), logPath];
       const child = spawn('tail', tailArgs, { stdio: 'inherit' });
@@ -52,6 +73,14 @@ export const logsCommand: CommandModule = {
       return;
     }
 
+    if (isWindows) {
+      debugLogger.log(
+        'Live log following is not supported on Windows. Use --lines N to view recent logs.',
+      );
+      await exitCli(1);
+      return;
+    }
+
     // Follow mode — stream live output until user presses Ctrl+C.
     debugLogger.log(`Tailing ${logPath} (Ctrl+C to stop)\n`);
     const tailArgs = ['-f', '-n', '20', logPath];
diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts
index ef5c5288a65..d1c9373f685 100644
--- a/packages/cli/src/commands/gemma/platform.ts
+++ b/packages/cli/src/commands/gemma/platform.ts
@@ -110,12 +110,10 @@ export async function isServerRunning(port: number): Promise<boolean> {
       () => controller.abort(),
       HEALTH_CHECK_TIMEOUT_MS,
     );
-    const response = await fetch(`http://localhost:${port}/`, {
-      signal: controller.signal,
-    });
+    await fetch(`http://localhost:${port}/`, { signal: controller.signal });
     clearTimeout(timeout);
     // Any response (even an error page) means the server is up.
-    return response.ok || response.status > 0;
+    return true;
   } catch {
     return false;
   }
diff --git a/packages/cli/src/commands/gemma/setup.ts b/packages/cli/src/commands/gemma/setup.ts
index 7648122783d..8bf9eb4b58b 100644
--- a/packages/cli/src/commands/gemma/setup.ts
+++ b/packages/cli/src/commands/gemma/setup.ts
@@ -105,7 +105,10 @@ async function downloadFile(url: string, destPath: string): Promise<void> {
     for (;;) {
       const { done, value } = await reader.read();
       if (done) break;
-      fileStream.write(value);
+      const writeOk = fileStream.write(value);
+      if (!writeOk) {
+        await new Promise<void>((resolve) => fileStream.once('drain', resolve));
+      }
       downloadedBytes += value.byteLength;
       renderProgress(downloadedBytes, totalBytes);
     }
@@ -147,7 +150,7 @@ interface SetupArgs {
   consent: boolean;
 }
 
-async function handleSetup(argv: SetupArgs): Promise<void> {
+async function handleSetup(argv: SetupArgs): Promise<number> {
   const { port, force } = argv;
 
   log('');
@@ -164,8 +167,7 @@ async function handleSetup(argv: SetupArgs): Promise<void> {
     logError(
       'LiteRT-LM binaries are available for: macOS (ARM64), Linux (x86_64), Windows (x86_64)',
     );
-    await exitCli(1);
-    return;
+    return 1;
   }
   log(chalk.dim(`  Platform: ${platform.key} → ${platform.binaryName}`));
 
@@ -182,8 +184,7 @@ async function handleSetup(argv: SetupArgs): Promise<void> {
     const accepted = await promptYesNo('Do you want to continue?');
     if (!accepted) {
       log('Setup cancelled.');
-      await exitCli(0);
-      return;
+      return 0;
     }
   }
 
@@ -213,8 +214,7 @@ async function handleSetup(argv: SetupArgs): Promise<void> {
         ),
       );
       logError('  Check your internet connection and try again.');
-      await exitCli(1);
-      return;
+      return 1;
     }
 
     // Step 4: Make executable and handle macOS gatekeeper
@@ -227,8 +227,7 @@ async function handleSetup(argv: SetupArgs): Promise<void> {
             `  ✗ Failed to set executable permission: ${error instanceof Error ? error.message : String(error)}`,
           ),
         );
-        await exitCli(1);
-        return;
+        return 1;
       }
     }
 
@@ -268,8 +267,7 @@ async function handleSetup(argv: SetupArgs): Promise<void> {
         logError(
           chalk.red(`  ✗ Model download failed (exit code ${exitCode})`),
         );
-        await exitCli(1);
-        return;
+        return 1;
       }
       log('');
       log(chalk.green(`  ✓ Model ${GEMMA_MODEL_NAME} downloaded`));
@@ -369,6 +367,8 @@ async function handleSetup(argv: SetupArgs): Promise<void> {
   log(chalk.dim('    gemini gemma stop     Stop the LiteRT server'));
   log(chalk.dim('    /gemma               Check status inside a session'));
   log('');
+
+  return 0;
 }
 
 export const setupCommand: CommandModule = {
@@ -402,13 +402,13 @@ export const setupCommand: CommandModule = {
         description: 'Skip interactive consent prompt (implies acceptance)',
       }),
   handler: async (argv) => {
-    await handleSetup({
+    const exitCode = await handleSetup({
       port: Number(argv['port']),
       skipModel: Boolean(argv['skipModel']),
       start: Boolean(argv['start']),
       force: Boolean(argv['force']),
       consent: Boolean(argv['consent']),
     });
-    await exitCli(0);
+    await exitCli(exitCode);
   },
 };
diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts
index 02b1bd0e4a6..22591b9fc0e 100644
--- a/packages/cli/src/commands/gemma/start.ts
+++ b/packages/cli/src/commands/gemma/start.ts
@@ -6,6 +6,7 @@
 
 import type { CommandModule } from 'yargs';
 import fs from 'node:fs';
+import path from 'node:path';
 import { spawn } from 'node:child_process';
 import chalk from 'chalk';
 import { debugLogger } from '@google/gemini-cli-core';
@@ -45,7 +46,7 @@ export async function startServer(
   const logPath = getLogFilePath();
   fs.mkdirSync(getLiteRtBinDir(), { recursive: true });
   // Ensure tmp dir exists for log and pid files
-  const tmpDir = getPidFilePath().replace(/\/[^/]+$/, '');
+  const tmpDir = path.dirname(getPidFilePath());
   fs.mkdirSync(tmpDir, { recursive: true });
 
   const logFd = fs.openSync(logPath, 'a');
diff --git a/packages/cli/src/services/liteRtServerManager.ts b/packages/cli/src/services/liteRtServerManager.ts
index 1f0de35fe3a..375b34ca06a 100644
--- a/packages/cli/src/services/liteRtServerManager.ts
+++ b/packages/cli/src/services/liteRtServerManager.ts
@@ -5,6 +5,7 @@
  */
 
 import { debugLogger } from '@google/gemini-cli-core';
+import type { GemmaModelRouterSettings } from '@google/gemini-cli-core';
 import {
   getBinaryPath,
   isBinaryInstalled,
@@ -12,15 +13,6 @@ import {
 } from '../commands/gemma/platform.js';
 import { DEFAULT_PORT } from '../commands/gemma/constants.js';
 
-// Use a local interface that includes the new fields, since the core
-// package's compiled types may not include them until rebuilt.
-interface GemmaSettings {
-  enabled?: boolean;
-  autoStartServer?: boolean;
-  binaryPath?: string;
-  classifier?: { host?: string; model?: string };
-}
-
 /**
  * Manages the LiteRT-LM server lifecycle for auto-start during CLI startup.
  *
@@ -35,7 +27,7 @@ export class LiteRtServerManager {
    * This is fire-and-forget: failures are logged but never block startup.
    */
   static async ensureRunning(
-    gemmaSettings: GemmaSettings | undefined,
+    gemmaSettings: GemmaModelRouterSettings | undefined,
   ): Promise<void> {
     if (!gemmaSettings?.enabled) return;
     if (gemmaSettings.autoStartServer === false) return;

From 85a5a972ff8806f02ff9857be5fc1547cc9a5eba Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Thu, 16 Apr 2026 18:16:25 -0700
Subject: [PATCH 15/33] chore: remove gemma setup guide doc for now

---
 docs/core/gemma-setup-guide.md | 65 ----------------------------------
 1 file changed, 65 deletions(-)
 delete mode 100644 docs/core/gemma-setup-guide.md

diff --git a/docs/core/gemma-setup-guide.md b/docs/core/gemma-setup-guide.md
deleted file mode 100644
index 05f3b104ff6..00000000000
--- a/docs/core/gemma-setup-guide.md
+++ /dev/null
@@ -1,65 +0,0 @@
-# `gemini gemma` — Local Model Routing Setup
-
-## What is this?
-
-Routes simple requests to Flash and complex requests to Pro using a local Gemma
-3 1B model running on your machine. Saves cloud API costs and adds a few ms of
-local inference instead of a cloud classifier round-trip.
-
-## Quick Start
-
-```bash
-# One command does everything: downloads runtime, pulls model, configures settings, starts server
-gemini gemma setup
-```
-
-You'll be prompted to accept the Gemma Terms of Use. The model is ~1 GB.
-
-After setup, **just use the CLI normally** — routing happens automatically on
-every request.
-
-## Commands
-
-| Command               | What it does                                                   |
-| --------------------- | -------------------------------------------------------------- |
-| `gemini gemma setup`  | Full install (binary + model + settings + server start)        |
-| `gemini gemma status` | Health check — shows what's installed and running              |
-| `gemini gemma start`  | Start the LiteRT server (auto-starts on CLI launch by default) |
-| `gemini gemma stop`   | Stop the LiteRT server                                         |
-| `gemini gemma logs`   | Tail the server logs to see routing requests live              |
-| `/gemma`              | In-session status check (type it inside the CLI)               |
-
-## Verifying it works
-
-1. Run `gemini gemma status` — all checks should show green
-2. Open two terminals:
-   - Terminal 1: `gemini gemma logs` (watch for incoming requests)
-   - Terminal 2: use the CLI normally
-3. You should see classification requests appear in the logs as you interact
-   with the CLI
-4. The `/gemma` slash command inside a session shows a quick status panel
-
-## Setup flags
-
-```bash
-gemini gemma setup --port 8080      # custom port
-gemini gemma setup --no-start       # don't start server after install
-gemini gemma setup --force           # re-download everything
-gemini gemma setup --skip-model     # binary only, skip the 1GB model download
-```
-
-## How it works under the hood
-
-- Local Gemma classifies each request as "simple" or "complex" (~100ms)
-- Simple → Flash, Complex → Pro
-- If the local server is down, the CLI silently falls back to the cloud
-  classifier — no errors, no disruption
-
-## Disabling
-
-Set `enabled: false` in settings or just run `gemini gemma stop` to turn off the
-server:
-
-```json
-{ "experimental": { "gemmaModelRouter": { "enabled": false } } }
-```

From eb5a3b90d8c2236773b19841decaf726bbb4d45f Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Thu, 16 Apr 2026 18:23:43 -0700
Subject: [PATCH 16/33] chore: remove unnecessary comments across gemma
 commands

---
 packages/cli/src/commands/gemma.ts            |  5 +--
 packages/cli/src/commands/gemma/constants.ts  | 17 ----------
 packages/cli/src/commands/gemma/logs.ts       |  7 ----
 packages/cli/src/commands/gemma/platform.ts   | 29 +---------------
 packages/cli/src/commands/gemma/setup.ts      | 33 +------------------
 packages/cli/src/commands/gemma/start.ts      | 12 -------
 packages/cli/src/commands/gemma/status.ts     | 11 -------
 packages/cli/src/commands/gemma/stop.ts       | 15 ++-------
 packages/cli/src/gemini.tsx                   |  2 --
 .../cli/src/services/liteRtServerManager.ts   | 14 --------
 .../src/ui/components/views/GemmaStatus.tsx   |  6 ----
 11 files changed, 6 insertions(+), 145 deletions(-)

diff --git a/packages/cli/src/commands/gemma.ts b/packages/cli/src/commands/gemma.ts
index d7b88f06374..737bbb069ba 100644
--- a/packages/cli/src/commands/gemma.ts
+++ b/packages/cli/src/commands/gemma.ts
@@ -29,8 +29,5 @@ export const gemmaCommand: CommandModule = {
       .command(defer(logsCommand, 'gemma'))
       .demandCommand(1, 'You need at least one command before continuing.')
       .version(false),
-  handler: () => {
-    // yargs will automatically show help if no subcommand is provided
-    // thanks to demandCommand(1) in the builder.
-  },
+  handler: () => {},
 };
diff --git a/packages/cli/src/commands/gemma/constants.ts b/packages/cli/src/commands/gemma/constants.ts
index 76203a8a8f4..cab4c1f2b26 100644
--- a/packages/cli/src/commands/gemma/constants.ts
+++ b/packages/cli/src/commands/gemma/constants.ts
@@ -7,45 +7,28 @@
 import path from 'node:path';
 import { Storage } from '@google/gemini-cli-core';
 
-/** LiteRT-LM release version to download. */
 export const LITERT_RELEASE_VERSION = 'v0.9.0-alpha03';
-
-/** Base URL for LiteRT-LM GitHub releases. */
 export const LITERT_RELEASE_BASE_URL =
   'https://github.com/google-ai-edge/LiteRT-LM/releases/download';
-
-/** The only tested and supported model for local routing. */
 export const GEMMA_MODEL_NAME = 'gemma3-1b-gpu-custom';
-
-/** Default port for the LiteRT-LM server. */
 export const DEFAULT_PORT = 9379;
-
-/** Server health check timeout in milliseconds. */
 export const HEALTH_CHECK_TIMEOUT_MS = 5000;
-
-/** Delay before checking if server started successfully. */
 export const SERVER_START_WAIT_MS = 3000;
 
-/**
- * Maps `${process.platform}-${process.arch}` to the LiteRT-LM binary filename.
- */
 export const PLATFORM_BINARY_MAP: Record<string, string> = {
   'darwin-arm64': 'lit.macos_arm64',
   'linux-x64': 'lit.linux_x86_64',
   'win32-x64': 'lit.windows_x86_64.exe',
 };
 
-/** Directory where the LiteRT-LM binary is installed. */
 export function getLiteRtBinDir(): string {
   return path.join(Storage.getGlobalGeminiDir(), 'bin', 'litert');
 }
 
-/** Path to the PID file for the background LiteRT server. */
 export function getPidFilePath(): string {
   return path.join(Storage.getGlobalTempDir(), 'litert-server.pid');
 }
 
-/** Path to the log file for the background LiteRT server. */
 export function getLogFilePath(): string {
   return path.join(Storage.getGlobalTempDir(), 'litert-server.log');
 }
diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts
index 473f3557fdc..bbe5780c358 100644
--- a/packages/cli/src/commands/gemma/logs.ts
+++ b/packages/cli/src/commands/gemma/logs.ts
@@ -11,14 +11,9 @@ import { debugLogger } from '@google/gemini-cli-core';
 import { exitCli } from '../utils.js';
 import { getLogFilePath } from './constants.js';
 
-/**
- * Reads the last N lines from a file using Node.js APIs.
- * Used as a cross-platform fallback when `tail` is unavailable (Windows).
- */
 function readLastLines(filePath: string, count: number): string {
   const content = fs.readFileSync(filePath, 'utf-8');
   const lines = content.split('\n');
-  // If the file ends with a newline, the last element is empty — skip it.
   if (lines.length > 0 && lines[lines.length - 1] === '') {
     lines.pop();
   }
@@ -64,7 +59,6 @@ export const logsCommand: CommandModule = {
         await exitCli(0);
         return;
       }
-      // Show last N lines and exit.
       const tailArgs = ['-n', String(lines), logPath];
       const child = spawn('tail', tailArgs, { stdio: 'inherit' });
       child.on('close', async (code) => {
@@ -81,7 +75,6 @@ export const logsCommand: CommandModule = {
       return;
     }
 
-    // Follow mode — stream live output until user presses Ctrl+C.
     debugLogger.log(`Tailing ${logPath} (Ctrl+C to stop)\n`);
     const tailArgs = ['-f', '-n', '20', logPath];
     const child = spawn('tail', tailArgs, { stdio: 'inherit' });
diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts
index d1c9373f685..fd2dbc683b6 100644
--- a/packages/cli/src/commands/gemma/platform.ts
+++ b/packages/cli/src/commands/gemma/platform.ts
@@ -28,9 +28,6 @@ export interface GemmaConfigStatus {
   configuredPort: number;
 }
 
-/**
- * Resolves the Gemma configuration from the workspace settings.
- */
 export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus {
   let settingsEnabled = false;
   let configuredPort = fallbackPort;
@@ -46,15 +43,11 @@ export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus {
       }
     }
   } catch {
-    // Settings may fail to load in some contexts; treat as not enabled.
+    // ignore — settings may fail to load outside a workspace
   }
   return { settingsEnabled, configuredPort };
 }
 
-/**
- * Detects the current platform and resolves the corresponding LiteRT-LM binary name.
- * Returns null if the platform is unsupported.
- */
 export function detectPlatform(): PlatformInfo | null {
   const key = `${process.platform}-${process.arch}`;
   const binaryName = PLATFORM_BINARY_MAP[key];
@@ -64,29 +57,22 @@ export function detectPlatform(): PlatformInfo | null {
   return { key, binaryName };
 }
 
-/** Returns the full local path to the LiteRT-LM binary. */
 export function getBinaryPath(binaryName?: string): string | null {
   const name = binaryName ?? detectPlatform()?.binaryName;
   if (!name) return null;
   return path.join(getLiteRtBinDir(), name);
 }
 
-/** Returns the GitHub release download URL for the binary. */
 export function getBinaryDownloadUrl(binaryName: string): string {
   return `${LITERT_RELEASE_BASE_URL}/${LITERT_RELEASE_VERSION}/${binaryName}`;
 }
 
-/** Checks if the LiteRT-LM binary exists on disk. */
 export function isBinaryInstalled(): boolean {
   const binaryPath = getBinaryPath();
   if (!binaryPath) return false;
   return fs.existsSync(binaryPath);
 }
 
-/**
- * Checks if the Gemma model has been downloaded by running `lit list`
- * and looking for the model name in stdout.
- */
 export function isModelDownloaded(binaryPath: string): boolean {
   try {
     const output = execFileSync(binaryPath, ['list'], {
@@ -99,10 +85,6 @@ export function isModelDownloaded(binaryPath: string): boolean {
   }
 }
 
-/**
- * Checks if a LiteRT-LM server is running and responding on the given port.
- * Uses a simple HTTP request with a short timeout.
- */
 export async function isServerRunning(port: number): Promise<boolean> {
   try {
     const controller = new AbortController();
@@ -112,17 +94,12 @@ export async function isServerRunning(port: number): Promise<boolean> {
     );
     await fetch(`http://localhost:${port}/`, { signal: controller.signal });
     clearTimeout(timeout);
-    // Any response (even an error page) means the server is up.
     return true;
   } catch {
     return false;
   }
 }
 
-/**
- * Reads the PID from the PID file, if it exists.
- * Returns the PID number, or null if the file doesn't exist or is invalid.
- */
 export function readServerPid(): number | null {
   const pidPath = getPidFilePath();
   try {
@@ -134,12 +111,8 @@ export function readServerPid(): number | null {
   }
 }
 
-/**
- * Checks if a process with the given PID is still running.
- */
 export function isProcessRunning(pid: number): boolean {
   try {
-    // Sending signal 0 checks if the process exists without actually signaling it.
     process.kill(pid, 0);
     return true;
   } catch {
diff --git a/packages/cli/src/commands/gemma/setup.ts b/packages/cli/src/commands/gemma/setup.ts
index 8bf9eb4b58b..002afcbfdbf 100644
--- a/packages/cli/src/commands/gemma/setup.ts
+++ b/packages/cli/src/commands/gemma/setup.ts
@@ -29,10 +29,6 @@ import readline from 'node:readline';
 const log = (msg: string) => debugLogger.log(msg);
 const logError = (msg: string) => debugLogger.error(msg);
 
-/**
- * Prompts the user for a yes/no confirmation.
- * Returns true if the user answers 'y' or 'yes'.
- */
 async function promptYesNo(question: string): Promise<boolean> {
   const rl = readline.createInterface({
     input: process.stdin,
@@ -49,14 +45,12 @@ async function promptYesNo(question: string): Promise<boolean> {
   });
 }
 
-/** Formats a byte count into a human-readable string (e.g. "12.3 MB"). */
 function formatBytes(bytes: number): string {
   if (bytes < 1024) return `${bytes} B`;
   if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
   return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
 }
 
-/** Renders a single-line progress bar to stderr (overwriting in place). */
 function renderProgress(downloaded: number, total: number | null): void {
   const barWidth = 30;
   if (total && total > 0) {
@@ -72,14 +66,8 @@ function renderProgress(downloaded: number, total: number | null): void {
   }
 }
 
-/**
- * Downloads a file from a URL to a local path with a progress bar.
- * Uses a temporary `.downloading` suffix for safety against interrupted downloads.
- */
 async function downloadFile(url: string, destPath: string): Promise<void> {
   const tmpPath = destPath + '.downloading';
-
-  // Clean up any previous interrupted download.
   if (fs.existsSync(tmpPath)) {
     fs.unlinkSync(tmpPath);
   }
@@ -114,24 +102,17 @@ async function downloadFile(url: string, destPath: string): Promise<void> {
     }
   } finally {
     fileStream.end();
-    // Clear the progress line.
     process.stderr.write('\r' + ' '.repeat(80) + '\r');
   }
 
-  // Wait for the file to finish flushing.
   await new Promise<void>((resolve, reject) => {
     fileStream.on('finish', resolve);
     fileStream.on('error', reject);
   });
 
-  // Atomic rename after successful download.
   fs.renameSync(tmpPath, destPath);
 }
 
-/**
- * Spawns a child process and returns a promise that resolves with the exit code.
- * Inherits stdio so the user sees all output (progress, terms acceptance, etc.).
- */
 function spawnInherited(command: string, args: string[]): Promise<number> {
   return new Promise((resolve, reject) => {
     const child = nodeSpawn(command, args, {
@@ -158,7 +139,6 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
   log(chalk.dim('─'.repeat(40)));
   log('');
 
-  // Step 1: Platform detection
   const platform = detectPlatform();
   if (!platform) {
     logError(
@@ -171,7 +151,6 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
   }
   log(chalk.dim(`  Platform: ${platform.key} → ${platform.binaryName}`));
 
-  // Step 2: Consent
   if (!argv.consent) {
     log('');
     log('This will download and install the LiteRT-LM runtime and the');
@@ -188,7 +167,6 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
     }
   }
 
-  // Step 3: Download binary
   const binaryPath = getBinaryPath(platform.binaryName)!;
   const alreadyInstalled = isBinaryInstalled();
 
@@ -217,7 +195,6 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
       return 1;
     }
 
-    // Step 4: Make executable and handle macOS gatekeeper
     if (process.platform !== 'win32') {
       try {
         fs.chmodSync(binaryPath, 0o755);
@@ -238,15 +215,11 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
         });
         log(chalk.green('  ✓ macOS quarantine attribute removed'));
       } catch {
-        // This is expected to fail if the attribute doesn't exist.
-        debugLogger.log(
-          'xattr quarantine removal not needed or failed (non-fatal)',
-        );
+        // Expected if the attribute doesn't exist.
       }
     }
   }
 
-  // Step 5: Pull the model
   if (!argv.skipModel) {
     const modelAlreadyDownloaded = isModelDownloaded(binaryPath);
     if (modelAlreadyDownloaded && !force) {
@@ -274,7 +247,6 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
     }
   }
 
-  // Step 6: Configure settings
   log('');
   log('  Configuring settings...');
   try {
@@ -294,7 +266,6 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
       },
     };
 
-    // Read existing experimental settings to avoid overwriting them.
     const existingExperimental =
       settings.forScope(SettingScope.User).settings.experimental ?? {};
     settings.setValue(SettingScope.User, 'experimental', {
@@ -314,7 +285,6 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
     );
   }
 
-  // Step 7: Start server (if requested)
   if (argv.start) {
     log('');
     log('  Starting LiteRT server...');
@@ -330,7 +300,6 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
     }
   }
 
-  // Step 8: Summary
   log('');
   log(chalk.dim('─'.repeat(40)));
   log(chalk.bold.green('  Setup complete! Local model routing is now active.'));
diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts
index 22591b9fc0e..827c3f4fbd6 100644
--- a/packages/cli/src/commands/gemma/start.ts
+++ b/packages/cli/src/commands/gemma/start.ts
@@ -25,27 +25,18 @@ import {
   resolveGemmaConfig,
 } from './platform.js';
 
-/**
- * Starts the LiteRT-LM server as a detached background process.
- * Returns true if the server was started (or is already running).
- *
- * This function is also used by `setup.ts` to start the server after installation.
- */
 export async function startServer(
   binaryPath: string,
   port: number,
 ): Promise<boolean> {
-  // Check if already running
   const alreadyRunning = await isServerRunning(port);
   if (alreadyRunning) {
     debugLogger.log(`LiteRT server already running on port ${port}`);
     return true;
   }
 
-  // Ensure log directory exists
   const logPath = getLogFilePath();
   fs.mkdirSync(getLiteRtBinDir(), { recursive: true });
-  // Ensure tmp dir exists for log and pid files
   const tmpDir = path.dirname(getPidFilePath());
   fs.mkdirSync(tmpDir, { recursive: true });
 
@@ -57,19 +48,16 @@ export async function startServer(
       stdio: ['ignore', logFd, logFd],
     });
 
-    // Write PID file
     const pidPath = getPidFilePath();
     if (child.pid) {
       fs.writeFileSync(pidPath, String(child.pid), 'utf-8');
     }
 
-    // Detach the child so it survives after the CLI exits.
     child.unref();
   } finally {
     fs.closeSync(logFd);
   }
 
-  // Wait briefly and verify the server is responding.
   await new Promise((resolve) => setTimeout(resolve, SERVER_START_WAIT_MS));
   return isServerRunning(port);
 }
diff --git a/packages/cli/src/commands/gemma/status.ts b/packages/cli/src/commands/gemma/status.ts
index 1e061d7bc57..77847437cb9 100644
--- a/packages/cli/src/commands/gemma/status.ts
+++ b/packages/cli/src/commands/gemma/status.ts
@@ -30,11 +30,6 @@ export interface GemmaStatusResult {
   allPassing: boolean;
 }
 
-/**
- * Runs all diagnostic checks and returns a structured status result.
- * This is shared between the CLI `gemini gemma status` command and the
- * in-session `/gemma` slash command.
- */
 export async function checkGemmaStatus(
   port?: number,
 ): Promise<GemmaStatusResult> {
@@ -64,7 +59,6 @@ export async function checkGemmaStatus(
   };
 }
 
-/** Formats the status result into a human-readable string. */
 export function formatGemmaStatus(status: GemmaStatusResult): string {
   const check = (ok: boolean) => (ok ? chalk.green('✓') : chalk.red('✗'));
 
@@ -75,7 +69,6 @@ export function formatGemmaStatus(status: GemmaStatusResult): string {
     '',
   ];
 
-  // Binary
   if (status.binaryInstalled) {
     lines.push(`  Binary:    ${check(true)} Installed (${status.binaryPath})`);
   } else {
@@ -90,7 +83,6 @@ export function formatGemmaStatus(status: GemmaStatusResult): string {
     }
   }
 
-  // Model
   if (status.modelDownloaded) {
     lines.push(`  Model:     ${check(true)} ${GEMMA_MODEL_NAME} downloaded`);
   } else {
@@ -106,7 +98,6 @@ export function formatGemmaStatus(status: GemmaStatusResult): string {
     }
   }
 
-  // Server
   if (status.serverRunning) {
     const pidInfo = status.serverPid ? ` (PID ${status.serverPid})` : '';
     lines.push(
@@ -119,7 +110,6 @@ export function formatGemmaStatus(status: GemmaStatusResult): string {
     lines.push(chalk.dim(`             Run: gemini gemma start`));
   }
 
-  // Settings
   if (status.settingsEnabled) {
     lines.push(`  Settings:  ${check(true)} Enabled in settings.json`);
   } else {
@@ -169,7 +159,6 @@ export const statusCommand: CommandModule = {
     }
     const status = await checkGemmaStatus(port);
     const output = formatGemmaStatus(status);
-    // Use process.stdout directly for consistent output in non-interactive mode.
     process.stdout.write(output);
     await exitCli(status.allPassing ? 0 : 1);
   },
diff --git a/packages/cli/src/commands/gemma/stop.ts b/packages/cli/src/commands/gemma/stop.ts
index 409989e33c3..676dc0b667a 100644
--- a/packages/cli/src/commands/gemma/stop.ts
+++ b/packages/cli/src/commands/gemma/stop.ts
@@ -17,10 +17,6 @@ import {
   resolveGemmaConfig,
 } from './platform.js';
 
-/**
- * Stops the LiteRT-LM server by sending SIGTERM to the stored PID.
- * Returns true if the server was stopped successfully.
- */
 export async function stopServer(): Promise<boolean> {
   const pid = readServerPid();
   const pidPath = getPidFilePath();
@@ -30,11 +26,10 @@ export async function stopServer(): Promise<boolean> {
   }
 
   if (!isProcessRunning(pid)) {
-    // PID file exists but process is gone — clean up stale file.
     try {
       fs.unlinkSync(pidPath);
     } catch {
-      // Ignore cleanup errors.
+      // ignore
     }
     return false;
   }
@@ -45,24 +40,21 @@ export async function stopServer(): Promise<boolean> {
     return false;
   }
 
-  // Wait briefly for graceful shutdown.
   await new Promise((resolve) => setTimeout(resolve, 1000));
 
-  // If still running, escalate to SIGKILL.
   if (isProcessRunning(pid)) {
     try {
       process.kill(pid, 'SIGKILL');
     } catch {
-      // Process may have exited between the check and the kill.
+      // ignore
     }
     await new Promise((resolve) => setTimeout(resolve, 500));
   }
 
-  // Clean up PID file.
   try {
     fs.unlinkSync(pidPath);
   } catch {
-    // Ignore cleanup errors.
+    // ignore
   }
 
   return true;
@@ -101,7 +93,6 @@ export const stopCommand: CommandModule = {
       return;
     }
 
-    // No PID file or process not running — check if something else is on the port.
     const running = await isServerRunning(port);
     if (running) {
       debugLogger.log(
diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx
index 899be0bad57..1f43419c8da 100644
--- a/packages/cli/src/gemini.tsx
+++ b/packages/cli/src/gemini.tsx
@@ -612,8 +612,6 @@ export async function main() {
     const initializationResult = await initializeApp(config, settings);
     initAppHandle?.end();
 
-    // Auto-start the LiteRT-LM server for Gemma local routing if configured.
-    // This is fire-and-forget — failures are logged but never block startup.
     import('./services/liteRtServerManager.js')
       .then(({ LiteRtServerManager }) =>
         LiteRtServerManager.ensureRunning(
diff --git a/packages/cli/src/services/liteRtServerManager.ts b/packages/cli/src/services/liteRtServerManager.ts
index 375b34ca06a..7107d2321ef 100644
--- a/packages/cli/src/services/liteRtServerManager.ts
+++ b/packages/cli/src/services/liteRtServerManager.ts
@@ -13,19 +13,7 @@ import {
 } from '../commands/gemma/platform.js';
 import { DEFAULT_PORT } from '../commands/gemma/constants.js';
 
-/**
- * Manages the LiteRT-LM server lifecycle for auto-start during CLI startup.
- *
- * When the Gemma model router is enabled and `autoStartServer` is true,
- * this manager ensures the server is running before the CLI enters
- * interactive mode. The server is spawned as a detached daemon that
- * persists across CLI sessions — it is NOT stopped when the CLI exits.
- */
 export class LiteRtServerManager {
-  /**
-   * Ensures the LiteRT-LM server is running if the settings call for it.
-   * This is fire-and-forget: failures are logged but never block startup.
-   */
   static async ensureRunning(
     gemmaSettings: GemmaModelRouterSettings | undefined,
   ): Promise<void> {
@@ -57,8 +45,6 @@ export class LiteRtServerManager {
     );
 
     try {
-      // Dynamic import to avoid circular dependencies and to keep the start
-      // logic in one place.
       const { startServer } = await import('../commands/gemma/start.js');
       const binaryPath = gemmaSettings.binaryPath || getBinaryPath() || '';
       if (!binaryPath) {
diff --git a/packages/cli/src/ui/components/views/GemmaStatus.tsx b/packages/cli/src/ui/components/views/GemmaStatus.tsx
index b9c20142d46..160689ebeac 100644
--- a/packages/cli/src/ui/components/views/GemmaStatus.tsx
+++ b/packages/cli/src/ui/components/views/GemmaStatus.tsx
@@ -32,7 +32,6 @@ export const GemmaStatus: React.FC<GemmaStatusProps> = ({
     <Text bold>Gemma Local Model Routing</Text>
     <Box height={1} />
 
-    {/* Binary */}
     <Box>
       <StatusDot ok={binaryInstalled} />
       <Text>
@@ -46,7 +45,6 @@ export const GemmaStatus: React.FC<GemmaStatusProps> = ({
       </Text>
     </Box>
 
-    {/* Model */}
     <Box>
       <StatusDot ok={modelDownloaded} />
       <Text>
@@ -60,7 +58,6 @@ export const GemmaStatus: React.FC<GemmaStatusProps> = ({
       </Text>
     </Box>
 
-    {/* Server */}
     <Box>
       <StatusDot ok={serverRunning} />
       <Text>
@@ -81,7 +78,6 @@ export const GemmaStatus: React.FC<GemmaStatusProps> = ({
       </Text>
     </Box>
 
-    {/* Settings */}
     <Box>
       <StatusDot ok={settingsEnabled} />
       <Text>
@@ -95,7 +91,6 @@ export const GemmaStatus: React.FC<GemmaStatusProps> = ({
       </Text>
     </Box>
 
-    {/* Active For */}
     <Box marginTop={1}>
       <Text bold>Active for: </Text>
       {allPassing ? (
@@ -105,7 +100,6 @@ export const GemmaStatus: React.FC<GemmaStatusProps> = ({
       )}
     </Box>
 
-    {/* Summary */}
     <Box marginTop={1}>
       {allPassing ? (
         <Box flexDirection="column">

From c83376ca948e0420a4f40fa7216534ce11719910 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Thu, 16 Apr 2026 19:31:25 -0700
Subject: [PATCH 17/33] fix(cli): harden gemma router setup and server handling

---
 packages/cli/src/commands/gemma/logs.test.ts  |  51 ++++++++
 packages/cli/src/commands/gemma/logs.ts       |  84 +++++++++++--
 .../cli/src/commands/gemma/platform.test.ts   | 113 ++++++++++++++++++
 packages/cli/src/commands/gemma/platform.ts   |  60 ++++++++--
 packages/cli/src/commands/gemma/setup.ts      |  62 +++++++---
 packages/cli/src/commands/gemma/start.ts      |   4 +-
 packages/cli/src/commands/gemma/status.ts     |   2 +-
 packages/cli/src/config/config.test.ts        |  13 ++
 .../cli/src/config/settingsSchema.test.ts     |  24 +++-
 .../src/services/liteRtServerManager.test.ts  |  68 +++++++++++
 .../cli/src/services/liteRtServerManager.ts   |  15 +--
 packages/core/src/config/config.test.ts       |   8 ++
 packages/core/src/config/config.ts            |   2 +
 13 files changed, 455 insertions(+), 51 deletions(-)
 create mode 100644 packages/cli/src/commands/gemma/logs.test.ts
 create mode 100644 packages/cli/src/commands/gemma/platform.test.ts
 create mode 100644 packages/cli/src/services/liteRtServerManager.test.ts

diff --git a/packages/cli/src/commands/gemma/logs.test.ts b/packages/cli/src/commands/gemma/logs.test.ts
new file mode 100644
index 00000000000..caa9bc1ec4e
--- /dev/null
+++ b/packages/cli/src/commands/gemma/logs.test.ts
@@ -0,0 +1,55 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import { afterEach, describe, expect, it } from 'vitest';
+import { readLastLines } from './logs.js';
+
+describe('readLastLines', () => {
+  // Paths of the temporary log files a test created; emptied in afterEach.
+  const tempFiles: string[] = [];
+
+  afterEach(async () => {
+    await Promise.all(
+      tempFiles
+        .splice(0)
+        .map((filePath) => fs.promises.rm(filePath, { force: true })),
+    );
+  });
+
+  it('returns only the requested tail lines without reading the whole file eagerly', async () => {
+    const filePath = path.join(
+      os.tmpdir(),
+      `gemma-logs-${Date.now()}-${Math.random().toString(36).slice(2)}.log`,
+    );
+    tempFiles.push(filePath);
+
+    // 20,000 lines come to roughly 200 KiB, several times the 64 KiB chunk
+    // that readLastLines reads from the end of the file, so the result has to
+    // be cut out of a partial read that begins in the middle of a line.
+    const content = Array.from({ length: 20000 }, (_, i) => `line-${i + 1}`)
+      .join('\n')
+      .concat('\n');
+    await fs.promises.writeFile(filePath, content, 'utf-8');
+
+    await expect(readLastLines(filePath, 3)).resolves.toBe(
+      'line-19998\nline-19999\nline-20000\n',
+    );
+  });
+
+  it('returns an empty string when zero lines are requested', async () => {
+    const filePath = path.join(
+      os.tmpdir(),
+      `gemma-logs-${Date.now()}-${Math.random().toString(36).slice(2)}.log`,
+    );
+    tempFiles.push(filePath);
+    await fs.promises.writeFile(filePath, 'line-1\nline-2\n', 'utf-8');
+
+    await expect(readLastLines(filePath, 0)).resolves.toBe('');
+  });
+});
diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts
index bbe5780c358..5eb81ea33d4 100644
--- a/packages/cli/src/commands/gemma/logs.ts
+++ b/packages/cli/src/commands/gemma/logs.ts
@@ -11,13 +11,88 @@ import { debugLogger } from '@google/gemini-cli-core';
 import { exitCli } from '../utils.js';
 import { getLogFilePath } from './constants.js';
 
-function readLastLines(filePath: string, count: number): string {
-  const content = fs.readFileSync(filePath, 'utf-8');
-  const lines = content.split('\n');
-  if (lines.length > 0 && lines[lines.length - 1] === '') {
-    lines.pop();
+/**
+ * Returns the last `count` lines of a file as one newline-terminated string.
+ *
+ * The file is read backwards from its end in 64 KiB chunks, and reading stops
+ * as soon as more than `count` line feeds have been collected or the start of
+ * the file is reached.
+ *
+ * @param filePath File to read.
+ * @param count Number of trailing lines wanted; zero or less yields `''`.
+ * @returns The trailing lines joined with `\n` plus a final `\n`, or `''`
+ *   when the file is empty or holds no lines.
+ */
+export async function readLastLines(
+  filePath: string,
+  count: number,
+): Promise<string> {
+  if (count <= 0) {
+    return '';
+  }
+
+  const LINE_FEED = 0x0a;
+  const chunkBytes = 64 * 1024;
+  const handle = await fs.promises.open(filePath, fs.constants.O_RDONLY);
+
+  try {
+    const { size } = await handle.stat();
+    if (size === 0) {
+      return '';
+    }
+
+    const pieces: Buffer[] = [];
+    let collectedBytes = 0;
+    let lineFeeds = 0;
+    let offset = size;
+
+    // Keep going until more than `count` line feeds are buffered: a line feed
+    // at the very end of the file terminates the last line, it does not start one.
+    while (offset > 0 && lineFeeds <= count) {
+      const wanted = Math.min(chunkBytes, offset);
+      offset -= wanted;
+
+      const scratch = Buffer.allocUnsafe(wanted);
+      const { bytesRead } = await handle.read(scratch, 0, wanted, offset);
+      if (bytesRead === 0) {
+        break;
+      }
+
+      const piece =
+        bytesRead === wanted ? scratch : scratch.subarray(0, bytesRead);
+      pieces.unshift(piece);
+      collectedBytes += piece.length;
+
+      let at = piece.indexOf(LINE_FEED);
+      while (at !== -1) {
+        lineFeeds += 1;
+        at = piece.indexOf(LINE_FEED, at + 1);
+      }
+    }
+
+    const lines = Buffer.concat(pieces, collectedBytes)
+      .toString('utf-8')
+      .split('\n');
+
+    // When reading stopped part-way through the file, the first entry is only
+    // a complete line if the byte just before the read window is a line feed.
+    if (offset > 0 && lines.length > 0) {
+      const previous = Buffer.allocUnsafe(1);
+      const probe = await handle.read(previous, 0, 1, offset - 1);
+      if (probe.bytesRead === 1 && previous[0] !== LINE_FEED) {
+        lines.shift();
+      }
+    }
+
+    // A trailing empty entry is what remains after the final line feed.
+    if (lines[lines.length - 1] === '') {
+      lines.pop();
+    }
+
+    return lines.length === 0 ? '' : `${lines.slice(-count).join('\n')}\n`;
+  } finally {
+    await handle.close();
   }
-  return lines.slice(-count).join('\n') + '\n';
 }
 
 const isWindows = process.platform === 'win32';
@@ -41,7 +107,9 @@ export const logsCommand: CommandModule = {
   handler: async (argv) => {
     const logPath = getLogFilePath();
 
-    if (!fs.existsSync(logPath)) {
+    try {
+      await fs.promises.access(logPath, fs.constants.F_OK);
+    } catch {
       debugLogger.log(`No log file found at ${logPath}`);
       debugLogger.log(
         'Is the LiteRT server running? Start it with: gemini gemma start',
@@ -55,7 +123,7 @@ export const logsCommand: CommandModule = {
 
     if (lines !== undefined) {
       if (isWindows) {
-        process.stdout.write(readLastLines(logPath, lines));
+        process.stdout.write(await readLastLines(logPath, lines));
         await exitCli(0);
         return;
       }
diff --git a/packages/cli/src/commands/gemma/platform.test.ts b/packages/cli/src/commands/gemma/platform.test.ts
new file mode 100644
index 00000000000..fb10c026ece
--- /dev/null
+++ b/packages/cli/src/commands/gemma/platform.test.ts
@@ -0,0 +1,107 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import fs from 'node:fs';
+import path from 'node:path';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+import { SettingScope } from '../../config/settings.js';
+import { getLiteRtBinDir } from './constants.js';
+
+const mockLoadSettings = vi.hoisted(() => vi.fn());
+
+vi.mock('../../config/settings.js', () => ({
+  loadSettings: mockLoadSettings,
+  SettingScope: {
+    User: 'User',
+  },
+}));
+
+import {
+  getBinaryPath,
+  isBinaryInstalled,
+  resolveGemmaConfig,
+} from './platform.js';
+
+describe('gemma platform helpers', () => {
+  /**
+   * Builds a stand-in for the value returned by `loadSettings`.
+   *
+   * @param userGemmaSettings Router settings exposed through
+   *   `forScope(SettingScope.User)`.
+   * @param mergedGemmaSettings Router settings exposed through `merged`.
+   */
+  const createMockSettings = (
+    userGemmaSettings?: object,
+    mergedGemmaSettings?: object,
+  ) => {
+    // Only the User scope is stubbed; any other scope throws.
+    const forScope = vi.fn((scope: SettingScope) => {
+      if (scope === SettingScope.User) {
+        return {
+          settings: { experimental: { gemmaModelRouter: userGemmaSettings } },
+        };
+      }
+      throw new Error(`Unexpected scope ${scope}`);
+    });
+    const merged = { experimental: { gemmaModelRouter: mergedGemmaSettings } };
+    return { merged, forScope };
+  };
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    mockLoadSettings.mockReturnValue(createMockSettings());
+  });
+
+  it('prefers the configured binary path from settings', () => {
+    const settings = createMockSettings({ binaryPath: '/custom/lit' });
+    mockLoadSettings.mockReturnValue(settings);
+
+    expect(getBinaryPath('lit.test')).toBe('/custom/lit');
+  });
+
+  it('ignores workspace overrides for the configured binary path', () => {
+    const settings = createMockSettings(
+      { binaryPath: '/user/lit' },
+      { binaryPath: '/workspace/evil' },
+    );
+    mockLoadSettings.mockReturnValue(settings);
+
+    expect(getBinaryPath('lit.test')).toBe('/user/lit');
+  });
+
+  it('falls back to the default install location when no custom path is set', () => {
+    const resolved = getBinaryPath('lit.test');
+
+    expect(resolved).toBe(path.join(getLiteRtBinDir(), 'lit.test'));
+  });
+
+  it('resolves the configured port and binary path from settings', () => {
+    const userSettings = { binaryPath: '/custom/lit' };
+    const mergedSettings = {
+      enabled: true,
+      classifier: { host: 'http://localhost:8123/v1beta' },
+    };
+    mockLoadSettings.mockReturnValue(
+      createMockSettings(userSettings, mergedSettings),
+    );
+
+    expect(resolveGemmaConfig(9379)).toEqual({
+      settingsEnabled: true,
+      configuredPort: 8123,
+      configuredBinaryPath: '/custom/lit',
+    });
+  });
+
+  it('checks binary installation using the resolved binary path', () => {
+    const settings = createMockSettings({ binaryPath: '/custom/lit' });
+    mockLoadSettings.mockReturnValue(settings);
+    const existsSync = vi.spyOn(fs, 'existsSync').mockReturnValue(true);
+
+    expect(isBinaryInstalled()).toBe(true);
+    expect(existsSync).toHaveBeenCalledWith('/custom/lit');
+  });
+});
diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts
index fd2dbc683b6..0ee360ae63e 100644
--- a/packages/cli/src/commands/gemma/platform.ts
+++ b/packages/cli/src/commands/gemma/platform.ts
@@ -4,7 +4,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { loadSettings } from '../../config/settings.js';
+import { loadSettings, SettingScope } from '../../config/settings.js';
 import fs from 'node:fs';
 import path from 'node:path';
 import { execFileSync } from 'node:child_process';
@@ -26,26 +26,88 @@ export interface PlatformInfo {
 export interface GemmaConfigStatus {
   settingsEnabled: boolean;
   configuredPort: number;
+  configuredBinaryPath?: string;
+}
+
+/**
+ * Reads the custom LiteRT-LM binary path from the user-scoped settings only;
+ * values contributed by any other settings scope are not consulted.
+ *
+ * @param workspaceDir Directory handed to `loadSettings`; defaults to the
+ *   current working directory.
+ * @returns The trimmed path, or `undefined` when it is unset or blank, or
+ *   when the settings cannot be loaded.
+ */
+function getUserConfiguredBinaryPath(
+  workspaceDir = process.cwd(),
+): string | undefined {
+  try {
+    const userGemmaSettings = loadSettings(workspaceDir).forScope(
+      SettingScope.User,
+    ).settings.experimental?.gemmaModelRouter;
+    return userGemmaSettings?.binaryPath?.trim() || undefined;
+  } catch {
+    return undefined;
+  }
+}
+
+/**
+ * Extracts the port number from a classifier host setting.
+ *
+ * Accepts full URLs (`http://localhost:9379/v1beta`) as well as bare
+ * `host:port` values (`localhost:9379`, `127.0.0.1:9379`).
+ *
+ * @param host Host string from settings; may be undefined or empty.
+ * @param fallbackPort Port returned when no usable port can be extracted.
+ * @returns The extracted port when it is a positive number, otherwise
+ *   `fallbackPort`.
+ */
+function parsePortFromHost(
+  host: string | undefined,
+  fallbackPort: number,
+): number {
+  if (!host) {
+    return fallbackPort;
+  }
+
+  try {
+    let url = new URL(host);
+    if (!url.port && !host.includes('://')) {
+      // `new URL('localhost:9379')` does not throw: it yields a URL whose
+      // scheme is "localhost:" and whose port is empty, which would silently
+      // drop the configured port. Re-parse such values with an explicit
+      // scheme so that "host:port" is read as an authority.
+      url = new URL(`http://${host}`);
+    }
+    const port = Number(url.port);
+    return Number.isFinite(port) && port > 0 ? port : fallbackPort;
+  } catch {
+    // Not parseable as a URL; fall back to the first ":<digits>" run.
+    const match = host.match(/:(\d+)/);
+    if (!match) {
+      return fallbackPort;
+    }
+    const port = parseInt(match[1], 10);
+    return Number.isFinite(port) && port > 0 ? port : fallbackPort;
+  }
 }
 
 export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus {
   let settingsEnabled = false;
   let configuredPort = fallbackPort;
+  const configuredBinaryPath = getUserConfiguredBinaryPath();
   try {
     const settings = loadSettings(process.cwd());
     const gemmaSettings = settings.merged.experimental?.gemmaModelRouter;
     settingsEnabled = gemmaSettings?.enabled === true;
-    const hostStr = gemmaSettings?.classifier?.host;
-    if (hostStr) {
-      const match = hostStr.match(/:(\d+)/);
-      if (match) {
-        configuredPort = parseInt(match[1], 10);
-      }
-    }
+    configuredPort = parsePortFromHost(
+      gemmaSettings?.classifier?.host,
+      fallbackPort,
+    );
   } catch {
     // ignore — settings may fail to load outside a workspace
   }
-  return { settingsEnabled, configuredPort };
+  return { settingsEnabled, configuredPort, configuredBinaryPath };
 }
 
 export function detectPlatform(): PlatformInfo | null {
@@ -58,6 +92,11 @@ export function detectPlatform(): PlatformInfo | null {
 }
 
 export function getBinaryPath(binaryName?: string): string | null {
+  const configuredBinaryPath = getUserConfiguredBinaryPath();
+  if (configuredBinaryPath) {
+    return configuredBinaryPath;
+  }
+
   const name = binaryName ?? detectPlatform()?.binaryName;
   if (!name) return null;
   return path.join(getLiteRtBinDir(), name);
@@ -67,8 +106,14 @@ export function getBinaryDownloadUrl(binaryName: string): string {
   return `${LITERT_RELEASE_BASE_URL}/${LITERT_RELEASE_VERSION}/${binaryName}`;
 }
 
-export function isBinaryInstalled(): boolean {
-  const binaryPath = getBinaryPath();
-  if (!binaryPath) return false;
-  return fs.existsSync(binaryPath);
+/**
+ * Reports whether the LiteRT-LM binary exists on disk.
+ *
+ * @param binaryPath Path to check; defaults to the result of
+ *   `getBinaryPath()`.
+ * @returns `false` when the path is empty or null, otherwise whether a file
+ *   system entry exists at that path.
+ */
+export function isBinaryInstalled(binaryPath = getBinaryPath()): boolean {
+  return binaryPath ? fs.existsSync(binaryPath) : false;
 }
diff --git a/packages/cli/src/commands/gemma/setup.ts b/packages/cli/src/commands/gemma/setup.ts
index 002afcbfdbf..5a7034f6ae9 100644
--- a/packages/cli/src/commands/gemma/setup.ts
+++ b/packages/cli/src/commands/gemma/setup.ts
@@ -6,16 +6,13 @@
 
 import type { CommandModule } from 'yargs';
 import fs from 'node:fs';
-import { execSync, spawn as nodeSpawn } from 'node:child_process';
+import path from 'node:path';
+import { execFileSync, spawn as nodeSpawn } from 'node:child_process';
 import chalk from 'chalk';
 import { debugLogger } from '@google/gemini-cli-core';
 import { loadSettings, SettingScope } from '../../config/settings.js';
 import { exitCli } from '../utils.js';
-import {
-  DEFAULT_PORT,
-  GEMMA_MODEL_NAME,
-  getLiteRtBinDir,
-} from './constants.js';
+import { DEFAULT_PORT, GEMMA_MODEL_NAME } from './constants.js';
 import {
   detectPlatform,
   getBinaryDownloadUrl,
@@ -133,6 +130,9 @@ interface SetupArgs {
 
 async function handleSetup(argv: SetupArgs): Promise<number> {
   const { port, force } = argv;
+  let settingsUpdated = false;
+  let serverStarted = false;
+  let autoStartServer = true;
 
   log('');
   log(chalk.bold('Gemma Local Model Routing Setup'));
@@ -181,7 +181,7 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
     debugLogger.log(`Downloading from: ${downloadUrl}`);
 
     try {
-      const binDir = getLiteRtBinDir();
+      const binDir = path.dirname(binaryPath);
       fs.mkdirSync(binDir, { recursive: true });
       await downloadFile(downloadUrl, binaryPath);
       log(chalk.green('  ✓ Binary downloaded successfully'));
@@ -210,7 +210,7 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
 
     if (process.platform === 'darwin') {
       try {
-        execSync(`xattr -d com.apple.quarantine "${binaryPath}"`, {
+        execFileSync('xattr', ['-d', 'com.apple.quarantine', binaryPath], {
           stdio: 'ignore',
         });
         log(chalk.green('  ✓ macOS quarantine attribute removed'));
@@ -254,15 +254,16 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
     const existingGemma =
       settings.forScope(SettingScope.User).settings.experimental
         ?.gemmaModelRouter ?? {};
+    autoStartServer = existingGemma.autoStartServer ?? true;
 
     const newGemmaSettings = {
       ...existingGemma,
       enabled: true,
-      autoStartServer: existingGemma.autoStartServer ?? true,
+      autoStartServer,
       classifier: {
+        ...existingGemma.classifier,
         host: `http://localhost:${port}`,
         model: GEMMA_MODEL_NAME,
-        ...existingGemma.classifier,
       },
     };
 
@@ -274,6 +275,7 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
     });
 
     log(chalk.green('  ✓ Settings updated in ~/.gemini/settings.json'));
+    settingsUpdated = true;
   } catch (error) {
     logError(
       chalk.red(
@@ -288,8 +290,8 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
   if (argv.start) {
     log('');
     log('  Starting LiteRT server...');
-    const started = await startServer(binaryPath, port);
-    if (started) {
+    serverStarted = await startServer(binaryPath, port);
+    if (serverStarted) {
       log(chalk.green(`  ✓ Server started on port ${port}`));
     } else {
       log(
@@ -300,9 +302,23 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
     }
   }
 
+  const routingActive = settingsUpdated && serverStarted;
+  const setupSucceeded = settingsUpdated && (!argv.start || serverStarted);
   log('');
   log(chalk.dim('─'.repeat(40)));
-  log(chalk.bold.green('  Setup complete! Local model routing is now active.'));
+  if (routingActive) {
+    log(chalk.bold.green('  Setup complete! Local model routing is active.'));
+  } else if (settingsUpdated) {
+    log(
+      chalk.bold.green('  Setup complete! Local model routing is configured.'),
+    );
+  } else {
+    log(
+      chalk.bold.yellow(
+        '  Setup incomplete. Manual settings changes are still required.',
+      ),
+    );
+  }
   log('');
   log('  How it works: Every request is classified by the local Gemma model.');
   log(
@@ -317,15 +333,27 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
   );
   log('  This happens automatically — just use the CLI as usual.');
   log('');
-  if (!argv.start) {
+  if (!settingsUpdated) {
     log(
       chalk.yellow(
-        '  Note: Run "gemini gemma start" to start the server, or restart',
+        '  Fix the settings update above, then rerun "gemini gemma status".',
       ),
     );
+    log('');
+  } else if (!argv.start) {
+    log(chalk.yellow('  Note: Run "gemini gemma start" to start the server.'));
+    if (autoStartServer) {
+      log(
+        chalk.yellow(
+          '  Or restart the CLI to auto-start it on the next launch.',
+        ),
+      );
+    }
+    log('');
+  } else if (!serverStarted) {
     log(
       chalk.yellow(
-        '  the CLI to auto-start it (if autoStartServer is enabled).',
+        '  Review the server logs and rerun "gemini gemma start" after fixing the issue.',
       ),
     );
     log('');
@@ -337,7 +365,7 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
   log(chalk.dim('    /gemma               Check status inside a session'));
   log('');
 
-  return 0;
+  return setupSucceeded ? 0 : 1;
 }
 
 export const setupCommand: CommandModule = {
diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts
index 827c3f4fbd6..96a31de218e 100644
--- a/packages/cli/src/commands/gemma/start.ts
+++ b/packages/cli/src/commands/gemma/start.ts
@@ -81,7 +81,8 @@ export const startCommand: CommandModule = {
       port = configuredPort;
     }
 
-    if (!isBinaryInstalled()) {
+    const binaryPath = getBinaryPath();
+    if (!binaryPath || !isBinaryInstalled(binaryPath)) {
       debugLogger.error(
         chalk.red(
           'LiteRT-LM binary not found. Run "gemini gemma setup" first.',
@@ -100,7 +101,6 @@ export const startCommand: CommandModule = {
       return;
     }
 
-    const binaryPath = getBinaryPath()!;
     debugLogger.log(`Starting LiteRT server on port ${port}...`);
 
     const started = await startServer(binaryPath, port);
diff --git a/packages/cli/src/commands/gemma/status.ts b/packages/cli/src/commands/gemma/status.ts
index 77847437cb9..8ce9f006dcb 100644
--- a/packages/cli/src/commands/gemma/status.ts
+++ b/packages/cli/src/commands/gemma/status.ts
@@ -37,7 +37,7 @@ export async function checkGemmaStatus(
 
   const effectivePort = port ?? configuredPort;
   const binaryPath = getBinaryPath();
-  const binaryInstalled = isBinaryInstalled();
+  const binaryInstalled = isBinaryInstalled(binaryPath);
   const modelDownloaded =
     binaryInstalled && binaryPath ? isModelDownloaded(binaryPath) : false;
   const serverRunning = await isServerRunning(effectivePort);
diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts
index 04df366a983..4288150ba5a 100644
--- a/packages/cli/src/config/config.test.ts
+++ b/packages/cli/src/config/config.test.ts
@@ -338,6 +338,7 @@ describe('parseArguments', () => {
       { cmd: 'skill list', expected: true },
       { cmd: 'hooks migrate', expected: true },
       { cmd: 'hook migrate', expected: true },
+      { cmd: 'gemma status', expected: true },
       { cmd: 'some query', expected: undefined },
       { cmd: 'hello world', expected: undefined },
     ])(
@@ -758,6 +759,12 @@ describe('parseArguments', () => {
     const argv = await parseArguments(settings);
     expect(argv.isCommand).toBe(true);
   });
+
+  it('should set isCommand to true for gemma command', async () => {
+    process.argv = ['node', 'script.js', 'gemma', 'status'];
+    const argv = await parseArguments(createTestMergedSettings());
+    expect(argv.isCommand).toBe(true);
+  });
 });
 
 describe('loadCliConfig', () => {
@@ -3030,6 +3037,8 @@ describe('loadCliConfig gemmaModelRouter', () => {
       experimental: {
         gemmaModelRouter: {
           enabled: true,
+          autoStartServer: false,
+          binaryPath: '/custom/lit',
           classifier: {
             host: 'http://custom:1234',
             model: 'custom-gemma',
@@ -3040,6 +3049,8 @@ describe('loadCliConfig gemmaModelRouter', () => {
     const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getGemmaModelRouterEnabled()).toBe(true);
     const gemmaSettings = config.getGemmaModelRouterSettings();
+    expect(gemmaSettings.autoStartServer).toBe(false);
+    expect(gemmaSettings.binaryPath).toBe('/custom/lit');
     expect(gemmaSettings.classifier?.host).toBe('http://custom:1234');
     expect(gemmaSettings.classifier?.model).toBe('custom-gemma');
   });
@@ -3057,6 +3068,8 @@ describe('loadCliConfig gemmaModelRouter', () => {
     const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getGemmaModelRouterEnabled()).toBe(true);
     const gemmaSettings = config.getGemmaModelRouterSettings();
+    expect(gemmaSettings.autoStartServer).toBe(true);
+    expect(gemmaSettings.binaryPath).toBe('');
     expect(gemmaSettings.classifier?.host).toBe('http://localhost:9379');
     expect(gemmaSettings.classifier?.model).toBe('gemma3-1b-gpu-custom');
   });
diff --git a/packages/cli/src/config/settingsSchema.test.ts b/packages/cli/src/config/settingsSchema.test.ts
index 27639fa0311..a7ce6cf0159 100644
--- a/packages/cli/src/config/settingsSchema.test.ts
+++ b/packages/cli/src/config/settingsSchema.test.ts
@@ -471,11 +471,33 @@ describe('SettingsSchema', () => {
       expect(enabled.category).toBe('Experimental');
       expect(enabled.default).toBe(false);
       expect(enabled.requiresRestart).toBe(true);
-      expect(enabled.showInDialog).toBe(false);
+      expect(enabled.showInDialog).toBe(true);
       expect(enabled.description).toBe(
         'Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.',
       );
 
+      const autoStartServer = gemmaModelRouter.properties.autoStartServer;
+      expect(autoStartServer).toBeDefined();
+      expect(autoStartServer.type).toBe('boolean');
+      expect(autoStartServer.category).toBe('Experimental');
+      expect(autoStartServer.default).toBe(true);
+      expect(autoStartServer.requiresRestart).toBe(true);
+      expect(autoStartServer.showInDialog).toBe(true);
+      expect(autoStartServer.description).toBe(
+        'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.',
+      );
+
+      const binaryPath = gemmaModelRouter.properties.binaryPath;
+      expect(binaryPath).toBeDefined();
+      expect(binaryPath.type).toBe('string');
+      expect(binaryPath.category).toBe('Experimental');
+      expect(binaryPath.default).toBe('');
+      expect(binaryPath.requiresRestart).toBe(true);
+      expect(binaryPath.showInDialog).toBe(false);
+      expect(binaryPath.description).toBe(
+        'Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).',
+      );
+
       const classifier = gemmaModelRouter.properties.classifier;
       expect(classifier).toBeDefined();
       expect(classifier.type).toBe('object');
diff --git a/packages/cli/src/services/liteRtServerManager.test.ts b/packages/cli/src/services/liteRtServerManager.test.ts
new file mode 100644
index 00000000000..f1af5c800ae
--- /dev/null
+++ b/packages/cli/src/services/liteRtServerManager.test.ts
@@ -0,0 +1,68 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import fs from 'node:fs';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+import type { GemmaModelRouterSettings } from '@google/gemini-cli-core';
+
+const mockGetBinaryPath = vi.hoisted(() => vi.fn());
+const mockIsServerRunning = vi.hoisted(() => vi.fn());
+const mockStartServer = vi.hoisted(() => vi.fn());
+
+vi.mock('../commands/gemma/platform.js', () => ({
+  getBinaryPath: mockGetBinaryPath,
+  isServerRunning: mockIsServerRunning,
+}));
+
+vi.mock('../commands/gemma/start.js', () => ({
+  startServer: mockStartServer,
+}));
+
+import { LiteRtServerManager } from './liteRtServerManager.js';
+
+describe('LiteRtServerManager', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+    vi.spyOn(fs, 'existsSync').mockReturnValue(true);
+    mockIsServerRunning.mockResolvedValue(false);
+    mockStartServer.mockResolvedValue(true);
+  });
+
+  it('uses the configured custom binary path when auto-starting', async () => {
+    mockGetBinaryPath.mockReturnValue('/user/lit');
+
+    const settings: GemmaModelRouterSettings = {
+      enabled: true,
+      binaryPath: '/workspace/evil',
+      classifier: {
+        host: 'http://localhost:8123',
+      },
+    };
+
+    await LiteRtServerManager.ensureRunning(settings);
+
+    expect(mockGetBinaryPath).toHaveBeenCalledTimes(1);
+    expect(fs.existsSync).toHaveBeenCalledWith('/user/lit');
+    expect(mockStartServer).toHaveBeenCalledWith('/user/lit', 8123);
+  });
+
+  it('falls back to the default binary path when no custom path is configured', async () => {
+    mockGetBinaryPath.mockReturnValue('/default/lit');
+
+    const settings: GemmaModelRouterSettings = {
+      enabled: true,
+      classifier: {
+        host: 'http://localhost:9379',
+      },
+    };
+
+    await LiteRtServerManager.ensureRunning(settings);
+
+    expect(mockGetBinaryPath).toHaveBeenCalledTimes(1);
+    expect(fs.existsSync).toHaveBeenCalledWith('/default/lit');
+    expect(mockStartServer).toHaveBeenCalledWith('/default/lit', 9379);
+  });
+});
diff --git a/packages/cli/src/services/liteRtServerManager.ts b/packages/cli/src/services/liteRtServerManager.ts
index 7107d2321ef..e72d321f9d0 100644
--- a/packages/cli/src/services/liteRtServerManager.ts
+++ b/packages/cli/src/services/liteRtServerManager.ts
@@ -4,13 +4,10 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+import fs from 'node:fs';
 import { debugLogger } from '@google/gemini-cli-core';
 import type { GemmaModelRouterSettings } from '@google/gemini-cli-core';
-import {
-  getBinaryPath,
-  isBinaryInstalled,
-  isServerRunning,
-} from '../commands/gemma/platform.js';
+import { getBinaryPath, isServerRunning } from '../commands/gemma/platform.js';
 import { DEFAULT_PORT } from '../commands/gemma/constants.js';
 
 export class LiteRtServerManager {
@@ -19,7 +16,8 @@ export class LiteRtServerManager {
   ): Promise<void> {
     if (!gemmaSettings?.enabled) return;
     if (gemmaSettings.autoStartServer === false) return;
-    if (!isBinaryInstalled()) {
+    const binaryPath = getBinaryPath();
+    if (!binaryPath || !fs.existsSync(binaryPath)) {
       debugLogger.log(
         '[LiteRtServerManager] Binary not installed, skipping auto-start. Run "gemini gemma setup".',
       );
@@ -46,11 +44,6 @@ export class LiteRtServerManager {
 
     try {
       const { startServer } = await import('../commands/gemma/start.js');
-      const binaryPath = gemmaSettings.binaryPath || getBinaryPath() || '';
-      if (!binaryPath) {
-        debugLogger.warn('[LiteRtServerManager] Could not resolve binary path');
-        return;
-      }
       const started = await startServer(binaryPath, port);
       if (started) {
         debugLogger.log(`[LiteRtServerManager] Server started on port ${port}`);
diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts
index ab000b2691f..17fab3d8e07 100644
--- a/packages/core/src/config/config.test.ts
+++ b/packages/core/src/config/config.test.ts
@@ -1901,6 +1901,8 @@ describe('GemmaModelRouterSettings', () => {
     const config = new Config(baseParams);
     const settings = config.getGemmaModelRouterSettings();
     expect(settings.enabled).toBe(false);
+    expect(settings.autoStartServer).toBe(true);
+    expect(settings.binaryPath).toBe('');
     expect(settings.classifier?.host).toBe('http://localhost:9379');
     expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom');
   });
@@ -1910,6 +1912,8 @@ describe('GemmaModelRouterSettings', () => {
       ...baseParams,
       gemmaModelRouter: {
         enabled: true,
+        autoStartServer: false,
+        binaryPath: '/custom/lit',
         classifier: {
           host: 'http://custom:1234',
           model: 'custom-gemma',
@@ -1919,6 +1923,8 @@ describe('GemmaModelRouterSettings', () => {
     const config = new Config(params);
     const settings = config.getGemmaModelRouterSettings();
     expect(settings.enabled).toBe(true);
+    expect(settings.autoStartServer).toBe(false);
+    expect(settings.binaryPath).toBe('/custom/lit');
     expect(settings.classifier?.host).toBe('http://custom:1234');
     expect(settings.classifier?.model).toBe('custom-gemma');
   });
@@ -1933,6 +1939,8 @@ describe('GemmaModelRouterSettings', () => {
     const config = new Config(params);
     const settings = config.getGemmaModelRouterSettings();
     expect(settings.enabled).toBe(true);
+    expect(settings.autoStartServer).toBe(true);
+    expect(settings.binaryPath).toBe('');
     expect(settings.classifier?.host).toBe('http://localhost:9379');
     expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom');
   });
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 45abdceacce..3e397cb9e64 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -1322,6 +1322,8 @@ export class Config implements McpContext, AgentLoopContext {
     };
     this.gemmaModelRouter = {
       enabled: params.gemmaModelRouter?.enabled ?? false,
+      autoStartServer: params.gemmaModelRouter?.autoStartServer ?? true,
+      binaryPath: params.gemmaModelRouter?.binaryPath ?? '',
       classifier: {
         host:
           params.gemmaModelRouter?.classifier?.host ?? 'http://localhost:9379',

From 637af65507c1b774e76c7e59595e427a6314782d Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Thu, 16 Apr 2026 19:44:08 -0700
Subject: [PATCH 18/33] fix(cli): keep gemma logs attached to tail

---
 packages/cli/src/commands/gemma/logs.test.ts | 129 ++++++++++++++++++-
 packages/cli/src/commands/gemma/logs.ts      |  90 ++++++++-----
 2 files changed, 186 insertions(+), 33 deletions(-)

diff --git a/packages/cli/src/commands/gemma/logs.test.ts b/packages/cli/src/commands/gemma/logs.test.ts
index caa9bc1ec4e..1cf34b77e5a 100644
--- a/packages/cli/src/commands/gemma/logs.test.ts
+++ b/packages/cli/src/commands/gemma/logs.test.ts
@@ -5,10 +5,54 @@
  */
 
 import fs from 'node:fs';
+import type { ChildProcess } from 'node:child_process';
+import { EventEmitter } from 'node:events';
 import os from 'node:os';
 import path from 'node:path';
-import { afterEach, describe, expect, it } from 'vitest';
-import { readLastLines } from './logs.js';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { spawn } from 'node:child_process';
+import { exitCli } from '../utils.js';
+import { getLogFilePath } from './constants.js';
+import { logsCommand, readLastLines } from './logs.js';
+
+vi.mock('@google/gemini-cli-core', async (importOriginal) => {
+  const { mockCoreDebugLogger } = await import(
+    '../../test-utils/mockDebugLogger.js'
+  );
+  return mockCoreDebugLogger(
+    await importOriginal<typeof import('@google/gemini-cli-core')>(),
+    {
+      stripAnsi: false,
+    },
+  );
+});
+
+vi.mock('node:child_process', async (importOriginal) => {
+  const actual = await importOriginal<typeof import('node:child_process')>();
+  return {
+    ...actual,
+    spawn: vi.fn(),
+  };
+});
+
+vi.mock('../utils.js', () => ({
+  exitCli: vi.fn(),
+}));
+
+vi.mock('./constants.js', () => ({
+  getLogFilePath: vi.fn(),
+}));
+
+function createMockChild(): ChildProcess {
+  return Object.assign(new EventEmitter(), {
+    kill: vi.fn(),
+  }) as unknown as ChildProcess;
+}
+
+async function flushMicrotasks() {
+  await Promise.resolve();
+  await Promise.resolve();
+}
 
 describe('readLastLines', () => {
   const tempFiles: string[] = [];
@@ -49,3 +93,84 @@ describe('readLastLines', () => {
     await expect(readLastLines(filePath, 0)).resolves.toBe('');
   });
 });
+
+describe('logsCommand', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+    vi.mocked(getLogFilePath).mockReturnValue('/tmp/gemma.log');
+    vi.spyOn(fs.promises, 'access').mockResolvedValue(undefined);
+  });
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  it('waits for the tail process to close before exiting in follow mode', async () => {
+    const child = createMockChild();
+    vi.mocked(spawn).mockReturnValue(child);
+
+    let resolved = false;
+    const handlerPromise = (
+      logsCommand.handler as (argv: Record<string, unknown>) => Promise<void>
+    )({}).then(() => {
+      resolved = true;
+    });
+
+    await flushMicrotasks();
+
+    expect(spawn).toHaveBeenCalledWith(
+      'tail',
+      ['-f', '-n', '20', '/tmp/gemma.log'],
+      { stdio: 'inherit' },
+    );
+    expect(resolved).toBe(false);
+    expect(exitCli).not.toHaveBeenCalled();
+
+    child.emit('close', 0);
+    await handlerPromise;
+
+    expect(exitCli).toHaveBeenCalledWith(0);
+  });
+
+  it('uses one-shot tail output when follow is disabled', async () => {
+    const child = createMockChild();
+    vi.mocked(spawn).mockReturnValue(child);
+
+    const handlerPromise = (
+      logsCommand.handler as (argv: Record<string, unknown>) => Promise<void>
+    )({ follow: false });
+
+    await flushMicrotasks();
+
+    expect(spawn).toHaveBeenCalledWith('tail', ['-n', '20', '/tmp/gemma.log'], {
+      stdio: 'inherit',
+    });
+
+    child.emit('close', 0);
+    await handlerPromise;
+
+    expect(exitCli).toHaveBeenCalledWith(0);
+  });
+
+  it('follows from the requested line count when both --lines and --follow are set', async () => {
+    const child = createMockChild();
+    vi.mocked(spawn).mockReturnValue(child);
+
+    const handlerPromise = (
+      logsCommand.handler as (argv: Record<string, unknown>) => Promise<void>
+    )({ lines: 5, follow: true });
+
+    await flushMicrotasks();
+
+    expect(spawn).toHaveBeenCalledWith(
+      'tail',
+      ['-f', '-n', '5', '/tmp/gemma.log'],
+      { stdio: 'inherit' },
+    );
+
+    child.emit('close', 0);
+    await handlerPromise;
+
+    expect(exitCli).toHaveBeenCalledWith(0);
+  });
+});
diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts
index 5eb81ea33d4..f2f96b6b08c 100644
--- a/packages/cli/src/commands/gemma/logs.ts
+++ b/packages/cli/src/commands/gemma/logs.ts
@@ -6,7 +6,7 @@
 
 import type { CommandModule } from 'yargs';
 import fs from 'node:fs';
-import { spawn } from 'node:child_process';
+import { spawn, type ChildProcess } from 'node:child_process';
 import { debugLogger } from '@google/gemini-cli-core';
 import { exitCli } from '../utils.js';
 import { getLogFilePath } from './constants.js';
@@ -88,7 +88,41 @@ export async function readLastLines(
 
 const isWindows = process.platform === 'win32';
 
-export const logsCommand: CommandModule = {
+interface LogsArgs {
+  lines?: number;
+  follow?: boolean;
+}
+
+function waitForChild(child: ChildProcess): Promise<number> {
+  return new Promise((resolve, reject) => {
+    child.once('error', reject);
+    child.once('close', (code) => resolve(code ?? 1));
+  });
+}
+
+async function runTail(logPath: string, lines: number, follow: boolean) {
+  const tailArgs = follow
+    ? ['-f', '-n', String(lines), logPath]
+    : ['-n', String(lines), logPath];
+  const child = spawn('tail', tailArgs, { stdio: 'inherit' });
+
+  if (!follow) {
+    return waitForChild(child);
+  }
+
+  const handleSigint = () => {
+    child.kill('SIGTERM');
+  };
+  process.once('SIGINT', handleSigint);
+
+  try {
+    return await waitForChild(child);
+  } finally {
+    process.off('SIGINT', handleSigint);
+  }
+}
+
+export const logsCommand: CommandModule<object, LogsArgs> = {
   command: 'logs',
   describe: 'View LiteRT-LM server logs',
   builder: (yargs) =>
@@ -101,8 +135,8 @@ export const logsCommand: CommandModule = {
       .option('follow', {
         alias: 'f',
         type: 'boolean',
-        default: true,
-        description: 'Follow log output (default when --lines is not set)',
+        description:
+          'Follow log output (defaults to true when --lines is omitted)',
       }),
   handler: async (argv) => {
     const logPath = getLogFilePath();
@@ -118,24 +152,11 @@ export const logsCommand: CommandModule = {
       return;
     }
 
-    const rawLines = argv['lines'];
-    const lines = Number.isFinite(rawLines) ? Number(rawLines) : undefined;
+    const lines = Number.isFinite(argv.lines) ? argv.lines : undefined;
+    const follow = argv.follow ?? lines === undefined;
+    const requestedLines = lines ?? 20;
 
-    if (lines !== undefined) {
-      if (isWindows) {
-        process.stdout.write(await readLastLines(logPath, lines));
-        await exitCli(0);
-        return;
-      }
-      const tailArgs = ['-n', String(lines), logPath];
-      const child = spawn('tail', tailArgs, { stdio: 'inherit' });
-      child.on('close', async (code) => {
-        await exitCli(code ?? 0);
-      });
-      return;
-    }
-
-    if (isWindows) {
+    if (follow && isWindows) {
       debugLogger.log(
         'Live log following is not supported on Windows. Use --lines N to view recent logs.',
       );
@@ -143,16 +164,23 @@ export const logsCommand: CommandModule = {
       return;
     }
 
-    debugLogger.log(`Tailing ${logPath} (Ctrl+C to stop)\n`);
-    const tailArgs = ['-f', '-n', '20', logPath];
-    const child = spawn('tail', tailArgs, { stdio: 'inherit' });
-
-    process.on('SIGINT', () => {
-      child.kill('SIGTERM');
-    });
+    if (isWindows) {
+      process.stdout.write(await readLastLines(logPath, requestedLines));
+      await exitCli(0);
+      return;
+    }
 
-    child.on('close', async (code) => {
-      await exitCli(code ?? 0);
-    });
+    try {
+      if (follow) {
+        debugLogger.log(`Tailing ${logPath} (Ctrl+C to stop)\n`);
+      }
+      const exitCode = await runTail(logPath, requestedLines, follow);
+      await exitCli(exitCode);
+    } catch (error) {
+      debugLogger.error(
+        `Failed to read log output: ${error instanceof Error ? error.message : String(error)}`,
+      );
+      await exitCli(1);
+    }
   },
 };

From 21bd3bd9bdaf9621df66eeff9dad28d6138e1607 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Thu, 16 Apr 2026 19:44:29 -0700
Subject: [PATCH 19/33] Update packages/cli/src/commands/gemma/platform.ts

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 packages/cli/src/commands/gemma/platform.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts
index 0ee360ae63e..044056af182 100644
--- a/packages/cli/src/commands/gemma/platform.ts
+++ b/packages/cli/src/commands/gemma/platform.ts
@@ -70,12 +70,12 @@ export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus {
   const configuredBinaryPath = getUserConfiguredBinaryPath();
   try {
     const settings = loadSettings(process.cwd());
-    const gemmaSettings = settings.merged.experimental?.gemmaModelRouter;
+    const gemmaSettings = settings.forScope(SettingScope.User).settings.experimental?.gemmaModelRouter;
     settingsEnabled = gemmaSettings?.enabled === true;
     configuredPort = parsePortFromHost(
       gemmaSettings?.classifier?.host,
       fallbackPort,
     );
   } catch {
     // ignore — settings may fail to load outside a workspace
   }

From 6fed15f626a15a55555f7e52ba3265cce8167ef2 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Thu, 16 Apr 2026 19:44:41 -0700
Subject: [PATCH 20/33] Update packages/cli/src/gemini.tsx

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 packages/cli/src/gemini.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx
index 1f43419c8da..c10d74744af 100644
--- a/packages/cli/src/gemini.tsx
+++ b/packages/cli/src/gemini.tsx
@@ -615,7 +615,7 @@ export async function main() {
     import('./services/liteRtServerManager.js')
       .then(({ LiteRtServerManager }) =>
         LiteRtServerManager.ensureRunning(
-          settings.merged.experimental?.gemmaModelRouter,
+          settings.forScope(SettingScope.User).settings.experimental?.gemmaModelRouter,
         ),
       )
       .catch((e) => debugLogger.warn('LiteRT auto-start import failed:', e));

From 403a4c0e8475fb4ddaa30c747d04a7d0f919c2b6 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Thu, 16 Apr 2026 20:02:50 -0700
Subject: [PATCH 21/33] fix(cli): harden gemma setup and stop safety

---
 packages/cli/src/commands/gemma/constants.ts  |  10 ++
 .../cli/src/commands/gemma/platform.test.ts   |  49 ++++++
 packages/cli/src/commands/gemma/platform.ts   | 157 +++++++++++++++++-
 packages/cli/src/commands/gemma/setup.test.ts |  60 +++++++
 packages/cli/src/commands/gemma/setup.ts      |  75 ++++++++-
 packages/cli/src/commands/gemma/start.ts      |   8 +-
 packages/cli/src/commands/gemma/stop.test.ts  | 112 +++++++++++++
 packages/cli/src/commands/gemma/stop.ts       |  60 +++++--
 8 files changed, 514 insertions(+), 17 deletions(-)
 create mode 100644 packages/cli/src/commands/gemma/setup.test.ts
 create mode 100644 packages/cli/src/commands/gemma/stop.test.ts

diff --git a/packages/cli/src/commands/gemma/constants.ts b/packages/cli/src/commands/gemma/constants.ts
index cab4c1f2b26..de80762a1a5 100644
--- a/packages/cli/src/commands/gemma/constants.ts
+++ b/packages/cli/src/commands/gemma/constants.ts
@@ -21,6 +21,16 @@ export const PLATFORM_BINARY_MAP: Record<string, string> = {
   'win32-x64': 'lit.windows_x86_64.exe',
 };
 
+// SHA-256 hashes for the official LiteRT-LM v0.9.0-alpha03 release binaries.
+export const PLATFORM_BINARY_SHA256: Record<string, string> = {
+  'lit.macos_arm64':
+    '9e826a2634f2e8b220ad0f1e1b5c139e0b47cb172326e3b7d46d31382f49478e',
+  'lit.linux_x86_64':
+    '66601df8a07f08244b188e9fcab0bf4a16562fe76d8d47e49f40273d57541ee8',
+  'lit.windows_x86_64.exe':
+    'de82d2829d2fb1cbdb318e2d8a78dc2f9659ff14cb11b2894d1f30e0bfde2bf6',
+};
+
 export function getLiteRtBinDir(): string {
   return path.join(Storage.getGlobalGeminiDir(), 'bin', 'litert');
 }
diff --git a/packages/cli/src/commands/gemma/platform.test.ts b/packages/cli/src/commands/gemma/platform.test.ts
index fb10c026ece..b00549365a9 100644
--- a/packages/cli/src/commands/gemma/platform.test.ts
+++ b/packages/cli/src/commands/gemma/platform.test.ts
@@ -21,7 +21,9 @@ vi.mock('../../config/settings.js', () => ({
 
 import {
   getBinaryPath,
+  isExpectedLiteRtServerCommand,
   isBinaryInstalled,
+  readServerProcessInfo,
   resolveGemmaConfig,
 } from './platform.js';
 
@@ -110,4 +112,51 @@ describe('gemma platform helpers', () => {
     expect(isBinaryInstalled()).toBe(true);
     expect(fs.existsSync).toHaveBeenCalledWith('/custom/lit');
   });
+
+  it('parses structured server process info from the pid file', () => {
+    vi.spyOn(fs, 'readFileSync').mockReturnValue(
+      JSON.stringify({
+        pid: 1234,
+        binaryPath: '/custom/lit',
+        port: 8123,
+      }),
+    );
+
+    expect(readServerProcessInfo()).toEqual({
+      pid: 1234,
+      binaryPath: '/custom/lit',
+      port: 8123,
+    });
+  });
+
+  it('parses legacy pid-only files for backward compatibility', () => {
+    vi.spyOn(fs, 'readFileSync').mockReturnValue('4321');
+
+    expect(readServerProcessInfo()).toEqual({
+      pid: 4321,
+    });
+  });
+
+  it('matches only the expected LiteRT serve command', () => {
+    expect(
+      isExpectedLiteRtServerCommand('/custom/lit serve --port=8123 --verbose', {
+        binaryPath: '/custom/lit',
+        port: 8123,
+      }),
+    ).toBe(true);
+
+    expect(
+      isExpectedLiteRtServerCommand('/custom/lit run --port=8123', {
+        binaryPath: '/custom/lit',
+        port: 8123,
+      }),
+    ).toBe(false);
+
+    expect(
+      isExpectedLiteRtServerCommand('/custom/lit serve --port=9000', {
+        binaryPath: '/custom/lit',
+        port: 8123,
+      }),
+    ).toBe(false);
+  });
 });
diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts
index 044056af182..953bd700703 100644
--- a/packages/cli/src/commands/gemma/platform.ts
+++ b/packages/cli/src/commands/gemma/platform.ts
@@ -29,6 +29,12 @@ export interface GemmaConfigStatus {
   configuredBinaryPath?: string;
 }
 
+export interface LiteRtServerProcessInfo {
+  pid: number;
+  binaryPath?: string;
+  port?: number;
+}
+
 function getUserConfiguredBinaryPath(
   workspaceDir = process.cwd(),
 ): string | undefined {
@@ -136,17 +142,205 @@ export async function isServerRunning(port: number): Promise<boolean> {
   }
 }
 
-export function readServerPid(): number | null {
+/**
+ * Type guard for the JSON payload stored in the PID file: an object with a
+ * positive-integer `pid` and, when present, a non-empty `binaryPath` string
+ * and a positive-integer `port`.
+ */
+function isLiteRtServerProcessInfo(
+  value: unknown,
+): value is LiteRtServerProcessInfo {
+  if (!value || typeof value !== 'object') {
+    return false;
+  }
+
+  const isPositiveInteger = (candidate: unknown): candidate is number =>
+    typeof candidate === 'number' &&
+    Number.isInteger(candidate) &&
+    candidate > 0;
+  const isNonEmptyString = (candidate: unknown): candidate is string =>
+    typeof candidate === 'string' && candidate.length > 0;
+
+  const pid: unknown = Object.getOwnPropertyDescriptor(value, 'pid')?.value;
+  if (!isPositiveInteger(pid)) {
+    return false;
+  }
+
+  const binaryPath: unknown = Object.getOwnPropertyDescriptor(
+    value,
+    'binaryPath',
+  )?.value;
+  if (binaryPath !== undefined && !isNonEmptyString(binaryPath)) {
+    return false;
+  }
+
+  const port: unknown = Object.getOwnPropertyDescriptor(value, 'port')?.value;
+  if (port !== undefined && !isPositiveInteger(port)) {
+    return false;
+  }
+
+  return true;
+}
+
+/**
+ * Reads the PID file and returns the recorded server process info.
+ *
+ * Accepts both the legacy digits-only format and the JSON format written by
+ * `writeServerProcessInfo`. Returns `null` when the file is unreadable, empty,
+ * or does not describe a valid process.
+ */
+export function readServerProcessInfo(): LiteRtServerProcessInfo | null {
   const pidPath = getPidFilePath();
   try {
     const content = fs.readFileSync(pidPath, 'utf-8').trim();
-    const pid = parseInt(content, 10);
-    return isNaN(pid) ? null : pid;
+    if (!content) {
+      return null;
+    }
+
+    if (/^\d+$/.test(content)) {
+      // Legacy pid-only file. Validate it like the JSON form so that a
+      // non-positive pid such as "0" is rejected instead of being returned.
+      const legacyInfo = { pid: parseInt(content, 10) };
+      return isLiteRtServerProcessInfo(legacyInfo) ? legacyInfo : null;
+    }
+
+    const parsed = JSON.parse(content) as unknown;
+    return isLiteRtServerProcessInfo(parsed) ? parsed : null;
+  } catch {
+    return null;
+  }
+}
+
+/** Writes `processInfo` to the PID file as JSON. */
+export function writeServerProcessInfo(
+  processInfo: LiteRtServerProcessInfo,
+): void {
+  fs.writeFileSync(getPidFilePath(), JSON.stringify(processInfo), 'utf-8');
+}
+
+/** Returns the pid recorded in the PID file, or `null` if there is none. */
+export function readServerPid(): number | null {
+  return readServerProcessInfo()?.pid ?? null;
+}
+
+/**
+ * Canonicalizes a path or command line for comparison: NUL separators become
+ * spaces and whitespace runs collapse to one space. On Windows, backslashes
+ * are also turned into forward slashes and the result is lower-cased.
+ */
+function normalizeProcessValue(value: string): string {
+  const normalized = value.replace(/\0/g, ' ').trim();
+  if (process.platform === 'win32') {
+    return normalized.replace(/\\/g, '/').replace(/\s+/g, ' ').toLowerCase();
+  }
+  return normalized.replace(/\s+/g, ' ');
+}
+
+/**
+ * Looks up the command line of process `pid` (via `/proc` on Linux,
+ * PowerShell on Windows, `ps` elsewhere). Returns `null` if the lookup fails
+ * or yields no output.
+ */
+function readProcessCommandLine(pid: number): string | null {
+  try {
+    if (process.platform === 'linux') {
+      const output = fs.readFileSync(`/proc/${pid}/cmdline`, 'utf-8');
+      return output.trim() ? output : null;
+    }
+
+    if (process.platform === 'win32') {
+      const output = execFileSync(
+        'powershell.exe',
+        [
+          '-NoProfile',
+          '-Command',
+          `(Get-CimInstance Win32_Process -Filter "ProcessId = ${pid}").CommandLine`,
+        ],
+        {
+          encoding: 'utf-8',
+          timeout: 5000,
+        },
+      );
+      return output.trim() || null;
+    }
+
+    const output = execFileSync('ps', ['-p', String(pid), '-o', 'command='], {
+      encoding: 'utf-8',
+      timeout: 5000,
+    });
+    return output.trim() || null;
   } catch {
     return null;
   }
 }
 
+/**
+ * Checks whether `commandLine` looks like the LiteRT server we expect.
+ *
+ * The command must contain a standalone `serve` argument, the exact
+ * `--port=<port>` argument when `options.port` is given, and the binary path
+ * or its base name when `options.binaryPath` is given.
+ */
+export function isExpectedLiteRtServerCommand(
+  commandLine: string,
+  options: {
+    binaryPath?: string | null;
+    port?: number;
+  },
+): boolean {
+  const normalizedCommandLine = normalizeProcessValue(commandLine);
+  if (!normalizedCommandLine) {
+    return false;
+  }
+
+  if (!/(^|\s|")serve(\s|$)/.test(normalizedCommandLine)) {
+    return false;
+  }
+
+  if (options.port !== undefined) {
+    // Match the flag as a whole argument; a plain substring test would accept
+    // "--port=8080" when the expected port is 80.
+    const portFlag = `--port=${options.port}`;
+    const hasPortFlag = normalizedCommandLine
+      .split(' ')
+      .some((arg) => arg.replace(/^"|"$/g, '') === portFlag);
+    if (!hasPortFlag) {
+      return false;
+    }
+  }
+
+  if (!options.binaryPath) {
+    return true;
+  }
+
+  const normalizedBinaryPath = normalizeProcessValue(options.binaryPath);
+  const normalizedBinaryName = normalizeProcessValue(
+    path.basename(options.binaryPath),
+  );
+  return (
+    normalizedCommandLine.includes(normalizedBinaryPath) ||
+    normalizedCommandLine.includes(normalizedBinaryName)
+  );
+}
+
+/**
+ * Returns true when the command line of process `pid` can be read and passes
+ * `isExpectedLiteRtServerCommand` for the given options.
+ */
+export function isExpectedLiteRtServerProcess(
+  pid: number,
+  options: {
+    binaryPath?: string | null;
+    port?: number;
+  },
+): boolean {
+  const commandLine = readProcessCommandLine(pid);
+  if (!commandLine) {
+    return false;
+  }
+  return isExpectedLiteRtServerCommand(commandLine, options);
+}
+
 export function isProcessRunning(pid: number): boolean {
   try {
     process.kill(pid, 0);
diff --git a/packages/cli/src/commands/gemma/setup.test.ts b/packages/cli/src/commands/gemma/setup.test.ts
new file mode 100644
index 00000000000..663a5d6e4c3
--- /dev/null
+++ b/packages/cli/src/commands/gemma/setup.test.ts
@@ -0,0 +1,60 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import { afterEach, describe, expect, it } from 'vitest';
+import { PLATFORM_BINARY_MAP, PLATFORM_BINARY_SHA256 } from './constants.js';
+import { computeFileSha256, verifyFileSha256 } from './setup.js';
+
+describe('gemma setup checksum helpers', () => {
+  const tempFiles: string[] = [];
+
+  afterEach(async () => {
+    await Promise.all(
+      tempFiles
+        .splice(0)
+        .map((filePath) => fs.promises.rm(filePath, { force: true })),
+    );
+  });
+
+  it('has a pinned checksum for every supported LiteRT binary', () => {
+    expect(Object.keys(PLATFORM_BINARY_SHA256).sort()).toEqual(
+      Object.values(PLATFORM_BINARY_MAP).sort(),
+    );
+  });
+
+  it('computes the sha256 for a downloaded file', async () => {
+    const filePath = path.join(
+      os.tmpdir(),
+      `gemma-setup-${Date.now()}-${Math.random().toString(36).slice(2)}`,
+    );
+    tempFiles.push(filePath);
+    await fs.promises.writeFile(filePath, 'hello world', 'utf-8');
+
+    await expect(computeFileSha256(filePath)).resolves.toBe(
+      'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9',
+    );
+  });
+
+  it('verifies whether a file matches the expected sha256', async () => {
+    const filePath = path.join(
+      os.tmpdir(),
+      `gemma-setup-${Date.now()}-${Math.random().toString(36).slice(2)}`,
+    );
+    tempFiles.push(filePath);
+    await fs.promises.writeFile(filePath, 'hello world', 'utf-8');
+
+    await expect(
+      verifyFileSha256(
+        filePath,
+        'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9',
+      ),
+    ).resolves.toBe(true);
+    await expect(verifyFileSha256(filePath, 'deadbeef')).resolves.toBe(false);
+  });
+});
diff --git a/packages/cli/src/commands/gemma/setup.ts b/packages/cli/src/commands/gemma/setup.ts
index 5a7034f6ae9..5346924e6c6 100644
--- a/packages/cli/src/commands/gemma/setup.ts
+++ b/packages/cli/src/commands/gemma/setup.ts
@@ -5,6 +5,7 @@
  */
 
 import type { CommandModule } from 'yargs';
+import { createHash } from 'node:crypto';
 import fs from 'node:fs';
 import path from 'node:path';
 import { execFileSync, spawn as nodeSpawn } from 'node:child_process';
@@ -12,7 +13,11 @@ import chalk from 'chalk';
 import { debugLogger } from '@google/gemini-cli-core';
 import { loadSettings, SettingScope } from '../../config/settings.js';
 import { exitCli } from '../utils.js';
-import { DEFAULT_PORT, GEMMA_MODEL_NAME } from './constants.js';
+import {
+  DEFAULT_PORT,
+  GEMMA_MODEL_NAME,
+  PLATFORM_BINARY_SHA256,
+} from './constants.js';
 import {
   detectPlatform,
   getBinaryDownloadUrl,
@@ -110,6 +115,29 @@ async function downloadFile(url: string, destPath: string): Promise<void> {
   fs.renameSync(tmpPath, destPath);
 }
 
+export async function computeFileSha256(filePath: string): Promise<string> {
+  const hash = createHash('sha256');
+  const fileStream = fs.createReadStream(filePath);
+
+  return new Promise((resolve, reject) => {
+    fileStream.on('data', (chunk) => {
+      hash.update(chunk);
+    });
+    fileStream.on('error', reject);
+    fileStream.on('end', () => {
+      resolve(hash.digest('hex'));
+    });
+  });
+}
+
+export async function verifyFileSha256(
+  filePath: string,
+  expectedHash: string,
+): Promise<boolean> {
+  const actualHash = await computeFileSha256(filePath);
+  return actualHash === expectedHash;
+}
+
 function spawnInherited(command: string, args: string[]): Promise<number> {
   return new Promise((resolve, reject) => {
     const child = nodeSpawn(command, args, {
@@ -195,6 +223,51 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
       return 1;
     }
 
+    const expectedHash = PLATFORM_BINARY_SHA256[platform.binaryName];
+    if (!expectedHash) {
+      logError(
+        chalk.red(
+          `  ✗ No checksum is configured for ${platform.binaryName}. Refusing to install the binary.`,
+        ),
+      );
+      try {
+        fs.rmSync(binaryPath, { force: true });
+      } catch {
+        // ignore
+      }
+      return 1;
+    }
+
+    try {
+      const checksumVerified = await verifyFileSha256(binaryPath, expectedHash);
+      if (!checksumVerified) {
+        logError(
+          chalk.red(
+            '  ✗ Downloaded binary checksum did not match the expected release hash.',
+          ),
+        );
+        try {
+          fs.rmSync(binaryPath, { force: true });
+        } catch {
+          // ignore
+        }
+        return 1;
+      }
+      log(chalk.green('  ✓ Binary checksum verified'));
+    } catch (error) {
+      logError(
+        chalk.red(
+          `  ✗ Failed to verify binary checksum: ${error instanceof Error ? error.message : String(error)}`,
+        ),
+      );
+      try {
+        fs.rmSync(binaryPath, { force: true });
+      } catch {
+        // ignore
+      }
+      return 1;
+    }
+
     if (process.platform !== 'win32') {
       try {
         fs.chmodSync(binaryPath, 0o755);
diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts
index 96a31de218e..badf7b69a5a 100644
--- a/packages/cli/src/commands/gemma/start.ts
+++ b/packages/cli/src/commands/gemma/start.ts
@@ -23,6 +23,7 @@ import {
   isBinaryInstalled,
   isServerRunning,
   resolveGemmaConfig,
+  writeServerProcessInfo,
 } from './platform.js';
 
 export async function startServer(
@@ -48,9 +49,12 @@ export async function startServer(
       stdio: ['ignore', logFd, logFd],
     });
 
-    const pidPath = getPidFilePath();
     if (child.pid) {
-      fs.writeFileSync(pidPath, String(child.pid), 'utf-8');
+      writeServerProcessInfo({
+        pid: child.pid,
+        binaryPath,
+        port,
+      });
     }
 
     child.unref();
diff --git a/packages/cli/src/commands/gemma/stop.test.ts b/packages/cli/src/commands/gemma/stop.test.ts
new file mode 100644
index 00000000000..64eaf6d5fcd
--- /dev/null
+++ b/packages/cli/src/commands/gemma/stop.test.ts
@@ -0,0 +1,112 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import fs from 'node:fs';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+const mockGetBinaryPath = vi.hoisted(() => vi.fn());
+const mockIsExpectedLiteRtServerProcess = vi.hoisted(() => vi.fn());
+const mockIsProcessRunning = vi.hoisted(() => vi.fn());
+const mockIsServerRunning = vi.hoisted(() => vi.fn());
+const mockReadServerPid = vi.hoisted(() => vi.fn());
+const mockReadServerProcessInfo = vi.hoisted(() => vi.fn());
+const mockResolveGemmaConfig = vi.hoisted(() => vi.fn());
+
+vi.mock('@google/gemini-cli-core', async (importOriginal) => {
+  const { mockCoreDebugLogger } = await import(
+    '../../test-utils/mockDebugLogger.js'
+  );
+  return mockCoreDebugLogger(
+    await importOriginal<typeof import('@google/gemini-cli-core')>(),
+    {
+      stripAnsi: false,
+    },
+  );
+});
+
+vi.mock('./constants.js', () => ({
+  DEFAULT_PORT: 9379,
+  getPidFilePath: vi.fn(() => '/tmp/litert-server.pid'),
+}));
+
+vi.mock('./platform.js', () => ({
+  getBinaryPath: mockGetBinaryPath,
+  isExpectedLiteRtServerProcess: mockIsExpectedLiteRtServerProcess,
+  isProcessRunning: mockIsProcessRunning,
+  isServerRunning: mockIsServerRunning,
+  readServerPid: mockReadServerPid,
+  readServerProcessInfo: mockReadServerProcessInfo,
+  resolveGemmaConfig: mockResolveGemmaConfig,
+}));
+
+vi.mock('../utils.js', () => ({
+  exitCli: vi.fn(),
+}));
+
+import { stopServer } from './stop.js';
+
+describe('gemma stop command', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+    vi.useFakeTimers();
+    mockGetBinaryPath.mockReturnValue('/custom/lit');
+    mockResolveGemmaConfig.mockReturnValue({ configuredPort: 9379 });
+  });
+
+  afterEach(() => {
+    vi.useRealTimers();
+    vi.restoreAllMocks();
+  });
+
+  it('refuses to signal a pid that does not match the expected LiteRT server', async () => {
+    mockReadServerProcessInfo.mockReturnValue({
+      pid: 1234,
+      binaryPath: '/custom/lit',
+      port: 8123,
+    });
+    mockIsProcessRunning.mockReturnValue(true);
+    mockIsExpectedLiteRtServerProcess.mockReturnValue(false);
+
+    const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true);
+
+    await expect(stopServer(8123)).resolves.toBe('unexpected-process');
+    expect(killSpy).not.toHaveBeenCalled();
+  });
+
+  it('stops the verified LiteRT server and removes the pid file', async () => {
+    mockReadServerProcessInfo.mockReturnValue({
+      pid: 1234,
+      binaryPath: '/custom/lit',
+      port: 8123,
+    });
+    mockIsProcessRunning.mockReturnValueOnce(true).mockReturnValueOnce(false);
+    mockIsExpectedLiteRtServerProcess.mockReturnValue(true);
+
+    const unlinkSpy = vi.spyOn(fs, 'unlinkSync').mockImplementation(() => {});
+    const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true);
+
+    const stopPromise = stopServer(8123);
+    await vi.runAllTimersAsync();
+
+    await expect(stopPromise).resolves.toBe('stopped');
+    expect(killSpy).toHaveBeenCalledWith(1234, 'SIGTERM');
+    expect(unlinkSpy).toHaveBeenCalledWith('/tmp/litert-server.pid');
+  });
+
+  it('cleans up a stale pid file when the recorded process is no longer running', async () => {
+    mockReadServerProcessInfo.mockReturnValue({
+      pid: 1234,
+      binaryPath: '/custom/lit',
+      port: 8123,
+    });
+    mockIsProcessRunning.mockReturnValue(false);
+
+    const unlinkSpy = vi.spyOn(fs, 'unlinkSync').mockImplementation(() => {});
+
+    await expect(stopServer(8123)).resolves.toBe('not-running');
+    expect(unlinkSpy).toHaveBeenCalledWith('/tmp/litert-server.pid');
+  });
+});
diff --git a/packages/cli/src/commands/gemma/stop.ts b/packages/cli/src/commands/gemma/stop.ts
index 676dc0b667a..a135f4e36d1 100644
--- a/packages/cli/src/commands/gemma/stop.ts
+++ b/packages/cli/src/commands/gemma/stop.ts
@@ -11,33 +11,54 @@ import { debugLogger } from '@google/gemini-cli-core';
 import { exitCli } from '../utils.js';
 import { DEFAULT_PORT, getPidFilePath } from './constants.js';
 import {
-  readServerPid,
+  getBinaryPath,
+  isExpectedLiteRtServerProcess,
   isProcessRunning,
   isServerRunning,
+  readServerPid,
+  readServerProcessInfo,
   resolveGemmaConfig,
 } from './platform.js';
 
-export async function stopServer(): Promise<boolean> {
-  const pid = readServerPid();
+export type StopServerResult =
+  | 'stopped'
+  | 'not-running'
+  | 'unexpected-process'
+  | 'failed';
+
+export async function stopServer(
+  expectedPort?: number,
+): Promise<StopServerResult> {
+  const processInfo = readServerProcessInfo();
   const pidPath = getPidFilePath();
 
-  if (pid === null) {
-    return false;
+  if (!processInfo) {
+    return 'not-running';
   }
 
+  const { pid } = processInfo;
   if (!isProcessRunning(pid)) {
     try {
       fs.unlinkSync(pidPath);
     } catch {
       // ignore
     }
-    return false;
+    return 'not-running';
+  }
+
+  const binaryPath = processInfo.binaryPath ?? getBinaryPath();
+  const port = processInfo.port ?? expectedPort;
+  if (!isExpectedLiteRtServerProcess(pid, { binaryPath, port })) {
+    debugLogger.warn(
+      `Refusing to stop PID ${pid} because it does not match the expected LiteRT server process.`,
+    );
+    return 'unexpected-process';
   }
 
   try {
     process.kill(pid, 'SIGTERM');
   } catch {
-    return false;
+    return 'failed';
   }
 
   await new Promise((resolve) => setTimeout(resolve, 1000));
@@ -49,6 +70,9 @@ export async function stopServer(): Promise<boolean> {
       // ignore
     }
     await new Promise((resolve) => setTimeout(resolve, 500));
+    if (isProcessRunning(pid)) {
+      return 'failed';
+    }
   }
 
   try {
@@ -57,8 +81,9 @@ export async function stopServer(): Promise<boolean> {
     // ignore
   }
 
-  return true;
+  return 'stopped';
 }
+
 export const stopCommand: CommandModule = {
   command: 'stop',
   describe: 'Stop the LiteRT-LM server',
@@ -78,14 +103,27 @@ export const stopCommand: CommandModule = {
       port = configuredPort;
     }
 
-    const pid = readServerPid();
+    const processInfo = readServerProcessInfo();
+    const pid = processInfo?.pid ?? readServerPid();
 
     if (pid !== null && isProcessRunning(pid)) {
       debugLogger.log(`Stopping LiteRT server (PID ${pid})...`);
-      const stopped = await stopServer();
-      if (stopped) {
+      const result = await stopServer(port);
+      if (result === 'stopped') {
         debugLogger.log(chalk.green('LiteRT server stopped.'));
         await exitCli(0);
+      } else if (result === 'unexpected-process') {
+        debugLogger.error(
+          chalk.red(
+            `Refusing to stop PID ${pid} because it does not match the expected LiteRT server process.`,
+          ),
+        );
+        debugLogger.error(
+          chalk.dim(
+            'Remove the stale pid file after verifying the process, or stop the process manually.',
+          ),
+        );
+        await exitCli(1);
       } else {
         debugLogger.error(chalk.red('Failed to stop LiteRT server.'));
         await exitCli(1);

From 078aeb3fa202e2a4890a4720b760786051336044 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Thu, 16 Apr 2026 20:07:21 -0700
Subject: [PATCH 22/33] fix(cli): repair gemma platform config parsing

---
 packages/cli/src/commands/gemma/platform.ts | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts
index 953bd700703..aa7a4c82b69 100644
--- a/packages/cli/src/commands/gemma/platform.ts
+++ b/packages/cli/src/commands/gemma/platform.ts
@@ -76,10 +76,12 @@ export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus {
   const configuredBinaryPath = getUserConfiguredBinaryPath();
   try {
     const settings = loadSettings(process.cwd());
-    const gemmaSettings = settings.forScope(SettingScope.User).settings.experimental?.gemmaModelRouter;
+    const gemmaSettings = settings.merged.experimental?.gemmaModelRouter;
     settingsEnabled = gemmaSettings?.enabled === true;
     configuredPort = parsePortFromHost(
       gemmaSettings?.classifier?.host,
+      fallbackPort,
+    );
   } catch {
     // ignore — settings may fail to load outside a workspace
   }

From dec7329e40d7fbcab9d3b0fa9a9217f6179604d2 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Thu, 16 Apr 2026 20:17:46 -0700
Subject: [PATCH 23/33] fix(cli): prettier

---
 packages/cli/src/gemini.tsx | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx
index c10d74744af..7182fa9b34e 100644
--- a/packages/cli/src/gemini.tsx
+++ b/packages/cli/src/gemini.tsx
@@ -615,7 +615,8 @@ export async function main() {
     import('./services/liteRtServerManager.js')
       .then(({ LiteRtServerManager }) =>
         LiteRtServerManager.ensureRunning(
-          settings.forScope(SettingScope.User).settings.experimental?.gemmaModelRouter,
+          settings.forScope(SettingScope.User).settings.experimental
+            ?.gemmaModelRouter,
         ),
       )
       .catch((e) => debugLogger.warn('LiteRT auto-start import failed:', e));

From 8f2a331713739c0d467b2b33aee1312332d42ea7 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Mon, 20 Apr 2026 14:14:25 -0700
Subject: [PATCH 24/33] fix(cli): default autostart to false

---
 docs/reference/configuration.md | 2 +-
 schemas/settings.schema.json    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md
index d0eb4ef8d36..c3d9ff0f387 100644
--- a/docs/reference/configuration.md
+++ b/docs/reference/configuration.md
@@ -1714,7 +1714,7 @@ their corresponding top-level category object in your `settings.json` file.
 - **`experimental.gemmaModelRouter.autoStartServer`** (boolean):
   - **Description:** Automatically start the LiteRT-LM server when Gemini CLI
     starts and the Gemma router is enabled.
-  - **Default:** `true`
+  - **Default:** `false`
   - **Requires restart:** Yes
 
 - **`experimental.gemmaModelRouter.binaryPath`** (string):
diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json
index 94912f3a223..d9dd31a4a5a 100644
--- a/schemas/settings.schema.json
+++ b/schemas/settings.schema.json
@@ -2924,7 +2924,7 @@
               "title": "Auto-start LiteRT Server",
               "description": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.",
               "markdownDescription": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `true`",
-              "default": true,
+              "default": false,
               "type": "boolean"
             },
             "binaryPath": {

From 364eb1c230faf9fa5abc0f6dcfda14595d20d3b4 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Mon, 20 Apr 2026 14:24:04 -0700
Subject: [PATCH 25/33] fix(cli): make server check more robust

---
 packages/cli/src/commands/gemma/constants.ts | 1 +
 packages/cli/src/commands/gemma/platform.ts  | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/packages/cli/src/commands/gemma/constants.ts b/packages/cli/src/commands/gemma/constants.ts
index de80762a1a5..a37326a0578 100644
--- a/packages/cli/src/commands/gemma/constants.ts
+++ b/packages/cli/src/commands/gemma/constants.ts
@@ -13,6 +13,7 @@ export const LITERT_RELEASE_BASE_URL =
 export const GEMMA_MODEL_NAME = 'gemma3-1b-gpu-custom';
 export const DEFAULT_PORT = 9379;
 export const HEALTH_CHECK_TIMEOUT_MS = 5000;
+export const LITERT_API_VERSION = 'v1beta';
 export const SERVER_START_WAIT_MS = 3000;
 
 export const PLATFORM_BINARY_MAP: Record<string, string> = {
diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts
index aa7a4c82b69..aa85b8353fe 100644
--- a/packages/cli/src/commands/gemma/platform.ts
+++ b/packages/cli/src/commands/gemma/platform.ts
@@ -15,6 +15,7 @@ import {
   getLiteRtBinDir,
   GEMMA_MODEL_NAME,
   HEALTH_CHECK_TIMEOUT_MS,
+  LITERT_API_VERSION,
   getPidFilePath,
 } from './constants.js';
 
@@ -136,9 +137,12 @@ export async function isServerRunning(port: number): Promise<boolean> {
       () => controller.abort(),
       HEALTH_CHECK_TIMEOUT_MS,
     );
-    await fetch(`http://localhost:${port}/`, { signal: controller.signal });
+    const response = await fetch(
+      `http://localhost:${port}/${LITERT_API_VERSION}/models/${GEMMA_MODEL_NAME}`,
+      { signal: controller.signal },
+    );
     clearTimeout(timeout);
-    return true;
+    return response.ok;
   } catch {
     return false;
   }

From dac00da10ed7507b680412ba548483615e6370ac Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Mon, 20 Apr 2026 14:37:26 -0700
Subject: [PATCH 26/33] fix(cli): scope gemma settings for security and project
 isolation

---
 packages/cli/src/commands/gemma/setup.ts | 56 ++++++++++++++++--------
 packages/cli/src/gemini.tsx              | 19 +++++---
 2 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/packages/cli/src/commands/gemma/setup.ts b/packages/cli/src/commands/gemma/setup.ts
index 5346924e6c6..a936462dbff 100644
--- a/packages/cli/src/commands/gemma/setup.ts
+++ b/packages/cli/src/commands/gemma/setup.ts
@@ -324,30 +324,50 @@ async function handleSetup(argv: SetupArgs): Promise<number> {
   log('  Configuring settings...');
   try {
     const settings = loadSettings(process.cwd());
-    const existingGemma =
+
+    // User scope: security-sensitive settings that must not be overridable
+    // by workspace configs (prevents arbitrary binary execution).
+    const existingUserGemma =
       settings.forScope(SettingScope.User).settings.experimental
         ?.gemmaModelRouter ?? {};
-    autoStartServer = existingGemma.autoStartServer ?? true;
-
-    const newGemmaSettings = {
-      ...existingGemma,
-      enabled: true,
-      autoStartServer,
-      classifier: {
-        ...existingGemma.classifier,
-        host: `http://localhost:${port}`,
-        model: GEMMA_MODEL_NAME,
-      },
-    };
-
-    const existingExperimental =
+    autoStartServer = existingUserGemma.autoStartServer ?? true;
+    const existingUserExperimental =
       settings.forScope(SettingScope.User).settings.experimental ?? {};
     settings.setValue(SettingScope.User, 'experimental', {
-      ...existingExperimental,
-      gemmaModelRouter: newGemmaSettings,
+      ...existingUserExperimental,
+      gemmaModelRouter: {
+        autoStartServer,
+        ...(existingUserGemma.binaryPath !== undefined
+          ? { binaryPath: existingUserGemma.binaryPath }
+          : {}),
+      },
+    });
+
+    // Workspace scope: project-isolated settings so the local model only
+    // runs for this specific project, saving resources globally.
+    const existingWorkspaceGemma =
+      settings.forScope(SettingScope.Workspace).settings.experimental
+        ?.gemmaModelRouter ?? {};
+    const existingWorkspaceExperimental =
+      settings.forScope(SettingScope.Workspace).settings.experimental ?? {};
+    settings.setValue(SettingScope.Workspace, 'experimental', {
+      ...existingWorkspaceExperimental,
+      gemmaModelRouter: {
+        ...existingWorkspaceGemma,
+        enabled: true,
+        classifier: {
+          ...existingWorkspaceGemma.classifier,
+          host: `http://localhost:${port}`,
+          model: GEMMA_MODEL_NAME,
+        },
+      },
     });
 
-    log(chalk.green('  ✓ Settings updated in ~/.gemini/settings.json'));
+    log(chalk.green('  ✓ Settings updated'));
+    log(chalk.dim('    User (~/.gemini/settings.json): autoStartServer'));
+    log(
+      chalk.dim('    Workspace (.gemini/settings.json): enabled, classifier'),
+    );
     settingsUpdated = true;
   } catch (error) {
     logError(
diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx
index 7182fa9b34e..fb338664a29 100644
--- a/packages/cli/src/gemini.tsx
+++ b/packages/cli/src/gemini.tsx
@@ -613,12 +613,21 @@ export async function main() {
     initAppHandle?.end();
 
     import('./services/liteRtServerManager.js')
-      .then(({ LiteRtServerManager }) =>
-        LiteRtServerManager.ensureRunning(
+      .then(({ LiteRtServerManager }) => {
+        const mergedGemma = settings.merged.experimental?.gemmaModelRouter;
+        if (!mergedGemma) return;
+        // Security: binaryPath and autoStartServer must come from user-scoped
+        // settings only to prevent workspace configs from triggering arbitrary
+        // binary execution.
+        const userGemma =
           settings.forScope(SettingScope.User).settings.experimental
-            ?.gemmaModelRouter,
-        ),
-      )
+            ?.gemmaModelRouter;
+        return LiteRtServerManager.ensureRunning({
+          ...mergedGemma,
+          binaryPath: userGemma?.binaryPath,
+          autoStartServer: userGemma?.autoStartServer,
+        });
+      })
       .catch((e) => debugLogger.warn('LiteRT auto-start import failed:', e));
 
     if (

From e17478b3eb86e57c5ed129bfdec173992c682c69 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Mon, 20 Apr 2026 14:40:03 -0700
Subject: [PATCH 27/33] fix(cli): handle missing tail command and log stale PID
 cleanup

---
 packages/cli/src/commands/gemma/logs.ts | 22 ++++++++++++++++++----
 packages/cli/src/commands/gemma/stop.ts |  3 +++
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts
index f2f96b6b08c..c7f61f05cb6 100644
--- a/packages/cli/src/commands/gemma/logs.ts
+++ b/packages/cli/src/commands/gemma/logs.ts
@@ -177,10 +177,24 @@ export const logsCommand: CommandModule<object, LogsArgs> = {
       const exitCode = await runTail(logPath, requestedLines, follow);
       await exitCli(exitCode);
     } catch (error) {
-      debugLogger.error(
-        `Failed to read log output: ${error instanceof Error ? error.message : String(error)}`,
-      );
-      await exitCli(1);
+      if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
+        if (!follow) {
+          process.stdout.write(
+            await readLastLines(logPath, requestedLines),
+          );
+          await exitCli(0);
+        } else {
+          debugLogger.error(
+            '"tail" command not found. Use --lines N to view recent logs without tail.',
+          );
+          await exitCli(1);
+        }
+      } else {
+        debugLogger.error(
+          `Failed to read log output: ${error instanceof Error ? error.message : String(error)}`,
+        );
+        await exitCli(1);
+      }
     }
   },
 };
diff --git a/packages/cli/src/commands/gemma/stop.ts b/packages/cli/src/commands/gemma/stop.ts
index a135f4e36d1..c51269c579c 100644
--- a/packages/cli/src/commands/gemma/stop.ts
+++ b/packages/cli/src/commands/gemma/stop.ts
@@ -38,6 +38,9 @@ export async function stopServer(
 
   const { pid } = processInfo;
   if (!isProcessRunning(pid)) {
+    debugLogger.log(
+      `Stale PID file found (PID ${pid} is not running), removing ${pidPath}`,
+    );
     try {
       fs.unlinkSync(pidPath);
     } catch {

From 6bad21190e3cdd9277b2fce064181b4c886ccd26 Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Mon, 20 Apr 2026 15:01:13 -0700
Subject: [PATCH 28/33] fix(cli): use generateContent endpoint for server
 health check

---
 packages/cli/src/commands/gemma/platform.ts | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts
index aa85b8353fe..0fdd6e02e14 100644
--- a/packages/cli/src/commands/gemma/platform.ts
+++ b/packages/cli/src/commands/gemma/platform.ts
@@ -138,11 +138,13 @@ export async function isServerRunning(port: number): Promise<boolean> {
       HEALTH_CHECK_TIMEOUT_MS,
     );
     const response = await fetch(
-      `http://localhost:${port}/${LITERT_API_VERSION}/models/${GEMMA_MODEL_NAME}`,
-      { signal: controller.signal },
+      `http://localhost:${port}/${LITERT_API_VERSION}/models/${GEMMA_MODEL_NAME}:generateContent`,
+      { method: 'POST', signal: controller.signal },
     );
     clearTimeout(timeout);
-    return response.ok;
+    // Any status other than 404 counts as running: a 400 for this body-less
+    // POST still proves the model route exists; 404 means wrong server/model.
+    return response.status !== 404;
   } catch {
     return false;
   }

From d3576a2256583eca7f49b7fb7696790d627cb52f Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Mon, 20 Apr 2026 15:16:59 -0700
Subject: [PATCH 29/33] fix(cli): default autoStartServer to false in schema

---
 docs/cli/settings.md                      | 23 ++++++++++++-----------
 packages/cli/src/config/settingsSchema.ts |  2 +-
 schemas/settings.schema.json              |  2 +-
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/docs/cli/settings.md b/docs/cli/settings.md
index 7f34365bb00..d9e72f16f6e 100644
--- a/docs/cli/settings.md
+++ b/docs/cli/settings.md
@@ -161,17 +161,18 @@ they appear in the UI.
 
 ### Experimental
 
-| UI Label                                             | Setting                          | Description                                                                                                                                               | Default |
-| ---------------------------------------------------- | -------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
-| Enable Git Worktrees                                 | `experimental.worktrees`         | Enable automated Git worktree management for parallel work.                                                                                               | `false` |
-| Use OSC 52 Paste                                     | `experimental.useOSC52Paste`     | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
-| Use OSC 52 Copy                                      | `experimental.useOSC52Copy`      | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
-| Model Steering                                       | `experimental.modelSteering`     | Enable model steering (user hints) to guide the model during tool execution.                                                                              | `false` |
-| Direct Web Fetch                                     | `experimental.directWebFetch`    | Enable web fetch behavior that bypasses LLM summarization.                                                                                                | `false` |
-| Memory Manager Agent                                 | `experimental.memoryManager`     | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.             | `false` |
-| Auto Memory                                          | `experimental.autoMemory`        | Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox.                                            | `false` |
-| Use the generalist profile to manage agent contexts. | `experimental.generalistProfile` | Suitable for general coding and software development tasks.                                                                                               | `false` |
-| Enable Context Management                            | `experimental.contextManagement` | Enable logic for context management.                                                                                                                      | `false` |
+| UI Label                                             | Setting                                         | Description                                                                                                                                               | Default |
+| ---------------------------------------------------- | ----------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
+| Enable Git Worktrees                                 | `experimental.worktrees`                        | Enable automated Git worktree management for parallel work.                                                                                               | `false` |
+| Use OSC 52 Paste                                     | `experimental.useOSC52Paste`                    | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
+| Use OSC 52 Copy                                      | `experimental.useOSC52Copy`                     | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
+| Model Steering                                       | `experimental.modelSteering`                    | Enable model steering (user hints) to guide the model during tool execution.                                                                              | `false` |
+| Direct Web Fetch                                     | `experimental.directWebFetch`                   | Enable web fetch behavior that bypasses LLM summarization.                                                                                                | `false` |
+| Enable Gemma Model Router                            | `experimental.gemmaModelRouter.enabled`         | Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.                            | `false` |
+| Auto-start LiteRT Server                             | `experimental.gemmaModelRouter.autoStartServer` | Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.                                                          | `false` |
+| Memory Manager Agent                                 | `experimental.memoryManager`                    | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.             | `false` |
+| Use the generalist profile to manage agent contexts. | `experimental.generalistProfile`                | Suitable for general coding and software development tasks.                                                                                               | `false` |
+| Enable Context Management                            | `experimental.contextManagement`                | Enable logic for context management.                                                                                                                      | `false` |
 
 ### Skills
 
diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts
index b7a517141f9..7e7de801328 100644
--- a/packages/cli/src/config/settingsSchema.ts
+++ b/packages/cli/src/config/settingsSchema.ts
@@ -2176,7 +2176,7 @@ const SETTINGS_SCHEMA = {
             label: 'Auto-start LiteRT Server',
             category: 'Experimental',
             requiresRestart: true,
-            default: true,
+            default: false,
             description:
               'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.',
             showInDialog: true,
diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json
index 8dcfbe02d88..d30a6f4b0a3 100644
--- a/schemas/settings.schema.json
+++ b/schemas/settings.schema.json
@@ -2923,7 +2923,7 @@
             "autoStartServer": {
               "title": "Auto-start LiteRT Server",
               "description": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.",
-              "markdownDescription": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `true`",
+              "markdownDescription": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`",
               "default": false,
               "type": "boolean"
             },

From 2d3e3ab46a7c2ec8c439db6972eaf8a20a8948ef Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Mon, 20 Apr 2026 15:25:07 -0700
Subject: [PATCH 30/33] fix(cli): fix unsafe type assertion in logs error
 handler

---
 packages/cli/src/commands/gemma/logs.ts | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts
index c7f61f05cb6..ed81ac4a4e4 100644
--- a/packages/cli/src/commands/gemma/logs.ts
+++ b/packages/cli/src/commands/gemma/logs.ts
@@ -177,11 +177,13 @@ export const logsCommand: CommandModule<object, LogsArgs> = {
       const exitCode = await runTail(logPath, requestedLines, follow);
       await exitCli(exitCode);
     } catch (error) {
-      if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
+      if (
+        error instanceof Error &&
+        'code' in error &&
+        error.code === 'ENOENT'
+      ) {
         if (!follow) {
-          process.stdout.write(
-            await readLastLines(logPath, requestedLines),
-          );
+          process.stdout.write(await readLastLines(logPath, requestedLines));
           await exitCli(0);
         } else {
           debugLogger.error(

From 14e60b3fe0fb5f998ed45a65b5146503de0b28bc Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Mon, 20 Apr 2026 15:49:11 -0700
Subject: [PATCH 31/33] format

---
 packages/cli/src/gemini.tsx | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx
index fb338664a29..6e257270d77 100644
--- a/packages/cli/src/gemini.tsx
+++ b/packages/cli/src/gemini.tsx
@@ -619,9 +619,8 @@ export async function main() {
         // Security: binaryPath and autoStartServer must come from user-scoped
         // settings only to prevent workspace configs from triggering arbitrary
         // binary execution.
-        const userGemma =
-          settings.forScope(SettingScope.User).settings.experimental
-            ?.gemmaModelRouter;
+        const userGemma = settings.forScope(SettingScope.User).settings
+          .experimental?.gemmaModelRouter;
         return LiteRtServerManager.ensureRunning({
           ...mergedGemma,
           binaryPath: userGemma?.binaryPath,

From b894f16e3b0d91b0b7b4d734e0577868eb668bac Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameez@google.com>
Date: Mon, 20 Apr 2026 15:57:53 -0700
Subject: [PATCH 32/33] docs(cli): add experimental.autoMemory to settings reference

---
 docs/cli/settings.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/cli/settings.md b/docs/cli/settings.md
index d9e72f16f6e..fbe556a3705 100644
--- a/docs/cli/settings.md
+++ b/docs/cli/settings.md
@@ -171,6 +171,7 @@ they appear in the UI.
 | Enable Gemma Model Router                            | `experimental.gemmaModelRouter.enabled`         | Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.                            | `false` |
 | Auto-start LiteRT Server                             | `experimental.gemmaModelRouter.autoStartServer` | Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.                                                          | `false` |
 | Memory Manager Agent                                 | `experimental.memoryManager`                    | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.             | `false` |
+| Auto Memory                                          | `experimental.autoMemory`                       | Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox.                                            | `false` |
 | Use the generalist profile to manage agent contexts. | `experimental.generalistProfile`                | Suitable for general coding and software development tasks.                                                                                               | `false` |
 | Enable Context Management                            | `experimental.contextManagement`                | Enable logic for context management.                                                                                                                      | `false` |
 

From 8ff63304aabd33779767cca7de7b00da5f453e5f Mon Sep 17 00:00:00 2001
From: Samee Zahid <sameescouser24@gmail.com>
Date: Mon, 20 Apr 2026 16:37:51 -0700
Subject: [PATCH 33/33] fix(cli): update tests for autoStartServer default and
 cross-platform logs

---
 packages/cli/src/commands/gemma/logs.test.ts   | 10 ++++++++++
 packages/cli/src/commands/gemma/logs.ts        |  6 ++----
 packages/cli/src/config/config.test.ts         |  2 +-
 packages/cli/src/config/settingsSchema.test.ts |  2 +-
 4 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/packages/cli/src/commands/gemma/logs.test.ts b/packages/cli/src/commands/gemma/logs.test.ts
index 1cf34b77e5a..49ab8d43c68 100644
--- a/packages/cli/src/commands/gemma/logs.test.ts
+++ b/packages/cli/src/commands/gemma/logs.test.ts
@@ -95,13 +95,23 @@ describe('readLastLines', () => {
 });
 
 describe('logsCommand', () => {
+  const originalPlatform = process.platform;
+
   beforeEach(() => {
     vi.clearAllMocks();
+    Object.defineProperty(process, 'platform', {
+      value: 'linux',
+      configurable: true,
+    });
     vi.mocked(getLogFilePath).mockReturnValue('/tmp/gemma.log');
     vi.spyOn(fs.promises, 'access').mockResolvedValue(undefined);
   });
 
   afterEach(() => {
+    Object.defineProperty(process, 'platform', {
+      value: originalPlatform,
+      configurable: true,
+    });
     vi.restoreAllMocks();
   });
 
diff --git a/packages/cli/src/commands/gemma/logs.ts b/packages/cli/src/commands/gemma/logs.ts
index ed81ac4a4e4..023b8e6352a 100644
--- a/packages/cli/src/commands/gemma/logs.ts
+++ b/packages/cli/src/commands/gemma/logs.ts
@@ -86,8 +86,6 @@ export async function readLastLines(
   }
 }
 
-const isWindows = process.platform === 'win32';
-
 interface LogsArgs {
   lines?: number;
   follow?: boolean;
@@ -156,7 +154,7 @@ export const logsCommand: CommandModule<object, LogsArgs> = {
     const follow = argv.follow ?? lines === undefined;
     const requestedLines = lines ?? 20;
 
-    if (follow && isWindows) {
+    if (follow && process.platform === 'win32') {
       debugLogger.log(
         'Live log following is not supported on Windows. Use --lines N to view recent logs.',
       );
@@ -164,7 +162,7 @@ export const logsCommand: CommandModule<object, LogsArgs> = {
       return;
     }
 
-    if (isWindows) {
+    if (process.platform === 'win32') {
       process.stdout.write(await readLastLines(logPath, requestedLines));
       await exitCli(0);
       return;
diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts
index 4288150ba5a..180f4617491 100644
--- a/packages/cli/src/config/config.test.ts
+++ b/packages/cli/src/config/config.test.ts
@@ -3068,7 +3068,7 @@ describe('loadCliConfig gemmaModelRouter', () => {
     const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getGemmaModelRouterEnabled()).toBe(true);
     const gemmaSettings = config.getGemmaModelRouterSettings();
-    expect(gemmaSettings.autoStartServer).toBe(true);
+    expect(gemmaSettings.autoStartServer).toBe(false);
     expect(gemmaSettings.binaryPath).toBe('');
     expect(gemmaSettings.classifier?.host).toBe('http://localhost:9379');
     expect(gemmaSettings.classifier?.model).toBe('gemma3-1b-gpu-custom');
diff --git a/packages/cli/src/config/settingsSchema.test.ts b/packages/cli/src/config/settingsSchema.test.ts
index a7ce6cf0159..81e5f32ff09 100644
--- a/packages/cli/src/config/settingsSchema.test.ts
+++ b/packages/cli/src/config/settingsSchema.test.ts
@@ -480,7 +480,7 @@ describe('SettingsSchema', () => {
       expect(autoStartServer).toBeDefined();
       expect(autoStartServer.type).toBe('boolean');
       expect(autoStartServer.category).toBe('Experimental');
-      expect(autoStartServer.default).toBe(true);
+      expect(autoStartServer.default).toBe(false);
       expect(autoStartServer.requiresRestart).toBe(true);
       expect(autoStartServer.showInDialog).toBe(true);
       expect(autoStartServer.description).toBe(