diff --git a/packages/cli/src/commands/gemma/platform.ts b/packages/cli/src/commands/gemma/platform.ts
index e39d99e557f..ef5c5288a65 100644
--- a/packages/cli/src/commands/gemma/platform.ts
+++ b/packages/cli/src/commands/gemma/platform.ts
@@ -4,6 +4,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+import { loadSettings } from '../../config/settings.js';
 import fs from 'node:fs';
 import path from 'node:path';
 import { execFileSync } from 'node:child_process';
@@ -22,6 +23,51 @@ export interface PlatformInfo {
   binaryName: string;
 }
 
+/**
+ * Gemma model-router configuration as resolved from the merged settings.
+ */
+export interface GemmaConfigStatus {
+  /** True only when `gemmaModelRouter.enabled` is exactly `true`. */
+  settingsEnabled: boolean;
+  /** Port taken from `classifier.host`, or the caller's fallback port. */
+  configuredPort: number;
+}
+
+/**
+ * Resolves the Gemma configuration from the settings loaded for the current
+ * working directory.
+ *
+ * The port is read from the authority part of `classifier.host` only, so IPv6
+ * literals (`http://[::1]:9379`), credentials (`http://user:1@host:9379`) and
+ * `:<digits>` sequences in the path are not mistaken for the port. Values
+ * outside 1-65535 are ignored.
+ *
+ * @param fallbackPort Port to report when the settings cannot be loaded or
+ *   `classifier.host` does not carry a usable port.
+ * @returns The enabled flag and the effective port.
+ */
+export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus {
+  let settingsEnabled = false;
+  let configuredPort = fallbackPort;
+  try {
+    const settings = loadSettings(process.cwd());
+    const gemmaSettings = settings.merged.experimental?.gemmaModelRouter;
+    settingsEnabled = gemmaSettings?.enabled === true;
+    // [scheme://][userinfo@](bracketed IPv6 | hostname):port, followed by the
+    // end of the string or the start of a path, query or fragment.
+    const match = gemmaSettings?.classifier?.host?.match(
+      /^(?:[a-z][a-z\d+.-]*:\/\/)?(?:[^/?#@]*@)?(?:\[[^\]]*\]|[^/?#:]*):(\d+)(?=[/?#]|$)/i,
+    );
+    const parsedPort = match ? Number(match[1]) : NaN;
+    if (parsedPort >= 1 && parsedPort <= 65535) {
+      configuredPort = parsedPort;
+    }
+  } catch {
+    // Settings may fail to load in some contexts; treat as not enabled.
+  }
+  return { settingsEnabled, configuredPort };
+}
+
 /**
  * Detects the current platform and resolves the corresponding LiteRT-LM binary name.
  * Returns null if the platform is unsupported.
diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts
index 8918daa9faf..02b1bd0e4a6 100644
--- a/packages/cli/src/commands/gemma/start.ts
+++ b/packages/cli/src/commands/gemma/start.ts
@@ -21,6 +21,7 @@ import {
   getBinaryPath,
   isBinaryInstalled,
   isServerRunning,
+  resolveGemmaConfig,
 } from './platform.js';
 
 /**
@@ -78,11 +79,18 @@ export const startCommand: CommandModule = {
   builder: (yargs) =>
     yargs.option('port', {
       type: 'number',
-      default: DEFAULT_PORT,
       description: 'Port for the LiteRT server',
     }),
   handler: async (argv) => {
-    const port = Number(argv['port']);
+    let port: number | undefined;
+    if (argv['port'] !== undefined) {
+      port = Number(argv['port']);
+    }
+
+    if (!port) {
+      const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
+      port = configuredPort;
+    }
 
     if (!isBinaryInstalled()) {
       debugLogger.error(
diff --git a/packages/cli/src/commands/gemma/status.ts b/packages/cli/src/commands/gemma/status.ts
index 4a265dd9445..1e061d7bc57 100644
--- a/packages/cli/src/commands/gemma/status.ts
+++ b/packages/cli/src/commands/gemma/status.ts
@@ -6,7 +6,6 @@
 
 import type { CommandModule } from 'yargs';
 import chalk from 'chalk';
-import { loadSettings } from '../../config/settings.js';
 import { DEFAULT_PORT, GEMMA_MODEL_NAME } from './constants.js';
 import {
   detectPlatform,
@@ -16,6 +15,7 @@ import {
   isServerRunning,
   readServerPid,
   isProcessRunning,
+  resolveGemmaConfig,
 } from './platform.js';
 import { exitCli } from '../utils.js';
 
@@ -38,7 +38,9 @@ export interface GemmaStatusResult {
 export async function checkGemmaStatus(
   port?: number,
 ): Promise<GemmaStatusResult> {
-  const effectivePort = port ?? DEFAULT_PORT;
+  const { settingsEnabled, configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
+
+  const effectivePort = port ?? configuredPort;
   const binaryPath = getBinaryPath();
   const binaryInstalled = isBinaryInstalled();
   const modelDownloaded =
@@ -47,15 +49,6 @@ export async function checkGemmaStatus(
   const pid = readServerPid();
   const serverPid = pid && isProcessRunning(pid) ? pid : null;
 
-  let settingsEnabled = false;
-  try {
-    const settings = loadSettings(process.cwd());
-    const gemmaSettings = settings.merged.experimental?.gemmaModelRouter;
-    settingsEnabled = gemmaSettings?.enabled === true;
-  } catch {
-    // Settings may fail to load in some contexts; treat as not enabled.
-  }
-
   const allPassing =
     binaryInstalled && modelDownloaded && serverRunning && settingsEnabled;
 
@@ -167,11 +160,13 @@ export const statusCommand: CommandModule = {
   builder: (yargs) =>
     yargs.option('port', {
       type: 'number',
-      default: DEFAULT_PORT,
       description: 'Port to check for the LiteRT server',
     }),
   handler: async (argv) => {
-    const port = Number(argv['port']);
+    let port: number | undefined;
+    if (argv['port'] !== undefined) {
+      port = Number(argv['port']);
+    }
     const status = await checkGemmaStatus(port);
     const output = formatGemmaStatus(status);
     // Use process.stdout directly for consistent output in non-interactive mode.
diff --git a/packages/cli/src/commands/gemma/stop.ts b/packages/cli/src/commands/gemma/stop.ts
index 15db60eaa8e..409989e33c3 100644
--- a/packages/cli/src/commands/gemma/stop.ts
+++ b/packages/cli/src/commands/gemma/stop.ts
@@ -14,6 +14,7 @@ import {
   readServerPid,
   isProcessRunning,
   isServerRunning,
+  resolveGemmaConfig,
 } from './platform.js';
 
 /**
@@ -66,18 +67,25 @@ export async function stopServer(): Promise<boolean> {
 
   return true;
 }
-
 export const stopCommand: CommandModule = {
   command: 'stop',
   describe: 'Stop the LiteRT-LM server',
   builder: (yargs) =>
     yargs.option('port', {
       type: 'number',
-      default: DEFAULT_PORT,
-      description: 'Port the server is running on',
+      description: 'Port where the LiteRT server is running',
     }),
   handler: async (argv) => {
-    const port = Number(argv['port']);
+    let port: number | undefined;
+    if (argv['port'] !== undefined) {
+      port = Number(argv['port']);
+    }
+
+    if (!port) {
+      const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
+      port = configuredPort;
+    }
+
     const pid = readServerPid();
 
     if (pid !== null && isProcessRunning(pid)) {
diff --git a/packages/core/src/core/localLiteRtLmClient.test.ts b/packages/core/src/core/localLiteRtLmClient.test.ts
index c4398b5b9c1..6c64143ec3d 100644
--- a/packages/core/src/core/localLiteRtLmClient.test.ts
+++ b/packages/core/src/core/localLiteRtLmClient.test.ts
@@ -7,6 +7,8 @@
 import { describe, it, expect, vi, beforeEach } from 'vitest';
 import { LocalLiteRtLmClient } from './localLiteRtLmClient.js';
 import type { Config } from '../config/config.js';
+import { GoogleGenAI } from '@google/genai';
+
 const mockGenerateContent = vi.fn();
 
 vi.mock('@google/genai', () => {
@@ -44,6 +46,14 @@ describe('LocalLiteRtLmClient', () => {
     const result = await client.generateJson([], 'test-instruction');
 
     expect(result).toEqual({ key: 'value' });
+    expect(GoogleGenAI).toHaveBeenCalledWith(
+      expect.objectContaining({
+        apiVersion: 'v1beta',
+        httpOptions: expect.objectContaining({
+          baseUrl: 'http://test-host:1234',
+        }),
+      }),
+    );
     expect(mockGenerateContent).toHaveBeenCalledWith(
       expect.objectContaining({
         model: 'gemma:latest',
diff --git a/packages/core/src/core/localLiteRtLmClient.ts b/packages/core/src/core/localLiteRtLmClient.ts
index 798dcb57656..82fa44e87b9 100644
--- a/packages/core/src/core/localLiteRtLmClient.ts
+++ b/packages/core/src/core/localLiteRtLmClient.ts
@@ -25,6 +25,8 @@ export class LocalLiteRtLmClient {
     this.client = new GoogleGenAI({
       // The LiteRT-LM server does not require an API key, but the SDK requires one to be set even for local endpoints. This is a dummy value and is not used for authentication.
       apiKey: 'no-api-key-needed',
+      apiVersion: 'v1beta',
+      vertexai: false,
       httpOptions: {
         baseUrl: this.host,
         // If the LiteRT-LM server is started but the wrong port is set, there will be a lengthy TCP timeout (here fixed to be 10 seconds).