/**
 * Gemma settings resolved from the workspace configuration.
 */
export interface GemmaConfigStatus {
  /** True only when `experimental.gemmaModelRouter.enabled` is exactly `true`. */
  settingsEnabled: boolean;
  /** Port taken from the configured classifier host, or the caller's fallback. */
  configuredPort: number;
}

/** Highest valid TCP port number. */
const MAX_TCP_PORT = 65535;

/**
 * Extracts the explicitly written port from a host string such as
 * `http://localhost:9379`, `localhost:9379` or `http://[::1]:9379/v1`.
 *
 * Only the authority component is inspected, so digits that follow a colon in
 * the credentials, inside an IPv6 literal, or in the path/query are never
 * mistaken for the port.
 *
 * @param host Host or URL string as written in the settings.
 * @returns The port (1-65535), or `undefined` when no valid explicit port is present.
 */
function parseExplicitPort(host: string): number | undefined {
  // Drop an optional "scheme://" prefix, then keep everything up to the
  // first path, query or fragment delimiter: that is the authority.
  const withoutScheme = host.trim().replace(/^[a-z][a-z0-9+.-]*:\/\//i, '');
  const [authority = ''] = withoutScheme.split(/[/?#]/, 1);

  // Drop optional "user:password@" credentials.
  const hostAndPort = authority.slice(authority.lastIndexOf('@') + 1);

  // The host is either a bracketed IPv6 literal or a colon-free name; the port
  // must be the trailing ":digits" of the authority.
  const digits = /^(?:\[[^\]]*\]|[^:[\]]*):(\d+)$/.exec(hostAndPort)?.[1];
  if (digits === undefined) {
    return undefined;
  }

  const port = Number.parseInt(digits, 10);
  return port >= 1 && port <= MAX_TCP_PORT ? port : undefined;
}

/**
 * Resolves the Gemma configuration from the workspace settings.
 *
 * Settings are loaded for the current working directory via `loadSettings`
 * (imported from '../../config/settings.js' at the top of platform.ts).
 *
 * @param fallbackPort Port to report when the settings cannot be loaded, no
 *   classifier host is configured, or the host carries no valid explicit port.
 * @returns Whether the Gemma model router is enabled and the port to use.
 */
export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus {
  let settingsEnabled = false;
  let configuredPort = fallbackPort;
  try {
    const settings = loadSettings(process.cwd());
    const gemmaSettings = settings.merged.experimental?.gemmaModelRouter;
    settingsEnabled = gemmaSettings?.enabled === true;
    const host = gemmaSettings?.classifier?.host;
    if (typeof host === 'string') {
      configuredPort = parseExplicitPort(host) ?? fallbackPort;
    }
  } catch {
    // Settings may fail to load in some contexts; treat as not enabled.
  }
  return { settingsEnabled, configuredPort };
}
diff --git a/packages/cli/src/commands/gemma/start.ts b/packages/cli/src/commands/gemma/start.ts index 8918daa9faf..02b1bd0e4a6 100644 --- a/packages/cli/src/commands/gemma/start.ts +++ b/packages/cli/src/commands/gemma/start.ts @@ -21,6 +21,7 @@ import { getBinaryPath, isBinaryInstalled, isServerRunning, + resolveGemmaConfig, } from './platform.js'; /** @@ -78,11 +79,18 @@ export const startCommand: CommandModule = { builder: (yargs) => yargs.option('port', { type: 'number', - default: DEFAULT_PORT, description: 'Port for the LiteRT server', }), handler: async (argv) => { - const port = Number(argv['port']); + let port: number | undefined; + if (argv['port'] !== undefined) { + port = Number(argv['port']); + } + + if (!port) { + const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT); + port = configuredPort; + } if (!isBinaryInstalled()) { debugLogger.error( diff --git a/packages/cli/src/commands/gemma/status.ts b/packages/cli/src/commands/gemma/status.ts index 4a265dd9445..1e061d7bc57 100644 --- a/packages/cli/src/commands/gemma/status.ts +++ b/packages/cli/src/commands/gemma/status.ts @@ -6,7 +6,6 @@ import type { CommandModule } from 'yargs'; import chalk from 'chalk'; -import { loadSettings } from '../../config/settings.js'; import { DEFAULT_PORT, GEMMA_MODEL_NAME } from './constants.js'; import { detectPlatform, @@ -16,6 +15,7 @@ import { isServerRunning, readServerPid, isProcessRunning, + resolveGemmaConfig, } from './platform.js'; import { exitCli } from '../utils.js'; @@ -38,7 +38,9 @@ export interface GemmaStatusResult { export async function checkGemmaStatus( port?: number, ): Promise { - const effectivePort = port ?? DEFAULT_PORT; + const { settingsEnabled, configuredPort } = resolveGemmaConfig(DEFAULT_PORT); + + const effectivePort = port ?? 
configuredPort; const binaryPath = getBinaryPath(); const binaryInstalled = isBinaryInstalled(); const modelDownloaded = @@ -47,15 +49,6 @@ export async function checkGemmaStatus( const pid = readServerPid(); const serverPid = pid && isProcessRunning(pid) ? pid : null; - let settingsEnabled = false; - try { - const settings = loadSettings(process.cwd()); - const gemmaSettings = settings.merged.experimental?.gemmaModelRouter; - settingsEnabled = gemmaSettings?.enabled === true; - } catch { - // Settings may fail to load in some contexts; treat as not enabled. - } - const allPassing = binaryInstalled && modelDownloaded && serverRunning && settingsEnabled; @@ -167,11 +160,13 @@ export const statusCommand: CommandModule = { builder: (yargs) => yargs.option('port', { type: 'number', - default: DEFAULT_PORT, description: 'Port to check for the LiteRT server', }), handler: async (argv) => { - const port = Number(argv['port']); + let port: number | undefined; + if (argv['port'] !== undefined) { + port = Number(argv['port']); + } const status = await checkGemmaStatus(port); const output = formatGemmaStatus(status); // Use process.stdout directly for consistent output in non-interactive mode. 
diff --git a/packages/cli/src/commands/gemma/stop.ts b/packages/cli/src/commands/gemma/stop.ts index 15db60eaa8e..409989e33c3 100644 --- a/packages/cli/src/commands/gemma/stop.ts +++ b/packages/cli/src/commands/gemma/stop.ts @@ -14,6 +14,7 @@ import { readServerPid, isProcessRunning, isServerRunning, + resolveGemmaConfig, } from './platform.js'; /** @@ -66,18 +67,25 @@ export async function stopServer(): Promise { return true; } - export const stopCommand: CommandModule = { command: 'stop', describe: 'Stop the LiteRT-LM server', builder: (yargs) => yargs.option('port', { type: 'number', - default: DEFAULT_PORT, - description: 'Port the server is running on', + description: 'Port where the LiteRT server is running', }), handler: async (argv) => { - const port = Number(argv['port']); + let port: number | undefined; + if (argv['port'] !== undefined) { + port = Number(argv['port']); + } + + if (!port) { + const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT); + port = configuredPort; + } + const pid = readServerPid(); if (pid !== null && isProcessRunning(pid)) { diff --git a/packages/core/src/core/localLiteRtLmClient.test.ts b/packages/core/src/core/localLiteRtLmClient.test.ts index c4398b5b9c1..6c64143ec3d 100644 --- a/packages/core/src/core/localLiteRtLmClient.test.ts +++ b/packages/core/src/core/localLiteRtLmClient.test.ts @@ -7,6 +7,8 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; import { LocalLiteRtLmClient } from './localLiteRtLmClient.js'; import type { Config } from '../config/config.js'; +import { GoogleGenAI } from '@google/genai'; + const mockGenerateContent = vi.fn(); vi.mock('@google/genai', () => { @@ -44,6 +46,14 @@ describe('LocalLiteRtLmClient', () => { const result = await client.generateJson([], 'test-instruction'); expect(result).toEqual({ key: 'value' }); + expect(GoogleGenAI).toHaveBeenCalledWith( + expect.objectContaining({ + apiVersion: 'v1beta', + httpOptions: expect.objectContaining({ + baseUrl: 
'http://test-host:1234', + }), + }), + ); expect(mockGenerateContent).toHaveBeenCalledWith( expect.objectContaining({ model: 'gemma:latest', diff --git a/packages/core/src/core/localLiteRtLmClient.ts b/packages/core/src/core/localLiteRtLmClient.ts index 798dcb57656..82fa44e87b9 100644 --- a/packages/core/src/core/localLiteRtLmClient.ts +++ b/packages/core/src/core/localLiteRtLmClient.ts @@ -25,6 +25,8 @@ export class LocalLiteRtLmClient { this.client = new GoogleGenAI({ // The LiteRT-LM server does not require an API key, but the SDK requires one to be set even for local endpoints. This is a dummy value and is not used for authentication. apiKey: 'no-api-key-needed', + apiVersion: 'v1beta', + vertexai: false, httpOptions: { baseUrl: this.host, // If the LiteRT-LM server is started but the wrong port is set, there will be a lengthy TCP timeout (here fixed to be 10 seconds).