diff --git a/services/gastown/container/src/process-manager.test.ts b/services/gastown/container/src/process-manager.test.ts
new file mode 100644
index 000000000..91ffdcce4
--- /dev/null
+++ b/services/gastown/container/src/process-manager.test.ts
@@ -0,0 +1,119 @@
+import { describe, it, expect, vi } from 'vitest';
+
+// Mock heavy imports so the module can be loaded without spinning up
+// a real SDK server or hono app.
+vi.mock('@kilocode/sdk', () => ({
+  createKilo: vi.fn(),
+}));
+vi.mock('./agent-runner', () => ({
+  runAgent: vi.fn(),
+  buildKiloConfigContent: vi.fn(),
+  resolveGitCredentials: vi.fn(),
+  writeMayorSystemPromptToAgentsMd: vi.fn(),
+}));
+vi.mock('./control-server', () => ({
+  getCurrentTownConfig: vi.fn(() => ({})),
+  getLastAppliedEnvVarKeys: vi.fn(() => new Set()),
+  RESERVED_ENV_KEYS: new Set(),
+}));
+vi.mock('./completion-reporter', () => ({
+  reportAgentCompleted: vi.fn(),
+  reportMayorWaiting: vi.fn(),
+}));
+vi.mock('./token-refresh', () => ({
+  refreshTokenIfNearExpiry: vi.fn(),
+}));
+
+const { applyModelToSession } = await import('./process-manager');
+
+type PromptCall = {
+  path: { id: string };
+  body: {
+    parts: Array<{ type: 'text'; text: string }>;
+    model: { providerID: string; modelID: string };
+    noReply?: boolean;
+  };
+};
+
+function makeClient(impl?: (args: PromptCall) => Promise<unknown>) {
+  const calls: PromptCall[] = [];
+  const prompt = vi.fn(async (args: PromptCall) => {
+    calls.push(args);
+    if (impl) return impl(args);
+    return {};
+  });
+  return { client: { session: { prompt } }, calls, prompt };
+}
+
+describe('applyModelToSession', () => {
+  it('sends the startup prompt with the model for a fresh session', async () => {
+    const { client, calls } = makeClient();
+    await applyModelToSession({
+      client,
+      sessionId: 'sess-new',
+      model: 'anthropic/claude-sonnet-4.6',
+      prompt: 'STARTUP PROMPT',
+      resumedSession: false,
+    });
+    expect(calls).toHaveLength(1);
+    expect(calls[0].path).toEqual({ id: 'sess-new' });
+    expect(calls[0].body.parts).toEqual([{ type: 'text', text: 'STARTUP PROMPT' }]);
+    expect(calls[0].body.model).toEqual({
+      providerID: 'kilo',
+      modelID: 'anthropic/claude-sonnet-4.6',
+    });
+    expect(calls[0].body.noReply).toBeUndefined();
+  });
+
+  it('pushes the new model with noReply:true for a resumed session without replaying the startup prompt', async () => {
+    const { client, calls } = makeClient();
+    await applyModelToSession({
+      client,
+      sessionId: 'sess-resumed',
+      model: 'anthropic/claude-opus-4.7',
+      prompt: 'STARTUP PROMPT (must not be sent)',
+      resumedSession: true,
+    });
+    expect(calls).toHaveLength(1);
+    expect(calls[0].path).toEqual({ id: 'sess-resumed' });
+    expect(calls[0].body.model).toEqual({
+      providerID: 'kilo',
+      modelID: 'anthropic/claude-opus-4.7',
+    });
+    expect(calls[0].body.noReply).toBe(true);
+    expect(calls[0].body.parts).toEqual([{ type: 'text', text: '' }]);
+    // Ensure the MAYOR_STARTUP_PROMPT is NOT replayed on resume.
+    expect(calls[0].body.parts[0].text).not.toContain('STARTUP PROMPT');
+  });
+
+  it('swallows errors from the resumed-session prompt so the hot-swap can continue', async () => {
+    const { client } = makeClient(async () => {
+      throw new Error('simulated SDK failure');
+    });
+    // Should not throw — errors on the noReply path are logged and ignored.
+    await expect(
+      applyModelToSession({
+        client,
+        sessionId: 'sess-resumed',
+        model: 'anthropic/claude-opus-4.7',
+        prompt: 'STARTUP PROMPT',
+        resumedSession: true,
+      })
+    ).resolves.toBeUndefined();
+  });
+
+  it('propagates errors for a fresh session (so the hot-swap can roll back)', async () => {
+    const { client } = makeClient(async () => {
+      throw new Error('simulated SDK failure');
+    });
+    await expect(
+      applyModelToSession({
+        client,
+        sessionId: 'sess-new',
+        model: 'anthropic/claude-sonnet-4.6',
+        prompt: 'STARTUP PROMPT',
+        resumedSession: false,
+      })
+    ).rejects.toThrow('simulated SDK failure');
+  });
+});
diff --git a/services/gastown/container/src/process-manager.ts b/services/gastown/container/src/process-manager.ts
index 2c58efa79..33a37fa29 100644
--- a/services/gastown/container/src/process-manager.ts
+++ b/services/gastown/container/src/process-manager.ts
@@ -1815,6 +1815,80 @@ export async function refreshTokenForAllAgents(): Promise<
   return Promise.all(snapshot.map(restartAgent));
 }
 
+/**
+ * Minimal shape of `client.session` needed by {@link applyModelToSession}.
+ * Defined structurally so tests can pass a fake without pulling in the
+ * whole KiloClient type.
+ */
+type SessionPromptClient = {
+  session: {
+    prompt: (args: {
+      path: { id: string };
+      body: {
+        parts: Array<{ type: 'text'; text: string }>;
+        model: { providerID: string; modelID: string };
+        noReply?: boolean;
+      };
+    }) => Promise<unknown>;
+  };
+};
+
+/**
+ * Push a model selection onto a mayor session.
+ *
+ * For a freshly created session, sends the startup prompt together with
+ * the model param so the first turn runs the configured model.
+ *
+ * For a resumed session the startup prompt MUST NOT be replayed (it
+ * would recreate the duplicate turn regression fixed by 9785570b9),
+ * but the per-session model on the SDK server still needs to be updated
+ * so the next user turn uses the newly-selected model. We do this by
+ * sending a `noReply: true` prompt that carries only the model param;
+ * the SDK treats this as a state update and does not trigger the model.
+ *
+ * Errors on the resumed path are swallowed: if pushing the model fails,
+ * the mayor falls back to whichever model the SDK server loaded from
+ * KILO_CONFIG_CONTENT at startup, which we have already updated.
+ */
+export async function applyModelToSession(params: {
+  client: SessionPromptClient;
+  sessionId: string;
+  model: string;
+  prompt: string;
+  resumedSession: boolean;
+}): Promise<void> {
+  const { client, sessionId, model, prompt, resumedSession } = params;
+  const modelParam = { providerID: 'kilo', modelID: model };
+  if (!resumedSession) {
+    await client.session.prompt({
+      path: { id: sessionId },
+      body: {
+        parts: [{ type: 'text', text: prompt }],
+        model: modelParam,
+      },
+    });
+    return;
+  }
+  try {
+    await client.session.prompt({
+      path: { id: sessionId },
+      body: {
+        parts: [{ type: 'text', text: '' }],
+        model: modelParam,
+        noReply: true,
+      },
+    });
+    console.log(
+      `${MANAGER_LOG} updateAgentModel: pushed model=${model} to resumed session ${sessionId}`
+    );
+  } catch (err) {
+    console.warn(
+      `${MANAGER_LOG} updateAgentModel: failed to push model to resumed session ${sessionId}:`,
+      err
+    );
+  }
+}
+
 /**
  * Update the model for a running agent by restarting its SDK server with
  * new KILO_CONFIG_CONTENT. The kilo serve child process reads the model
@@ -1958,16 +2032,13 @@ export async function updateAgentModel(
     const prompt = conversationHistory
       ? `${conversationHistory}\n\n${MAYOR_STARTUP_PROMPT}`
       : MAYOR_STARTUP_PROMPT;
-    if (!resumedSession) {
-      const modelParam = { providerID: 'kilo', modelID: model };
-      await client.session.prompt({
-        path: { id: agent.sessionId },
-        body: {
-          parts: [{ type: 'text', text: prompt }],
-          model: modelParam,
-        },
-      });
-    }
+    await applyModelToSession({
+      client,
+      sessionId: agent.sessionId,
+      model,
+      prompt,
+      resumedSession,
+    });
     agent.messageCount = 1;
 
     // 6. New server is healthy — now tear down the old one.
diff --git a/services/gastown/container/vitest.config.ts b/services/gastown/container/vitest.config.ts
index 468ee375f..32b052a10 100644
--- a/services/gastown/container/vitest.config.ts
+++ b/services/gastown/container/vitest.config.ts
@@ -3,6 +3,6 @@ import { defineConfig } from 'vitest/config';
 export default defineConfig({
   test: {
     globals: false,
-    include: ['plugin/**/*.test.ts'],
+    include: ['plugin/**/*.test.ts', 'src/**/*.test.ts'],
   },
 });