From 1debdf5fb77a2726b2634d1ef0af3413656fea9c Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 17 Mar 2026 23:04:37 -0500 Subject: [PATCH 1/2] fix(gastown): prevent town deletion from leaking DOs and containers (#1182) Town deletion via tRPC only removed rows from GastownUserDO, leaving TownDO, TownContainerDO, and AgentDOs running indefinitely. Six compounding root causes: 1. tRPC deleteTown now calls TownDO.destroy() before removing user rows 2. TownDO.destroy() now destroys TownContainerDO (kills container) 3. Alarm has an exit condition: skips re-arming when town:id is missing 4. armAlarmIfNeeded() checks for town:id to prevent alarm resurrection 5. Compatibility date updated to 2026-02-24 so deleteAll() clears alarms 6. Integration tests for the full deletion flow Closes #1182 --- cloudflare-gastown/src/dos/Town.do.ts | 29 ++- cloudflare-gastown/src/trpc/router.ts | 8 + .../test/integration/town-deletion.test.ts | 212 ++++++++++++++++++ cloudflare-gastown/wrangler.jsonc | 2 +- cloudflare-gastown/wrangler.test.jsonc | 2 +- 5 files changed, 249 insertions(+), 4 deletions(-) create mode 100644 cloudflare-gastown/test/integration/town-deletion.test.ts diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 839dfe3d32..c8eada5164 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -2561,6 +2561,17 @@ export class TownDO extends DurableObject { // ══════════════════════════════════════════════════════════════════ async alarm(): Promise { + // Exit condition: if this DO was destroyed, don't re-arm. + // After destroy(), deleteAll() wipes storage but may not clear + // the alarm (compat date < 2026-02-24). A resurrected alarm + // will find no town:id — stop the loop immediately. + const storedId = await this.ctx.storage.get('town:id'); + if (!storedId) { + console.log(`${TOWN_LOG} alarm: no town:id — town was destroyed, not re-arming`); + await this.ctx.storage.deleteAlarm(); + return; + } + await this.ensureInitialized(); const townId = this.townId; console.log(`${TOWN_LOG} alarm: fired for town=${townId}`); @@ -3801,6 +3812,11 @@ export class TownDO extends DurableObject { // ── Alarm helpers ───────────────────────────────────────────────── private async armAlarmIfNeeded(): Promise { + // Don't resurrect the alarm on a destroyed DO. After destroy(), + // town:id is wiped — if it's missing, the town was deleted. + const storedId = await this.ctx.storage.get('town:id'); + if (!storedId) return; + const current = await this.ctx.storage.getAlarm(); if (!current || current < Date.now()) { await this.ctx.storage.setAlarm(Date.now() + ACTIVE_ALARM_INTERVAL_MS); @@ -4044,13 +4060,22 @@ export class TownDO extends DurableObject { async destroy(): Promise { console.log(`${TOWN_LOG} destroy: clearing all storage and alarms`); + // Destroy all AgentDOs (clears agent_events tables) try { const allAgents = agents.listAgents(this.sql); await Promise.allSettled( allAgents.map(agent => getAgentDOStub(this.env, agent.id).destroy()) ); - } catch { - // Best-effort + } catch (err) { + console.warn(`${TOWN_LOG} destroy: agent cleanup failed`, err); + } + + // Destroy TownContainerDO (sends SIGKILL to container process, clears state) + try { + const containerStub = getTownContainerStub(this.env, this.townId); + await containerStub.destroy(); + } catch (err) { + console.warn(`${TOWN_LOG} destroy: container cleanup failed`, err); } await this.ctx.storage.deleteAlarm(); diff --git a/cloudflare-gastown/src/trpc/router.ts b/cloudflare-gastown/src/trpc/router.ts index c3d2f69347..890769ed2c 100644 --- a/cloudflare-gastown/src/trpc/router.ts +++ b/cloudflare-gastown/src/trpc/router.ts @@ -157,6 +157,14 @@ export const gastownRouter = router({ .input(z.object({ townId: z.string().uuid() })) .mutation(async ({ ctx, input }) => { await verifyTownOwnership(ctx.env, ctx.userId, input.townId); + + // Destroy the Town DO (agents, container, alarms, storage). + // Let failures propagate — if cleanup fails, don't delete the + // user record (that's the only reference for recovering the + // leaked resources). + const townDOStub = getTownDOStub(ctx.env, input.townId); + await townDOStub.destroy(); + const userStub = getGastownUserStub(ctx.env, ctx.userId); await userStub.deleteTown(input.townId); }), diff --git a/cloudflare-gastown/test/integration/town-deletion.test.ts b/cloudflare-gastown/test/integration/town-deletion.test.ts new file mode 100644 index 0000000000..dff188cff3 --- /dev/null +++ b/cloudflare-gastown/test/integration/town-deletion.test.ts @@ -0,0 +1,212 @@ +import { env, runDurableObjectAlarm } from 'cloudflare:test'; +import { describe, it, expect, beforeEach } from 'vitest'; + +function getTownStub(name: string) { + return env.TOWN.get(env.TOWN.idFromName(name)); +} + +function getUserStub(name: string) { + return env.GASTOWN_USER.get(env.GASTOWN_USER.idFromName(name)); +} + +function getAgentStub(name: string) { + return env.AGENT.get(env.AGENT.idFromName(name)); +} + +describe('Town deletion (#1182)', () => { + let townName: string; + let town: ReturnType; + + beforeEach(() => { + townName = `town-del-${crypto.randomUUID()}`; + town = getTownStub(townName); + }); + + // ── TownDO.destroy() ────────────────────────────────────────────────── + + describe('TownDO.destroy()', () => { + it('should clear all storage so beads are no longer retrievable', async () => { + await town.createBead({ type: 'issue', title: 'Doomed bead' }); + await town.registerAgent({ + role: 'polecat', + name: 'P1', + identity: `del-agent-${townName}`, + }); + + await town.destroy(); + + // After destroy, the same stub should find no data (storage was cleared) + const beads = await town.listBeads({}); + expect(beads).toHaveLength(0); + }); + + it('should clear all agents from storage', async () => { + await town.registerAgent({ + role: 'polecat', + name: 'P1', + identity: `del-agents-a-${townName}`, + }); + await town.registerAgent({ + role: 'refinery', + name: 'R1', + identity: `del-agents-b-${townName}`, + }); + + const agentsBefore = await town.listAgents(); + expect(agentsBefore).toHaveLength(2); + + await town.destroy(); + + const agentsAfter = await town.listAgents(); + expect(agentsAfter).toHaveLength(0); + }); + + it('should delete the alarm so it does not re-fire', async () => { + // setTownId is required for armAlarmIfNeeded to arm the alarm + await town.setTownId(townName); + await town.slingBead({ type: 'issue', title: 'Alarm bead', rigId: 'test-rig' }); + const ranBefore = await runDurableObjectAlarm(town); + expect(ranBefore).toBe(true); + + await town.destroy(); + + // After destroy, the alarm should not re-fire + const ranAfter = await runDurableObjectAlarm(town); + expect(ranAfter).toBe(false); + }); + + it('should destroy AgentDOs (clearing their event tables)', async () => { + const agent = await town.registerAgent({ + role: 'polecat', + name: 'P1', + identity: `del-agentdo-${townName}`, + }); + + // Write events to the AgentDO + const agentDO = getAgentStub(agent.id); + await agentDO.appendEvents([{ type: 'session.start', data: JSON.stringify({ test: true }) }]); + + const eventsBefore = await agentDO.getEvents(); + expect(eventsBefore.length).toBeGreaterThan(0); + + await town.destroy(); + + // AgentDO should have been destroyed — events cleared + const eventsAfter = await agentDO.getEvents(); + expect(eventsAfter).toHaveLength(0); + }); + }); + + // ── Alarm exit condition ──────────────────────────────────────────────── + + describe('alarm exit condition', () => { + it('should not re-arm alarm on a destroyed DO', async () => { + // setTownId is required for armAlarmIfNeeded to arm the alarm + await town.setTownId(townName); + await town.configureRig({ + rigId: 'test-rig', + townId: townName, + gitUrl: 'https://github.com/org/repo.git', + defaultBranch: 'main', + userId: 'test-user', + }); + + // Arm alarm + await town.slingBead({ type: 'issue', title: 'Active bead', rigId: 'test-rig' }); + const ranBefore = await runDurableObjectAlarm(town); + expect(ranBefore).toBe(true); + + await town.destroy(); + + // deleteAll() with compat date >= 2026-02-24 clears alarms, + // so this should return false + const ranAfterDestroy = await runDurableObjectAlarm(town); + expect(ranAfterDestroy).toBe(false); + }); + + it('should not resurrect alarm when accessing a destroyed town', async () => { + await town.createBead({ type: 'issue', title: 'Soon to die' }); + + await town.destroy(); + + // Accessing the destroyed DO triggers ensureInitialized() → + // armAlarmIfNeeded(), which should NOT re-arm because town:id is gone + const beads = await town.listBeads({}); + expect(beads).toHaveLength(0); + + // Alarm should NOT have been re-armed + const ran = await runDurableObjectAlarm(town); + expect(ran).toBe(false); + }); + }); + + // ── GastownUserDO.deleteTown() ───────────────────────────────────────── + + describe('GastownUserDO.deleteTown()', () => { + it('should remove the town from the user list', async () => { + const userId = `user-${crypto.randomUUID()}`; + const userStub = getUserStub(userId); + + // createTown generates its own id; it requires name + owner_user_id + const created = await userStub.createTown({ + name: 'Test Town', + owner_user_id: userId, + }); + + const townsBefore = await userStub.listTowns(); + expect(townsBefore).toHaveLength(1); + + const deleted = await userStub.deleteTown(created.id); + expect(deleted).toBe(true); + + const townsAfter = await userStub.listTowns(); + expect(townsAfter).toHaveLength(0); + }); + }); + + // ── Full deletion flow (tRPC-equivalent) ───────────────────────────────── + + describe('full deletion flow', () => { + it('should clean up TownDO storage and user records', async () => { + const userId = `user-${crypto.randomUUID()}`; + const userStub = getUserStub(userId); + + // createTown generates its own id; it requires name + owner_user_id + const created = await userStub.createTown({ + name: 'Full Delete Town', + owner_user_id: userId, + }); + const townId = created.id; + + // Set up TownDO with data (setTownId mirrors the tRPC createTown flow) + const townStub = getTownStub(townId); + await townStub.setTownId(townId); + await townStub.createBead({ type: 'issue', title: 'Bead in deleted town' }); + await townStub.registerAgent({ + role: 'polecat', + name: 'P1', + identity: `full-del-${townId}`, + }); + + // Simulate the fixed tRPC deleteTown flow: + // 1. Destroy TownDO (agents, container, alarms, storage) + await townStub.destroy(); + // 2. Remove from user's list + await userStub.deleteTown(townId); + + // Verify user-side cleanup + const towns = await userStub.listTowns(); + expect(towns).toHaveLength(0); + + // Verify TownDO-side cleanup + const beads = await townStub.listBeads({}); + expect(beads).toHaveLength(0); + const agents = await townStub.listAgents(); + expect(agents).toHaveLength(0); + + // Verify alarm is dead + const ran = await runDurableObjectAlarm(townStub); + expect(ran).toBe(false); + }); + }); +}); diff --git a/cloudflare-gastown/wrangler.jsonc b/cloudflare-gastown/wrangler.jsonc index 5b8945b319..ef4b6e9e34 100644 --- a/cloudflare-gastown/wrangler.jsonc +++ b/cloudflare-gastown/wrangler.jsonc @@ -2,7 +2,7 @@ "$schema": "node_modules/wrangler/config-schema.json", "name": "gastown", "main": "src/gastown.worker.ts", - "compatibility_date": "2026-01-27", + "compatibility_date": "2026-02-24", "compatibility_flags": ["nodejs_compat"], "placement": { "mode": "smart" }, "observability": { "enabled": true }, diff --git a/cloudflare-gastown/wrangler.test.jsonc b/cloudflare-gastown/wrangler.test.jsonc index 9df67acfb3..c986338136 100644 --- a/cloudflare-gastown/wrangler.test.jsonc +++ b/cloudflare-gastown/wrangler.test.jsonc @@ -3,7 +3,7 @@ // Test configuration - stripped down for vitest-pool-workers "name": "gastown-test", "main": "src/gastown.worker.ts", - "compatibility_date": "2026-01-27", + "compatibility_date": "2026-02-24", "compatibility_flags": ["nodejs_compat", "service_binding_extra_handlers"], "durable_objects": { From 86217966fd477837af1235ab2ee4884192372b2f Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 17 Mar 2026 23:04:45 -0500 Subject: [PATCH 2/2] =?UTF-8?q?fix(gastown):=20fix=20terminal=20stability?= =?UTF-8?q?=20=E2=80=94=20reconnection,=20resize=20debounce,=20control=20f?= =?UTF-8?q?rame=20filter=20(#1195)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three compounding bugs caused terminal instability: 1. PTY WebSockets had no reconnection logic — container sleep/restart permanently froze the terminal. Now reconnects with exponential backoff (1s→2s→4s→8s, 8 attempts) and re-creates PTY sessions. 2. Undebounced ResizeObserver fired resize storms during CSS transitions, causing PTY/xterm dimension mismatches (blacked-out cells). Now debounced at 150ms. 3. SDK cursor metadata (0x00 prefix + JSON) written as raw text to xterm. Now filtered on both binary and string WebSocket frames. Additionally: - Deduplicated terminal setup: MayorTerminalPane and AgentTerminalPane now use the shared useXtermPty hook instead of duplicating ~150 lines of xterm/WebSocket/resize logic each. - Added visual connection status indicator (green=connected, yellow=reconnecting, gray=disconnected) to all terminal panes. Closes #1195 --- src/components/gastown/TerminalBar.tsx | 367 +++---------------------- src/components/gastown/useXtermPty.ts | 249 +++++++++++++---- 2 files changed, 234 insertions(+), 382 deletions(-) diff --git a/src/components/gastown/TerminalBar.tsx b/src/components/gastown/TerminalBar.tsx index c8fe14e114..1f88c3ba07 100644 --- a/src/components/gastown/TerminalBar.tsx +++ b/src/components/gastown/TerminalBar.tsx @@ -8,10 +8,9 @@ import { useGastownTRPC, gastownWsUrl } from '@/lib/gastown/trpc'; import { useSidebar } from '@/components/ui/sidebar'; import { useTerminalBar } from './TerminalBarContext'; import { useDrawerStack } from './DrawerStack'; +import { useXtermPty } from './useXtermPty'; import { ChevronDown, ChevronUp, Crown, Activity, Terminal as TerminalIcon, X } from 'lucide-react'; import { motion, AnimatePresence } from 'motion/react'; -import type { Terminal } from '@xterm/xterm'; -import type { FitAddon } from '@xterm/addon-fit'; const COLLAPSED_HEIGHT = 38; const EXPANDED_HEIGHT = 300; @@ -606,20 +605,35 @@ function eventTypeColor(type: string): string { } } +// ── Terminal Status Badge ───────────────────────────────────────────────── + +function TerminalStatusBadge({ + connectionStatus, + status, +}: { + connectionStatus: 'connected' | 'reconnecting' | 'disconnected'; + status: string; +}) { + const dotColor = + connectionStatus === 'connected' + ? 'bg-emerald-400' + : connectionStatus === 'reconnecting' + ? 'animate-pulse bg-yellow-400' + : 'bg-white/20'; + + return ( +
+ + {status} +
+ ); +} + // ── Mayor Terminal Pane ────────────────────────────────────────────────── function MayorTerminalPane({ townId, collapsed }: { townId: string; collapsed: boolean }) { const trpc = useGastownTRPC(); const queryClient = useQueryClient(); - const [connected, setConnected] = useState(false); - const [status, setStatus] = useState('Initializing...'); - - const terminalRef = useRef(null); - const wsRef = useRef(null); - const xtermRef = useRef(null); - const fitAddonRef = useRef(null); - const resizeObserverRef = useRef(null); - const ptyRef = useRef<{ id: string } | null>(null); const ensureMayor = useMutation( trpc.gastown.ensureMayor.mutationOptions({ @@ -650,166 +664,12 @@ function MayorTerminalPane({ townId, collapsed }: { townId: string; collapsed: b const mayorAgentId = statusQuery.data?.session?.agentId ?? null; - const createPty = useMutation( - trpc.gastown.createPtySession.mutationOptions({ - onError: err => setStatus(`Error: ${err.message}`), - }) - ); - - const resizePty = useMutation(trpc.gastown.resizePtySession.mutationOptions({})); - const resizeMutateRef = useRef(resizePty.mutate); - resizeMutateRef.current = resizePty.mutate; - - const connectedAgentRef = useRef(null); - useEffect(() => { - if (!mayorAgentId || mayorAgentId === connectedAgentRef.current) return; - const agentId = mayorAgentId; - connectedAgentRef.current = agentId; - - let disposed = false; - - async function init() { - const container = terminalRef.current; - if (!container) return; - - const [{ Terminal }, { FitAddon }, { WebLinksAddon }] = await Promise.all([ - import('@xterm/xterm'), - import('@xterm/addon-fit'), - import('@xterm/addon-web-links'), - ]); - - if (disposed) return; - - xtermRef.current?.dispose(); - - const fitAddon = new FitAddon(); - const webLinksAddon = new WebLinksAddon(); - - const term = new Terminal({ - cursorBlink: true, - fontSize: 13, - fontFamily: 'ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, monospace', - theme: { - background: '#0a0a0a', - foreground: '#e0e0e0', - cursor: '#e0e0e0', - selectionBackground: '#3a3a5a', - }, - allowProposedApi: true, - // Disable xterm's scrollback so kilo's TUI handles all scrolling. - // Without this, xterm's viewport captures mouse wheel events and - // prevents the TUI's own scroll from working. - scrollback: 0, - }); - - term.loadAddon(fitAddon); - term.loadAddon(webLinksAddon); - term.open(container); - fitAddon.fit(); - - xtermRef.current = term; - fitAddonRef.current = fitAddon; - - setStatus('Connecting to mayor...'); - - function doResize(cols: number, rows: number) { - if (!ptyRef.current) return; - resizeMutateRef.current({ - townId, - agentId, - ptyId: ptyRef.current.id, - cols, - rows, - }); - } - - let result: { pty: { id: string }; wsUrl: string } | null = null; - for (let attempt = 0; attempt < 10 && !disposed; attempt++) { - try { - result = await new Promise<{ pty: { id: string }; wsUrl: string }>((resolve, reject) => { - createPty.mutate({ townId, agentId }, { onSuccess: resolve, onError: reject }); - }); - break; - } catch { - if (disposed) return; - setStatus(`Waiting for mayor... (${attempt + 1})`); - await new Promise(r => setTimeout(r, 3_000)); - } - } - - if (disposed || !result) { - if (!disposed && !result) setStatus('Failed to connect to mayor'); - return; - } - - ptyRef.current = result.pty; - setStatus('Connecting...'); - - const ws = new WebSocket(gastownWsUrl(result.wsUrl)); - ws.binaryType = 'arraybuffer'; - wsRef.current = ws; - - ws.onopen = () => { - if (disposed) return; - setConnected(true); - setStatus('Connected'); - const dims = fitAddon.proposeDimensions(); - if (dims) doResize(dims.cols, dims.rows); - }; - - ws.onmessage = (e: MessageEvent) => { - if (e.data instanceof ArrayBuffer) { - term.write(new Uint8Array(e.data)); - } else if (typeof e.data === 'string') { - if (e.data.startsWith('{')) { - try { - JSON.parse(e.data); - return; - } catch { - // not JSON control message - } - } - term.write(e.data); - } - }; - - ws.onclose = () => { - if (disposed) return; - setConnected(false); - setStatus('Disconnected'); - }; - - ws.onerror = () => { - if (disposed) return; - setStatus('Connection error'); - }; - - term.onData(data => { - if (ws.readyState === WebSocket.OPEN) ws.send(data); - }); - - term.onResize(({ cols, rows }) => doResize(cols, rows)); - - const observer = new ResizeObserver(() => fitAddon.fit()); - observer.observe(container); - resizeObserverRef.current = observer; - } - - void init(); - - return () => { - disposed = true; - resizeObserverRef.current?.disconnect(); - resizeObserverRef.current = null; - wsRef.current?.close(1000, 'Mayor terminal unmount'); - wsRef.current = null; - xtermRef.current?.dispose(); - xtermRef.current = null; - fitAddonRef.current = null; - ptyRef.current = null; - connectedAgentRef.current = null; - }; - }, [mayorAgentId, townId]); + const { terminalRef, connectionStatus, status, fitAddonRef } = useXtermPty({ + townId, + agentId: mayorAgentId, + retries: 10, + retryDelay: 3_000, + }); const { state: sidebarState } = useSidebar(); @@ -822,11 +682,7 @@ function MayorTerminalPane({ townId, collapsed }: { townId: string; collapsed: b return (
- {/* Status indicator overlaid top-right */} -
- - {status} -
+
); @@ -835,163 +691,14 @@ function MayorTerminalPane({ townId, collapsed }: { townId: string; collapsed: b // ── Agent Terminal Pane ────────────────────────────────────────────────── function AgentTerminalPane({ townId, agentId }: { townId: string; agentId: string }) { - const trpc = useGastownTRPC(); - const terminalRef = useRef(null); - const wsRef = useRef(null); - const xtermRef = useRef(null); - const fitAddonRef = useRef(null); - const resizeObserverRef = useRef(null); - const ptyRef = useRef<{ id: string } | null>(null); - const [status, setStatus] = useState('Initializing...'); - const [connected, setConnected] = useState(false); - - const createPty = useMutation( - trpc.gastown.createPtySession.mutationOptions({ - onError: err => setStatus(`Error: ${err.message}`), - }) - ); - - const resizePty = useMutation(trpc.gastown.resizePtySession.mutationOptions({})); - const resizeMutateRef = useRef(resizePty.mutate); - resizeMutateRef.current = resizePty.mutate; - - useEffect(() => { - let disposed = false; - - async function init() { - const container = terminalRef.current; - if (!container) return; - - const [{ Terminal }, { FitAddon }, { WebLinksAddon }] = await Promise.all([ - import('@xterm/xterm'), - import('@xterm/addon-fit'), - import('@xterm/addon-web-links'), - ]); - - if (disposed) return; - - const fitAddon = new FitAddon(); - const webLinksAddon = new WebLinksAddon(); - - const term = new Terminal({ - cursorBlink: true, - fontSize: 13, - fontFamily: 'ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, monospace', - theme: { - background: '#0a0a0a', - foreground: '#e0e0e0', - cursor: '#e0e0e0', - selectionBackground: '#3a3a5a', - }, - allowProposedApi: true, - scrollback: 0, - }); - - term.loadAddon(fitAddon); - term.loadAddon(webLinksAddon); - term.open(container); - fitAddon.fit(); - - xtermRef.current = term; - fitAddonRef.current = fitAddon; - - setStatus('Creating PTY session...'); - - function doResize(cols: number, rows: number) { - if (!ptyRef.current) return; - resizeMutateRef.current({ townId, agentId, ptyId: ptyRef.current.id, cols, rows }); - } - - let result: { pty: { id: string }; wsUrl: string }; - try { - result = await new Promise<{ pty: { id: string }; wsUrl: string }>((resolve, reject) => { - createPty.mutate({ townId, agentId }, { onSuccess: resolve, onError: reject }); - }); - } catch (err) { - if (!disposed) { - setStatus( - `Error: ${err instanceof Error ? err.message : 'Failed to create PTY session'}` - ); - } - return; - } - - if (disposed) return; - - ptyRef.current = result.pty; - setStatus('Connecting...'); - - const ws = new WebSocket(gastownWsUrl(result.wsUrl)); - ws.binaryType = 'arraybuffer'; - wsRef.current = ws; - - ws.onopen = () => { - if (disposed) return; - setConnected(true); - setStatus('Connected'); - const dims = fitAddon.proposeDimensions(); - if (dims) doResize(dims.cols, dims.rows); - }; - - ws.onmessage = (e: MessageEvent) => { - if (e.data instanceof ArrayBuffer) { - term.write(new Uint8Array(e.data)); - } else if (typeof e.data === 'string') { - if (e.data.startsWith('{')) { - try { - JSON.parse(e.data); - return; - } catch { - // not JSON - } - } - term.write(e.data); - } - }; - - ws.onclose = () => { - if (disposed) return; - setConnected(false); - setStatus('Disconnected'); - }; - - ws.onerror = () => { - if (disposed) return; - setStatus('Connection error'); - }; - - term.onData(data => { - if (ws.readyState === WebSocket.OPEN) ws.send(data); - }); - - term.onResize(({ cols, rows }) => doResize(cols, rows)); - - const observer = new ResizeObserver(() => fitAddon.fit()); - observer.observe(container); - resizeObserverRef.current = observer; - } - - void init(); - - return () => { - disposed = true; - resizeObserverRef.current?.disconnect(); - resizeObserverRef.current = null; - wsRef.current?.close(1000, 'Agent terminal unmount'); - wsRef.current = null; - xtermRef.current?.dispose(); - xtermRef.current = null; - fitAddonRef.current = null; - ptyRef.current = null; - }; - }, [townId, agentId]); + const { terminalRef, connectionStatus, status } = useXtermPty({ + townId, + agentId, + }); return (
-
- - {status} -
+
); diff --git a/src/components/gastown/useXtermPty.ts b/src/components/gastown/useXtermPty.ts index e520b55693..75c432fd29 100644 --- a/src/components/gastown/useXtermPty.ts +++ b/src/components/gastown/useXtermPty.ts @@ -1,6 +1,6 @@ 'use client'; -import { useEffect, useRef, useState } from 'react'; +import { useEffect, useRef, useState, useCallback } from 'react'; import { useMutation } from '@tanstack/react-query'; import { useGastownTRPC, gastownWsUrl } from '@/lib/gastown/trpc'; import type { Terminal } from '@xterm/xterm'; @@ -17,16 +17,39 @@ type XtermPtyOptions = { onStatusChange?: (status: string) => void; }; +type ConnectionStatus = 'connected' | 'reconnecting' | 'disconnected'; + type XtermPtyResult = { terminalRef: React.RefObject; connected: boolean; + connectionStatus: ConnectionStatus; status: string; fitAddonRef: React.RefObject; }; +/** Debounce interval for ResizeObserver events (ms). */ +const RESIZE_DEBOUNCE_MS = 150; + +/** Max reconnection attempts before giving up. */ +const MAX_RECONNECT_ATTEMPTS = 8; + +/** Base delay for exponential backoff (ms). */ +const RECONNECT_BASE_DELAY_MS = 1_000; + +/** Max backoff cap (ms). */ +const RECONNECT_MAX_DELAY_MS = 8_000; + /** * Shared hook that sets up an xterm.js terminal connected to a PTY session - * via WebSocket. Used by both MayorChat and AgentTerminal. + * via WebSocket. Used by MayorTerminalPane, AgentTerminalPane, and any + * other component that needs a terminal. + * + * Handles: + * - PTY session creation with retries + * - WebSocket connection with automatic reconnection (exponential backoff) + * - 0x00 control frame filtering (SDK cursor metadata) + * - Debounced resize events to prevent storms during CSS transitions + * - Connection status indicator (connected/reconnecting/disconnected) */ export function useXtermPty({ townId, @@ -37,6 +60,7 @@ export function useXtermPty({ }: XtermPtyOptions): XtermPtyResult { const trpc = useGastownTRPC(); const [connected, setConnected] = useState(false); + const [connectionStatus, setConnectionStatus] = useState('disconnected'); const [status, setStatus] = useState('Initializing...'); const terminalRef = useRef(null); @@ -45,11 +69,18 @@ export function useXtermPty({ const fitAddonRef = useRef(null); const resizeObserverRef = useRef(null); const ptyRef = useRef<{ id: string } | null>(null); - - function updateStatus(s: string) { - setStatus(s); - onStatusChange?.(s); - } + const reconnectTimerRef = useRef | null>(null); + const reconnectAttemptsRef = useRef(0); + const intentionalCloseRef = useRef(false); + const resizeTimerRef = useRef | null>(null); + + const updateStatus = useCallback( + (s: string) => { + setStatus(s); + onStatusChange?.(s); + }, + [onStatusChange] + ); const createPty = useMutation( trpc.gastown.createPtySession.mutationOptions({ @@ -70,6 +101,147 @@ export function useXtermPty({ let disposed = false; + /** Connect a WebSocket to the PTY session at `wsUrl`. */ + function connectWs( + term: Terminal, + fitAddon: FitAddon, + wsUrl: string, + doResize: (cols: number, rows: number) => void + ) { + if (disposed) return; + + const ws = new WebSocket(gastownWsUrl(wsUrl)); + ws.binaryType = 'arraybuffer'; + wsRef.current = ws; + + ws.onopen = () => { + if (disposed) return; + reconnectAttemptsRef.current = 0; + setConnected(true); + setConnectionStatus('connected'); + updateStatus('Connected'); + const dims = fitAddon.proposeDimensions(); + if (dims) doResize(dims.cols, dims.rows); + }; + + ws.onmessage = (e: MessageEvent) => { + if (e.data instanceof ArrayBuffer) { + const bytes = new Uint8Array(e.data); + // Filter 0x00 control frames — SDK cursor metadata. + // The SDK sends [0x00, ...JSON.stringify({cursor: N})]. + // The NUL byte is invisible, but the JSON text renders + // as visible garbage in the terminal. + if (bytes.length > 0 && bytes[0] === 0x00) return; + term.write(bytes); + } else if (typeof e.data === 'string') { + // Filter SDK control messages that arrive as strings. + // The SDK sends cursor metadata as NUL-prefixed JSON or + // plain JSON objects like {"cursor":N}. These are never + // valid PTY output — real terminal data is raw bytes or + // escape sequences, not well-formed JSON objects. + const data = e.data; + if (data.length > 0 && data.charCodeAt(0) === 0) return; + if (data.startsWith('{')) { + try { + JSON.parse(data); + // Valid JSON on the PTY WebSocket = SDK control message. + // Actual PTY output that starts with '{' would be part + // of a larger escape sequence and wouldn't parse as JSON. + return; + } catch { + // Not valid JSON — genuine PTY data, write it + } + } + term.write(data); + } + }; + + ws.onclose = () => { + if (disposed) return; + setConnected(false); + + // Don't reconnect if the close was intentional (cleanup) + if (intentionalCloseRef.current) { + setConnectionStatus('disconnected'); + updateStatus('Disconnected'); + return; + } + + // Exponential backoff reconnection + const attempt = reconnectAttemptsRef.current; + if (attempt >= MAX_RECONNECT_ATTEMPTS) { + setConnectionStatus('disconnected'); + updateStatus('Connection lost'); + return; + } + + setConnectionStatus('reconnecting'); + const delay = Math.min( + RECONNECT_BASE_DELAY_MS * Math.pow(2, attempt), + RECONNECT_MAX_DELAY_MS + ); + updateStatus(`Reconnecting (${attempt + 1}/${MAX_RECONNECT_ATTEMPTS})...`); + reconnectAttemptsRef.current = attempt + 1; + + reconnectTimerRef.current = setTimeout(() => { + if (disposed) return; + // Re-create PTY session — the old one may be gone after + // container restart or sleep. + void recreatePtyAndConnect(term, fitAddon, doResize); + }, delay); + }; + + ws.onerror = () => { + if (disposed) return; + // onclose will fire after onerror — reconnection is handled there + }; + + term.onData(data => { + if (ws.readyState === WebSocket.OPEN) ws.send(data); + }); + } + + /** Re-create the PTY session and reconnect the WebSocket. */ + async function recreatePtyAndConnect( + term: Terminal, + fitAddon: FitAddon, + doResize: (cols: number, rows: number) => void + ) { + if (disposed) return; + try { + const result = await new Promise<{ pty: { id: string }; wsUrl: string }>( + (resolve, reject) => { + createPty.mutate( + { townId, agentId: capturedAgentId }, + { onSuccess: resolve, onError: reject } + ); + } + ); + if (disposed) return; + ptyRef.current = result.pty; + connectWs(term, fitAddon, result.wsUrl, doResize); + } catch { + if (disposed) return; + // PTY creation failed — schedule another reconnect attempt + const attempt = reconnectAttemptsRef.current; + if (attempt >= MAX_RECONNECT_ATTEMPTS) { + setConnectionStatus('disconnected'); + updateStatus('Connection lost'); + return; + } + const delay = Math.min( + RECONNECT_BASE_DELAY_MS * Math.pow(2, attempt), + RECONNECT_MAX_DELAY_MS + ); + updateStatus(`Reconnecting (${attempt + 1}/${MAX_RECONNECT_ATTEMPTS})...`); + reconnectAttemptsRef.current = attempt + 1; + reconnectTimerRef.current = setTimeout(() => { + if (disposed) return; + void recreatePtyAndConnect(term, fitAddon, doResize); + }, delay); + } + } + async function init() { const container = terminalRef.current; if (!container) return; @@ -154,54 +326,22 @@ export function useXtermPty({ ptyRef.current = result.pty; updateStatus('Connecting...'); + intentionalCloseRef.current = false; + reconnectAttemptsRef.current = 0; - const ws = new WebSocket(gastownWsUrl(result.wsUrl)); - ws.binaryType = 'arraybuffer'; - wsRef.current = ws; - - ws.onopen = () => { - if (disposed) return; - setConnected(true); - updateStatus('Connected'); - const dims = fitAddon.proposeDimensions(); - if (dims) doResize(dims.cols, dims.rows); - }; - - ws.onmessage = (e: MessageEvent) => { - if (e.data instanceof ArrayBuffer) { - term.write(new Uint8Array(e.data)); - } else if (typeof e.data === 'string') { - // Filter out JSON control messages (e.g. {"cursor":N}) from the SDK - if (e.data.startsWith('{')) { - try { - JSON.parse(e.data); - return; - } catch { - // Not valid JSON — fall through to write as PTY data - } - } - term.write(e.data); - } - }; - - ws.onclose = () => { - if (disposed) return; - setConnected(false); - updateStatus('Disconnected'); - }; - - ws.onerror = () => { - if (disposed) return; - updateStatus('Connection error'); - }; - - term.onData(data => { - if (ws.readyState === WebSocket.OPEN) ws.send(data); - }); + connectWs(term, fitAddon, result.wsUrl, doResize); term.onResize(({ cols, rows }) => doResize(cols, rows)); - const observer = new ResizeObserver(() => fitAddon.fit()); + // Debounced ResizeObserver — prevents resize storms during CSS + // transitions (sidebar expand/collapse, terminal bar resize). + const observer = new ResizeObserver(() => { + if (resizeTimerRef.current) clearTimeout(resizeTimerRef.current); + resizeTimerRef.current = setTimeout(() => { + if (disposed) return; + fitAddon.fit(); + }, RESIZE_DEBOUNCE_MS); + }); observer.observe(container); resizeObserverRef.current = observer; } @@ -210,6 +350,11 @@ export function useXtermPty({ return () => { disposed = true; + intentionalCloseRef.current = true; + if (reconnectTimerRef.current) clearTimeout(reconnectTimerRef.current); + reconnectTimerRef.current = null; + if (resizeTimerRef.current) clearTimeout(resizeTimerRef.current); + resizeTimerRef.current = null; resizeObserverRef.current?.disconnect(); resizeObserverRef.current = null; wsRef.current?.close(1000, 'Terminal unmount'); @@ -222,5 +367,5 @@ export function useXtermPty({ }; }, [agentId, townId]); - return { terminalRef, connected, status, fitAddonRef }; + return { terminalRef, connected, connectionStatus, status, fitAddonRef }; }