diff --git a/cloudflare-gastown/AGENTS.md b/cloudflare-gastown/AGENTS.md index 563a9a66fc..f06633c061 100644 --- a/cloudflare-gastown/AGENTS.md +++ b/cloudflare-gastown/AGENTS.md @@ -8,6 +8,37 @@ ## Durable Objects - Each DO module must export a `get{ClassName}Stub` helper function (e.g. `getRigDOStub`) that centralizes how that DO namespace creates instances. Callers should use this helper instead of accessing the namespace binding directly. +- **Sub-modules for large DOs**: When a Durable Object grows beyond a few hundred lines, extract domain logic into sub-modules under a `<do-name>/` directory alongside the DO file. For example, `Town.do.ts` delegates to modules in `town/`: + + ``` + dos/ + Town.do.ts # Class definition, RPC methods, alarm loop + town/ + agents.ts # Agent CRUD, hook management + beads.ts # Bead CRUD, convoy progress + scheduling.ts # Agent dispatch, pending work scheduling + review-queue.ts # Review lifecycle, recovery + patrol.ts # Zombie detection, stale hook recovery + config.ts # Town configuration + rigs.ts # Rig registry + mail.ts # Inter-agent mail + container-dispatch.ts # Container start/stop/status + ``` + + Each sub-module exports plain functions (not classes) that accept `SqlStorage` and any other required context as arguments. The DO imports them with the `import * as X` pattern: + + ```ts + import * as beadOps from './town/beads'; + import * as agents from './town/agents'; + import * as scheduling from './town/scheduling'; + + // In the DO class: + beadOps.updateBeadStatus(this.sql, beadId, 'closed', agentId); + agents.getOrCreateAgent(this.sql, 'polecat', rigId, this.townId); + await scheduling.schedulePendingWork(this.schedulingCtx); + ``` + + This keeps the DO class thin (RPC surface + orchestration) while sub-modules own the business logic. The `import * as X` pattern makes call sites self-documenting — you can always tell which domain a function belongs to. 
## IO boundaries diff --git a/cloudflare-gastown/container/Dockerfile b/cloudflare-gastown/container/Dockerfile index 4c653a7485..0916754f59 100644 --- a/cloudflare-gastown/container/Dockerfile +++ b/cloudflare-gastown/container/Dockerfile @@ -44,6 +44,14 @@ RUN cd /opt/gastown-plugin && npm install --omit=dev && \ ln -s /opt/gastown-plugin/index.ts /home/agent/.config/kilo/plugins/gastown.ts && \ chown -R agent:agent /home/agent/.config +# ── Git config for agent user ─────────────────────────────────────── +# Skip LFS smudge filter: agents don't need binary assets and LFS +# downloads can fail when credentials don't cover the batch endpoint. +# Also disable LFS fetch entirely so clone/worktree never stalls. +RUN printf '[filter "lfs"]\n\tsmudge = git-lfs smudge --skip -- %%f\n\tprocess = git-lfs filter-process --skip\n\tclean = git-lfs clean -- %%f\n\trequired = true\n[lfs]\n\tfetchexclude = *\n' \ + > /home/agent/.gitconfig && \ + chown agent:agent /home/agent/.gitconfig + WORKDIR /app # ── Install production deps via pnpm ──────────────────────────────── diff --git a/cloudflare-gastown/container/package.json b/cloudflare-gastown/container/package.json index ccf6f0513c..74bc8dbeaf 100644 --- a/cloudflare-gastown/container/package.json +++ b/cloudflare-gastown/container/package.json @@ -8,10 +8,8 @@ "start": "bun run src/main.ts", "test": "vitest run", "test:watch": "vitest", - "typecheck": "tsgo --noEmit --incremental false", - "lint": "pnpm run lint:oxlint && pnpm run lint:eslint:fallback", - "lint:oxlint": "pnpm -w exec oxlint --config .oxlintrc.json cloudflare-gastown/container/src", - "lint:eslint:fallback": "eslint --config eslint.config.mjs --cache 'src/**/*.ts'" + "typecheck": "tsc --noEmit", + "lint": "eslint --config eslint.config.mjs --cache 'src/**/*.ts'" }, "dependencies": { "@kilocode/plugin": "7.0.37", @@ -21,8 +19,7 @@ }, "devDependencies": { "@kilocode/eslint-config": "workspace:*", - "@types/bun": "^1.3.10", - "@typescript/native-preview": 
"catalog:", + "@types/bun": "^1.2.17", "eslint": "catalog:", "typescript": "catalog:", "vitest": "^3.2.4" diff --git a/cloudflare-gastown/container/plugin/client.ts b/cloudflare-gastown/container/plugin/client.ts index 3f4ed8d1ea..93b972854d 100644 --- a/cloudflare-gastown/container/plugin/client.ts +++ b/cloudflare-gastown/container/plugin/client.ts @@ -116,6 +116,16 @@ export class GastownClient { }); } + async requestChanges(input: { + feedback: string; + files?: string[]; + }): Promise<{ rework_bead_id: string }> { + return this.request<{ rework_bead_id: string }>(this.agentPath('/request-changes'), { + method: 'POST', + body: JSON.stringify(input), + }); + } + async checkMail(): Promise { return this.request(this.agentPath('/mail')); } @@ -300,8 +310,6 @@ export class MayorGastownClient { title: string; body?: string; metadata?: Record; - depends_on?: string[]; - convoy_id?: string; }): Promise { return this.request(this.mayorPath('/sling'), { method: 'POST', @@ -383,35 +391,6 @@ export class MayorGastownClient { ); } - async addBeadDependency(input: { - rig_id: string; - bead_id: string; - depends_on_bead_id: string; - dependency_type?: 'blocks' | 'tracks' | 'parent-child'; - }): Promise { - await this.request<{ ok: true }>( - `${this.baseUrl}/api/towns/${this.townId}/rigs/${input.rig_id}/beads/${input.bead_id}/dependencies`, - { - method: 'POST', - body: JSON.stringify({ - depends_on_bead_id: input.depends_on_bead_id, - dependency_type: input.dependency_type, - }), - } - ); - } - - async removeBeadDependency(input: { - rig_id: string; - bead_id: string; - depends_on_bead_id: string; - }): Promise { - await this.request<{ ok: true; deleted: boolean }>( - `${this.baseUrl}/api/towns/${this.townId}/rigs/${input.rig_id}/beads/${input.bead_id}/dependencies/${input.depends_on_bead_id}`, - { method: 'DELETE' } - ); - } - async listConvoys(): Promise { return this.request(this.mayorPath('/convoys')); } @@ -429,7 +408,6 @@ export class MayorGastownClient { status?: 
'open' | 'in_progress' | 'in_review' | 'closed' | 'failed'; priority?: 'low' | 'medium' | 'high' | 'critical'; labels?: string[]; - convoy_id?: string | null; } ): Promise { return this.request(this.mayorPath(`/rigs/${rigId}/beads/${beadId}`), { diff --git a/cloudflare-gastown/container/plugin/mayor-tools.ts b/cloudflare-gastown/container/plugin/mayor-tools.ts index 4a55aa03ea..24b0fb50eb 100644 --- a/cloudflare-gastown/container/plugin/mayor-tools.ts +++ b/cloudflare-gastown/container/plugin/mayor-tools.ts @@ -67,46 +67,22 @@ export function createMayorTools(client: MayorGastownClient) { .string() .describe('JSON-encoded metadata object for additional context') .optional(), - depends_on: tool.schema - .array(tool.schema.string()) - .describe( - 'Optional list of bead IDs this task depends on. The new bead will not be dispatched until all listed beads are closed.' - ) - .optional(), - convoy_id: tool.schema - .string() - .describe( - 'Optional convoy ID to add this bead to. The bead will be tracked by the convoy and included in its progress.' - ) - .optional(), }, async execute(args) { const metadata = args.metadata ? parseJsonObject(args.metadata, 'metadata') : undefined; - // Pass depends_on directly to client.sling() so TownDO.slingBead() - // inserts the dependency rows atomically before arming dispatch. 
const result = await client.sling({ rig_id: args.rig_id, title: args.title, body: args.body, metadata, - depends_on: args.depends_on, - convoy_id: args.convoy_id, }); - - const lines = [ + return [ `Task slung successfully.`, `Bead: ${result.bead.bead_id} — "${result.bead.title}"`, `Assigned to: ${result.agent.name} (${result.agent.role}, id: ${result.agent.id})`, `Status: ${result.bead.status}`, - ]; - if (args.depends_on && args.depends_on.length > 0) { - lines.push(`Dependencies: blocked by ${args.depends_on.length} bead(s)`); - } - if (args.convoy_id) { - lines.push(`Convoy: added to ${args.convoy_id}`); - } - lines.push(`The polecat will be dispatched automatically by the alarm scheduler.`); - return lines.join('\n'); + `The polecat will be dispatched automatically by the alarm scheduler.`, + ].join('\n'); }, }), @@ -325,9 +301,7 @@ export function createMayorTools(client: MayorGastownClient) { }), gt_bead_update: tool({ - description: - "Edit a bead's status, title, body, priority, labels, or convoy membership. " + - 'Set convoy_id to add the bead to a convoy, or set it to null/empty to remove it.', + description: "Edit a bead's status, title, body, priority, or labels.", args: { rig_id: tool.schema.string().describe('The UUID of the rig the bead belongs to'), bead_id: tool.schema.string().describe('The UUID of the bead to update'), @@ -345,13 +319,6 @@ export function createMayorTools(client: MayorGastownClient) { .array(tool.schema.string()) .describe('Replacement labels array for the bead') .optional(), - convoy_id: tool.schema - .string() - .describe( - 'Set to a convoy UUID to add this bead to that convoy. ' + - 'Set to an empty string to remove the bead from its current convoy.' 
- ) - .optional(), }, async execute(args) { const bead = await client.updateBead(args.rig_id, args.bead_id, { @@ -360,7 +327,6 @@ export function createMayorTools(client: MayorGastownClient) { status: args.status, priority: args.priority, labels: args.labels, - convoy_id: args.convoy_id === '' ? null : args.convoy_id, }); return `Bead ${bead.bead_id} updated. Status: ${bead.status}, Priority: ${bead.priority}, Title: "${bead.title}".`; }, @@ -506,52 +472,5 @@ export function createMayorTools(client: MayorGastownClient) { return `Nudge queued: ${result.nudge_id} (mode: ${args.mode ?? 'wait-idle'})`; }, }), - - gt_bead_add_dependency: tool({ - description: - 'Add a dependency between two beads. The bead at bead_id will be blocked by depends_on_bead_id — ' + - 'it will not be dispatched until the dependency is closed.', - args: { - rig_id: tool.schema.string().describe('The UUID of the rig the beads belong to'), - bead_id: tool.schema.string().describe('The UUID of the bead that should be blocked'), - depends_on_bead_id: tool.schema - .string() - .describe('The UUID of the bead that must close first'), - dependency_type: tool.schema - .enum(['blocks', 'parent-child']) - .describe('Type of dependency (default: blocks)') - .optional(), - }, - async execute(args) { - await client.addBeadDependency({ - rig_id: args.rig_id, - bead_id: args.bead_id, - depends_on_bead_id: args.depends_on_bead_id, - dependency_type: args.dependency_type ?? 'blocks', - }); - return `Dependency added: bead ${args.bead_id} now depends on ${args.depends_on_bead_id} (type: ${args.dependency_type ?? 'blocks'}).`; - }, - }), - - gt_bead_remove_dependency: tool({ - description: - 'Remove a dependency between two beads. 
If removing the dependency unblocks the bead, ' + - 'it will be dispatched automatically.', - args: { - rig_id: tool.schema.string().describe('The UUID of the rig the beads belong to'), - bead_id: tool.schema.string().describe('The UUID of the dependent bead'), - depends_on_bead_id: tool.schema - .string() - .describe('The UUID of the bead it currently depends on'), - }, - async execute(args) { - await client.removeBeadDependency({ - rig_id: args.rig_id, - bead_id: args.bead_id, - depends_on_bead_id: args.depends_on_bead_id, - }); - return `Dependency removed: bead ${args.bead_id} no longer depends on ${args.depends_on_bead_id}. If this was the last blocker, the bead will be dispatched automatically.`; - }, - }), }; } diff --git a/cloudflare-gastown/container/plugin/tools.ts b/cloudflare-gastown/container/plugin/tools.ts index 0df5d6ce67..1c360c0f20 100644 --- a/cloudflare-gastown/container/plugin/tools.ts +++ b/cloudflare-gastown/container/plugin/tools.ts @@ -78,6 +78,38 @@ export function createTools(client: GastownClient) { }, }), + gt_request_changes: tool({ + description: + 'Request changes on the code you are reviewing. This creates a rework task ' + + 'for a polecat to address your feedback. After calling this, call gt_done to ' + + 'release your session. The polecat will push fixes to the same branch, and ' + + 'you will be re-dispatched to re-review once the rework is complete. ' + + 'Only available to refinery agents.', + args: { + feedback: tool.schema + .string() + .describe( + 'Detailed description of what needs to change. Be specific: ' + + 'reference file names, function names, and the exact issues found.' + ), + files: tool.schema + .array(tool.schema.string()) + .describe('Optional list of specific file paths that need changes') + .optional(), + }, + async execute(args) { + const result = await client.requestChanges({ + feedback: args.feedback, + files: args.files, + }); + return ( + `Rework request created (bead ${result.rework_bead_id}). 
` + + 'A polecat will be assigned to address your feedback. ' + + 'Call gt_done now to release your session. You will be re-dispatched to re-review once the rework is complete.' + ); + }, + }), + gt_mail_send: tool({ description: 'Send a typed message to another agent in the rig. ' + diff --git a/cloudflare-gastown/container/src/git-manager.ts b/cloudflare-gastown/container/src/git-manager.ts index 24b312d43c..a65d34370b 100644 --- a/cloudflare-gastown/container/src/git-manager.ts +++ b/cloudflare-gastown/container/src/git-manager.ts @@ -1,4 +1,4 @@ -import { mkdir, realpath, rm, stat } from 'node:fs/promises'; +import { mkdir, realpath, rm, stat, writeFile } from 'node:fs/promises'; import { join, resolve } from 'node:path'; import type { CloneOptions, WorktreeOptions } from './types'; @@ -105,6 +105,49 @@ function authenticateGitUrl(gitUrl: string, envVars?: Record): s return gitUrl; } +/** + * Configure a credential-store helper on the bare repo so that worktree + * operations (checkout, reset, lfs smudge) can resolve credentials + * through the standard git credential chain. + * + * Without this, git-lfs smudge filters triggered by `git worktree add` + * or `git reset --hard` fail with "Smudge error" because the LFS batch + * API request has no credentials. The token is embedded in the remote + * URL, but some git-lfs versions require the credential helper for the + * LFS batch endpoint (which uses a different URL path). + */ +async function configureRepoCredentials( + repoDir: string, + gitUrl: string, + envVars?: Record +): Promise { + if (!envVars) return; + + const token = envVars.GIT_TOKEN ?? envVars.GITHUB_TOKEN; + const gitlabToken = envVars.GITLAB_TOKEN; + if (!token && !gitlabToken) return; + + try { + const url = new URL(gitUrl); + const credentialLine = + gitlabToken && (url.hostname.includes('gitlab') || envVars.GITLAB_INSTANCE_URL) + ? `https://oauth2:${gitlabToken}@${url.hostname}` + : token + ? 
`https://x-access-token:${token}@${url.hostname}` + : null; + + if (!credentialLine) return; + + // Write to a per-repo credential file outside the repo itself + const credFile = `/tmp/.git-credentials-repo-${repoDir.replace(/[^a-zA-Z0-9]/g, '-')}`; + await writeFile(credFile, credentialLine + '\n', { mode: 0o600 }); + + await exec('git', ['config', 'credential.helper', `store --file=${credFile}`], repoDir); + } catch (err) { + console.warn(`Failed to configure repo credentials for ${repoDir}:`, err); + } +} + /** * Validate a branch name — block control characters and shell metacharacters. */ @@ -148,6 +191,11 @@ async function exec(cmd: string, args: string[], cwd?: string): Promise // Public repos clone without auth; private repos fail fast with // a clear error instead of hanging on a username prompt. GIT_TERMINAL_PROMPT: '0', + // Skip LFS smudge filter during checkout/worktree operations. + // Agents don't need binary assets (videos, images, etc.) and + // LFS downloads can fail when the credential helper doesn't + // cover the LFS batch endpoint, blocking worktree creation. 
+ GIT_LFS_SKIP_SMUDGE: '1', }, }); @@ -211,6 +259,7 @@ async function cloneRepoInner( await exec('git', ['remote', 'set-url', 'origin', authUrl], dir).catch(err => { console.warn(`Failed to update remote URL for rig ${options.rigId}:`, err); }); + await configureRepoCredentials(dir, options.gitUrl, options.envVars); await exec('git', ['fetch', '--all', '--prune'], dir); console.log(`Fetched latest for rig ${options.rigId}`); return dir; @@ -228,6 +277,7 @@ async function cloneRepoInner( await mkdir(dir, { recursive: true }); await exec('git', ['clone', '--no-checkout', '--branch', options.defaultBranch, authUrl, dir]); + await configureRepoCredentials(dir, options.gitUrl, options.envVars); console.log(`Cloned repo for rig ${options.rigId}`); return dir; } diff --git a/cloudflare-gastown/container/src/heartbeat.ts b/cloudflare-gastown/container/src/heartbeat.ts index 8d78c31ff6..bd9dd8db32 100644 --- a/cloudflare-gastown/container/src/heartbeat.ts +++ b/cloudflare-gastown/container/src/heartbeat.ts @@ -53,6 +53,10 @@ async function sendHeartbeats(): Promise { townId: agent.townId, status: agent.status, timestamp: new Date().toISOString(), + lastEventType: agent.lastEventType ?? null, + lastEventAt: agent.lastEventAt ?? null, + activeTools: agent.activeTools ?? [], + messageCount: agent.messageCount ?? 0, }; try { diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index 1b21f70ff2..f1e2a98e2c 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -62,7 +62,12 @@ export function unregisterEventSink( // ── Event buffer for HTTP polling ───────────────────────────────────── // The TownContainerDO polls GET /agents/:id/events?after=N to get events // because containerFetch doesn't support WebSocket upgrades. 
-type BufferedEvent = { id: number; event: string; data: unknown; timestamp: string }; +type BufferedEvent = { + id: number; + event: string; + data: unknown; + timestamp: string; +}; const MAX_BUFFERED_EVENTS = 2000; const agentEventBuffers = new Map(); let nextEventId = 1; @@ -73,7 +78,12 @@ function bufferAgentEvent(agentId: string, event: string, data: unknown): void { buf = []; agentEventBuffers.set(agentId, buf); } - buf.push({ id: nextEventId++, event, data, timestamp: new Date().toISOString() }); + buf.push({ + id: nextEventId++, + event, + data, + timestamp: new Date().toISOString(), + }); if (buf.length > MAX_BUFFERED_EVENTS) { buf.splice(0, buf.length - MAX_BUFFERED_EVENTS); } @@ -449,6 +459,8 @@ async function subscribeToEvents( if (sessionID && sessionID !== agent.sessionId) continue; agent.lastActivityAt = new Date().toISOString(); + agent.lastEventType = event.type ?? 'unknown'; + agent.lastEventAt = new Date().toISOString(); // Track active tool calls if (event.properties && 'activeTools' in event.properties) { @@ -487,7 +499,9 @@ async function subscribeToEvents( clearIdleTimer(agent.agentId); agent.status = 'failed'; agent.exitReason = 'Event stream error'; - broadcastEvent(agent.agentId, 'agent.exited', { reason: 'stream error' }); + broadcastEvent(agent.agentId, 'agent.exited', { + reason: 'stream error', + }); void reportAgentCompleted(agent, 'failed', 'Event stream error'); // Release SDK session on stream error (same cleanup as normal completion) @@ -518,7 +532,17 @@ export async function startAgent( ): Promise { const existing = agents.get(request.agentId); if (existing && (existing.status === 'running' || existing.status === 'starting')) { - throw new Error(`Agent ${request.agentId} is already running`); + // Agent has a live session (probably idle after gt_done, waiting for + // the idle timer). Stop it so the new dispatch can proceed. 
+ console.log( + `${MANAGER_LOG} startAgent: stopping existing session for ${request.agentId} (status=${existing.status})` + ); + await stopAgent(request.agentId).catch(err => { + console.warn( + `${MANAGER_LOG} startAgent: failed to stop existing session for ${request.agentId}`, + err + ); + }); } const now = new Date().toISOString(); @@ -534,6 +558,8 @@ export async function startAgent( workdir, startedAt: now, lastActivityAt: now, + lastEventType: null, + lastEventAt: null, activeTools: [], messageCount: 0, exitReason: null, @@ -741,7 +767,9 @@ export async function stopAll(): Promise { try { const instance = sdkInstances.get(agent.workdir); if (instance) { - await instance.client.session.abort({ path: { id: agent.sessionId } }); + await instance.client.session.abort({ + path: { id: agent.sessionId }, + }); } } catch { // Best-effort diff --git a/cloudflare-gastown/container/src/types.ts b/cloudflare-gastown/container/src/types.ts index 2d7b5eacbb..c0d1568c5b 100644 --- a/cloudflare-gastown/container/src/types.ts +++ b/cloudflare-gastown/container/src/types.ts @@ -103,6 +103,10 @@ export type ManagedAgent = { workdir: string; startedAt: string; lastActivityAt: string; + /** Event type of the most recent SDK event (e.g. 
'message_part.updated') */ + lastEventType: string | null; + /** ISO 8601 timestamp of the most recent SDK event */ + lastEventAt: string | null; /** Last known active tool calls (populated from SSE events) */ activeTools: string[]; /** Total messages sent to this agent */ @@ -299,6 +303,11 @@ export type HeartbeatPayload = { townId: string; status: AgentStatus; timestamp: string; + // SDK activity watermark + lastEventType: string | null; + lastEventAt: string | null; + activeTools: string[]; + messageCount: number; }; // ── Stream ticket (for WebSocket streaming) ───────────────────────────── diff --git a/cloudflare-gastown/docs/post-deploy-monitoring.md b/cloudflare-gastown/docs/post-deploy-monitoring.md new file mode 100644 index 0000000000..9c331321d9 --- /dev/null +++ b/cloudflare-gastown/docs/post-deploy-monitoring.md @@ -0,0 +1,254 @@ +# Post-Deploy Town Health Monitoring + +Guide for an AI agent to verify town health after a production deploy. + +## Prerequisites + +- The debug endpoint is deployed: `GET /debug/towns/:townId/status` +- The debug endpoint is protected by Cloudflare Access. Requests must include service token headers. +- Base URL: `https://gastown.kiloapps.io` +- Town ID: obtain from `GET /trpc/gastown.listOrgTowns` (requires auth) or from the user + +### Authentication + +The debug endpoint requires Cloudflare Access service token headers. These are the same credentials the Next.js app uses to communicate with gastown: + +```bash +# Set these from your Cloudflare Access service token +export CF_ACCESS_CLIENT_ID="" +export CF_ACCESS_CLIENT_SECRET="" +``` + +All `curl` commands in this document use a helper function that includes these headers: + +```bash +debug_curl() { + curl -s \ + -H "CF-Access-Client-Id: $CF_ACCESS_CLIENT_ID" \ + -H "CF-Access-Client-Secret: $CF_ACCESS_CLIENT_SECRET" \ + "$@" +} +``` + +## 1. 
Monitor Script + +The monitoring script at `scripts/monitor-town.sh` polls the debug endpoint: + +```bash +export CF_ACCESS_CLIENT_ID="" +export CF_ACCESS_CLIENT_SECRET="" +./scripts/monitor-town.sh [interval_seconds] +``` + +Or poll manually: + +```bash +debug_curl "https://gastown.kiloapps.io/debug/towns/$TOWN_ID/status" | python3 -c " +import sys, json +d = json.load(sys.stdin) +a = d['alarmStatus'] +print(f\"Working: {a['agents']['working']} Idle: {a['agents']['idle']}\") +print(f\"open: {a['beads']['open']} inProgress: {a['beads']['inProgress']} inReview: {a['beads']['inReview']}\") +ref = [x for x in d['agentMeta'] if x.get('role') == 'refinery'] +if ref: + r = ref[0] + print(f\"Refinery: status={r['status']} hook={r.get('current_hook_bead_id') or 'NULL'}\") +recon = a.get('reconciler') +if recon: + print(f\"Reconciler: events={recon['eventsDrained']} actions={recon['actionsEmitted']} violations={recon['invariantViolations']} wallMs={recon['wallClockMs']}\") +for e in a.get('recentEvents', [])[:5]: + print(f\" {e['time'][:19]} {e['message'][:80]}\") +" +``` + +## 2. Post-Deploy Health Checks + +After `pnpm deploy:prod`, verify these in order: + +### Phase 1: DO Reset (0-30s) + +The Durable Object reinitializes. Check the alarm is running: + +```bash +# Alarm should show 'active (5s)' within 10s of deploy +debug_curl "https://gastown.kiloapps.io/debug/towns/$TOWN_ID/status" | python3 -c " +import sys, json; d = json.load(sys.stdin) +print(d['alarmStatus']['alarm']['intervalLabel']) +" +``` + +**Expected**: `active (5s)` or `idle (60s)` + +### Phase 2: Container Restart (30s-3min) + +The container is evicted and a new one starts. 
The `ensureMayor` tRPC call can kick-start it: + +```bash +curl -s -X POST -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d "{\"townId\":\"$TOWN_ID\"}" \ + "https://gastown.kiloapps.io/trpc/gastown.ensureMayor" +``` + +Poll until agents start working: + +```bash +# Should see Working > 0 within 2-3 min +debug_curl "https://gastown.kiloapps.io/debug/towns/$TOWN_ID/status" | python3 -c " +import sys, json; d = json.load(sys.stdin) +print(f\"Working: {d['alarmStatus']['agents']['working']}\") +" +``` + +### Phase 3: Agent Recovery (1-5min) + +Verify agents recover from the container restart. The reconciler handles all recovery: + +- **Polecats**: idle+hooked agents are re-dispatched by `reconcileBeads` Rule 2 +- **Refinery**: if it was mid-review, the container status observation detects the dead container and sets the refinery to idle. `reconcileReviewQueue` Rule 6 re-dispatches it. +- **Orphaned beads**: `reconcileBeads` Rule 3 resets in-progress beads with no working agent to open after 5 min, then Rule 1 assigns a new agent. 
+ +**Red flags**: + +- `Working: 0` for more than 5 min after container is active +- `invariantViolations > 0` in reconciler metrics +- `failed` count increasing rapidly (dispatch attempts burning out) + +### Phase 4: Review Pipeline (5-15min) + +Watch for a full review cycle: + +``` +in_progress → in_review → review_completed → closed +``` + +Check that: + +- The refinery picks up MR beads (status transitions to `working`) +- Reviews complete as `merged` (not `Refinery container failed to start`) +- Source beads reach `closed` and stay closed + +### Phase 5: Reconciler Health + +Verify the reconciler is running correctly: + +```bash +debug_curl "https://gastown.kiloapps.io/debug/towns/$TOWN_ID/status" | python3 -c " +import sys, json; d = json.load(sys.stdin) +r = d['alarmStatus'].get('reconciler') +if r: + print(f\"Events drained: {r['eventsDrained']}\") + print(f\"Actions emitted: {r['actionsEmitted']}\") + print(f\"Invariant violations: {r['invariantViolations']}\") + print(f\"Wall clock: {r['wallClockMs']}ms\") + print(f\"Pending events: {r['pendingEventCount']}\") + if r.get('actionsByType'): + print(f\"Action types: {r['actionsByType']}\") +else: + print('No reconciler metrics yet') +" +``` + +**Expected**: `invariantViolations: 0`, `wallClockMs < 100`, `pendingEventCount: 0` + +## 3. Test Convoy + +Create a simple test convoy to verify the full pipeline. 
Use the tRPC `slingConvoy` endpoint: + +```bash +TOKEN="" +TOWN_ID="" +RIG_ID="" + +# Create a 2-bead test convoy +curl -s -X POST -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d "{ + \"rigId\": \"$RIG_ID\", + \"convoyTitle\": \"Post-deploy health check $(date -u +%H:%M)\", + \"tasks\": [ + {\"title\": \"Update README with current timestamp (pass 1 of 2)\"}, + {\"title\": \"Update README with current timestamp (pass 2 of 2)\"} + ] + }" \ + "https://gastown.kiloapps.io/trpc/gastown.slingConvoy" +``` + +Then monitor until both beads reach `closed`: + +```bash +# Poll every 30s until no non-terminal issue beads remain +while true; do + RESP=$(debug_curl "https://gastown.kiloapps.io/debug/towns/$TOWN_ID/status") + ISSUES=$(echo "$RESP" | python3 -c " +import sys, json +beads = json.load(sys.stdin).get('beadSummary', []) +issues = [b for b in beads if b.get('type') == 'issue'] +print(len(issues)) +for b in issues: + print(f\" {b['status']:12s} {b.get('title','')[:60]}\") +") + echo "$(date -u +%H:%M:%S) Non-terminal issues: $ISSUES" + # Exit when 0 non-terminal issues + echo "$ISSUES" | head -1 | grep -q "^0$" && echo "All beads closed!" && break + sleep 30 +done +``` + +**Expected timeline**: + +- 0-2 min: beads created, polecats dispatched by reconciler (lazy assignment) +- 2-10 min: polecats work, submit reviews +- 10-15 min: refinery reviews and merges +- 15-25 min: second bead goes through the same cycle +- 25-30 min: convoy lands (all beads closed) + +**Failure indicators**: + +- Beads stuck in `open` for >5 min → check reconciler actions (should emit `dispatch_agent`) +- Beads stuck in `in_review` for >15 min → check refinery status and MR beads +- MR beads stuck in `in_progress` for >5 min → check refinery dispatch retry +- Beads cycling `in_progress → open` → check `agentCompleted` events and STALE_IN_PROGRESS_TIMEOUT_MS +- Reviews completing as `failed` → check container start errors on refinery agent status message + +## 4. 
Cleanup + +After monitoring is complete, clean up test beads. + +### Remove test convoy beads + +```bash +# Get all beads for the rig +BEADS=$(curl -s -H "Authorization: Bearer $TOKEN" \ + "https://gastown.kiloapps.io/trpc/gastown.listBeads?input=$(python3 -c \ + 'import json,urllib.parse; print(urllib.parse.quote(json.dumps({"rigId":"'$RIG_ID'"})))')") + +# Find and delete test convoy beads (match by title prefix) +echo "$BEADS" | python3 -c " +import sys, json +data = json.load(sys.stdin)['result']['data'] +for b in data: + title = b.get('title', '') + if 'Post-deploy health check' in title or 'Update README with current timestamp' in title: + print(b['bead_id']) +" | while read BEAD_ID; do + curl -s -X POST -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d "{\"rigId\":\"$RIG_ID\",\"beadId\":\"$BEAD_ID\"}" \ + "https://gastown.kiloapps.io/trpc/gastown.deleteBead" + echo "Deleted $BEAD_ID" +done +``` + +## 5. Key Metrics to Watch + +| Metric | Healthy | Unhealthy | +| --------------------- | --------------------------------------- | ------------------------------------- | +| Working agents | >0 when beads exist | 0 for >5 min with open beads | +| Failed bead count | Stable | Increasing rapidly | +| Invariant violations | 0 | >0 (check reconciler logs) | +| Refinery status | `working` during review, `idle` between | `idle` with in_progress MR for >5 min | +| Review outcomes | `merged` | `Refinery container failed to start` | +| Alarm interval | `active (5s)` with work | Stuck at same `nextFireAt` | +| Reconciler wall clock | <100ms | >500ms consistently | +| Pending event count | 0 between ticks | Growing (events not draining) | diff --git a/cloudflare-gastown/package.json b/cloudflare-gastown/package.json index 0afe83ec60..3e62c28323 100644 --- a/cloudflare-gastown/package.json +++ b/cloudflare-gastown/package.json @@ -20,33 +20,31 @@ "test:integration:watch": "vitest --config vitest.workers.config.ts", "typecheck": "tsgo --noEmit 
--incremental false", "build:types": "tsgo -p tsconfig.types.json || true", - "lint": "pnpm run lint:oxlint && pnpm run lint:eslint:fallback", - "lint:oxlint": "pnpm -w exec oxlint --config .oxlintrc.json cloudflare-gastown/src", - "lint:eslint:fallback": "eslint --config eslint.config.mjs --cache 'src/**/*.ts'" + "lint": "eslint --config eslint.config.mjs --cache 'src/**/*.ts'" }, "dependencies": { - "@cloudflare/containers": "^0.1.1", + "@cloudflare/containers": "^0.1.0", "@hono/trpc-server": "^0.4.2", "@kilocode/db": "workspace:*", "@kilocode/worker-utils": "workspace:*", - "@sentry/cloudflare": "^9.47.1", - "@trpc/server": "^11.13.0", + "@sentry/cloudflare": "^9", + "@trpc/server": "^11.0.0", "drizzle-orm": "catalog:", "hono": "catalog:", "itty-time": "^1.0.6", "jose": "catalog:", "jsonwebtoken": "catalog:", - "pg": "^8.20.0", + "pg": "^8.16.3", "zod": "catalog:" }, "devDependencies": { - "@cloudflare/vitest-pool-workers": "^0.12.21", + "@cloudflare/vitest-pool-workers": "^0.12.8", "@cloudflare/workers-types": "catalog:", "@kilocode/eslint-config": "workspace:*", - "@sentry/cli": "^2.58.5", + "@sentry/cli": "^2.58.2", "@types/jsonwebtoken": "catalog:", - "@types/node": "^22.19.15", - "@typescript/native-preview": "catalog:", + "@types/node": "^22", + "@typescript/native-preview": "7.0.0-dev.20251019.1", "eslint": "catalog:", "typescript": "catalog:", "vitest": "^3.2.4", diff --git a/cloudflare-gastown/scripts/monitor-town.sh b/cloudflare-gastown/scripts/monitor-town.sh new file mode 100755 index 0000000000..ef3075e56c --- /dev/null +++ b/cloudflare-gastown/scripts/monitor-town.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# Continuously monitor a town's state via the debug endpoint. 
+# Usage: ./scripts/monitor-town.sh [townId] [interval_seconds] +# +# Requires Cloudflare Access service token credentials: +# export CF_ACCESS_CLIENT_ID="" +# export CF_ACCESS_CLIENT_SECRET="" + +TOWN_ID="${1:-8a6f9375-b806-4ee0-ad6e-1697ea2dbfff}" +INTERVAL="${2:-15}" +BASE_URL="${GASTOWN_URL:-https://gastown.kiloapps.io}" +URL="${BASE_URL}/debug/towns/${TOWN_ID}/status" + +if [ -z "$CF_ACCESS_CLIENT_ID" ] || [ -z "$CF_ACCESS_CLIENT_SECRET" ]; then + echo "Error: CF_ACCESS_CLIENT_ID and CF_ACCESS_CLIENT_SECRET must be set" + echo "These are the Cloudflare Access service token credentials." + exit 1 +fi + +echo "Monitoring town ${TOWN_ID} every ${INTERVAL}s" +echo "Endpoint: ${URL}" +echo "Press Ctrl+C to stop" +echo "==========================================" + +while true; do + RESP=$(curl -s --max-time 10 \ + -H "CF-Access-Client-Id: $CF_ACCESS_CLIENT_ID" \ + -H "CF-Access-Client-Secret: $CF_ACCESS_CLIENT_SECRET" \ + "${URL}" 2>/dev/null) + if [ -z "$RESP" ]; then + echo "$(date -u +%H:%M:%S) [ERROR] No response from ${URL}" + sleep "$INTERVAL" + continue + fi + + echo "$RESP" | python3 -c " +import sys, json, datetime + +try: + d = json.load(sys.stdin) +except: + print('$(date -u +%H:%M:%S) [ERROR] Invalid JSON response') + sys.exit(0) + +ts = datetime.datetime.utcnow().strftime('%H:%M:%S') +alarm = d.get('alarmStatus', {}) +agents_info = alarm.get('agents', {}) +beads_info = alarm.get('beads', {}) +patrol_info = alarm.get('patrol', {}) +recon = alarm.get('reconciler') or {} +events = alarm.get('recentEvents', []) + +working = agents_info.get('working', 0) +idle = agents_info.get('idle', 0) +op = beads_info.get('open', 0) +ip = beads_info.get('inProgress', 0) +ir = beads_info.get('inReview', 0) +failed = beads_info.get('failed', 0) +violations = recon.get('invariantViolations', '-') +actions = recon.get('actionsEmitted', '-') +wall_ms = recon.get('wallClockMs', '-') + +# Agent details +agents = d.get('agentMeta', []) +hooked_agents = [a for a in agents if 
a.get('current_hook_bead_id')] +refinery = [a for a in agents if a.get('role') == 'refinery'] + +# Non-terminal beads +beads = d.get('beadSummary', []) + +print(f'{ts} W={working} I={idle} | open={op} prog={ip} review={ir} fail={failed} | v={violations} act={actions} ms={wall_ms}') + +# Show refinery state +for r in refinery: + hook = r.get('current_hook_bead_id', 'NULL') or 'NULL' + print(f' refinery: status={r.get(\"status\",\"?\"):8s} hook={hook[:12]:12s} dispatch={r.get(\"dispatch_attempts\",0)}') + +# Show non-terminal beads +if beads: + for b in beads[:8]: + assignee = str(b.get('assignee_agent_bead_id', '') or '')[:8] + print(f' {b.get(\"status\",\"?\"):12s} {b.get(\"type\",\"?\"):16s} {str(b.get(\"bead_id\",\"\"))[:8]} agent={assignee:8s} {str(b.get(\"title\",\"\"))[:50]}') + if len(beads) > 8: + print(f' ... and {len(beads) - 8} more') + +# Show most recent event +if events: + e = events[0] + print(f' last: {e.get(\"time\",\"\")[:19]} {e.get(\"type\",\"\"):20s} {e.get(\"message\",\"\")[:70]}') + +# Show review outcomes +review_events = [e for e in events if e.get('type') == 'review_completed'] +for e in review_events[:2]: + print(f' REVIEW: {e.get(\"time\",\"\")[:19]} {e.get(\"message\",\"\")[:70]}') + +print() +" 2>/dev/null + + sleep "$INTERVAL" +done diff --git a/cloudflare-gastown/src/db/tables/agent-metadata.table.ts b/cloudflare-gastown/src/db/tables/agent-metadata.table.ts index c618ca0900..6409e4c1a6 100644 --- a/cloudflare-gastown/src/db/tables/agent-metadata.table.ts +++ b/cloudflare-gastown/src/db/tables/agent-metadata.table.ts @@ -31,6 +31,10 @@ export const AgentMetadataRecord = z.object({ last_activity_at: z.string().nullable(), agent_status_message: z.string().nullable(), agent_status_updated_at: z.string().nullable(), + // SDK-level activity watermark (populated by enriched heartbeat) + last_event_type: z.string().nullable().optional(), + last_event_at: z.string().nullable().optional(), + active_tools: z.string().nullable().optional(), }); 
export type AgentMetadataRecord = z.output<typeof AgentMetadataRecord>; @@ -53,6 +57,9 @@ export function createTableAgentMetadata(): string { last_activity_at: `text`, agent_status_message: `text`, agent_status_updated_at: `text`, + last_event_type: `text`, + last_event_at: `text`, + active_tools: `text default '[]'`, }); } @@ -61,5 +68,9 @@ export function migrateAgentMetadata(): string[] { return [ `ALTER TABLE agent_metadata ADD COLUMN agent_status_message text`, `ALTER TABLE agent_metadata ADD COLUMN agent_status_updated_at text`, + // SDK activity watermark columns (Phase 4 reconciler) + `ALTER TABLE agent_metadata ADD COLUMN last_event_type text`, + `ALTER TABLE agent_metadata ADD COLUMN last_event_at text`, + `ALTER TABLE agent_metadata ADD COLUMN active_tools text default '[]'`, ]; } diff --git a/cloudflare-gastown/src/db/tables/bead-events.table.ts b/cloudflare-gastown/src/db/tables/bead-events.table.ts index 63607973b9..1a9ac8219b 100644 --- a/cloudflare-gastown/src/db/tables/bead-events.table.ts +++ b/cloudflare-gastown/src/db/tables/bead-events.table.ts @@ -23,8 +23,7 @@ export const BeadEventType = z.enum([ 'review_queue_depth_alert', 'escalation_rate_spike', 'agent_restart_loop', - 'dependency_added', - 'dependency_removed', + 'rework_requested', ]); export type BeadEventType = z.infer<typeof BeadEventType>; diff --git a/cloudflare-gastown/src/db/tables/town-events.table.ts b/cloudflare-gastown/src/db/tables/town-events.table.ts new file mode 100644 index 0000000000..95309e7a04 --- /dev/null +++ b/cloudflare-gastown/src/db/tables/town-events.table.ts @@ -0,0 +1,58 @@ +import { z } from 'zod'; +import { getTableFromZodSchema, getCreateTableQueryFromTable } from '../../util/table'; + +export const TownEventType = z.enum([ + 'agent_done', + 'agent_completed', + 'container_status', + 'pr_status_changed', + 'bead_created', + 'bead_cancelled', + 'convoy_started', + 'nudge_timeout', +]); + +export type TownEventType = z.output<typeof TownEventType>; + +export const TownEventRecord = z.object({ + event_id: z.string(),
event_type: TownEventType, + agent_id: z.string().nullable(), + bead_id: z.string().nullable(), + payload: z + .string() + .transform((v, ctx): Record<string, unknown> => { + try { + return JSON.parse(v) as Record<string, unknown>; + } catch { + ctx.addIssue({ code: z.ZodIssueCode.custom, message: 'Invalid JSON in payload' }); + return {}; + } + }) + .pipe(z.record(z.string(), z.unknown())), + created_at: z.string(), + processed_at: z.string().nullable(), +}); + +export type TownEventRecord = z.output<typeof TownEventRecord>; + +export const town_events = getTableFromZodSchema('town_events', TownEventRecord); + +export function createTableTownEvents(): string { + return getCreateTableQueryFromTable(town_events, { + event_id: `text primary key`, + event_type: `text not null`, + agent_id: `text`, + bead_id: `text`, + payload: `text not null default '{}'`, + created_at: `text not null`, + processed_at: `text`, + }); +} + +export function getIndexesTownEvents(): string[] { + return [ + `CREATE INDEX IF NOT EXISTS idx_town_events_pending ON ${town_events}(${town_events.columns.created_at}) WHERE ${town_events.columns.processed_at} IS NULL`, + `CREATE INDEX IF NOT EXISTS idx_town_events_type ON ${town_events}(${town_events.columns.event_type})`, + ]; +} diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 211639da78..808358a680 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -26,13 +26,18 @@ import * as config from './town/config'; import * as rigs from './town/rigs'; import * as dispatch from './town/container-dispatch'; import * as patrol from './town/patrol'; +import * as scheduling from './town/scheduling'; +import * as events from './town/events'; +import * as reconciler from './town/reconciler'; +import { applyAction } from './town/actions'; +import type { ApplyActionContext } from './town/actions'; +import { buildRefinerySystemPrompt } from '../prompts/refinery-system.prompt'; import { GitHubPRStatusSchema, GitLabMRStatusSchema
} from '../util/platform-pr.util'; // Table imports for beads-centric operations import { beads, BeadRecord, - AgentBeadRecord, EscalationBeadRecord, ConvoyBeadRecord, } from '../db/tables/beads.table'; @@ -115,8 +120,6 @@ function formatEventMessage(row: Record): string { // Alarm intervals const ACTIVE_ALARM_INTERVAL_MS = 5_000; // 5s when agents are active const IDLE_ALARM_INTERVAL_MS = 1 * 60_000; // 1m when idle -const DISPATCH_COOLDOWN_MS = 2 * 60_000; // 2 min — skip agents with recent dispatch activity -const MAX_DISPATCH_ATTEMPTS = 5; // Escalation constants const STALE_ESCALATION_THRESHOLD_MS = 4 * 60 * 60 * 1000; @@ -240,6 +243,80 @@ export class TownDO extends DurableObject { writeEvent(this.env, { ...data, delivery: 'internal', userId: this._ownerUserId }); } + /** Build the context object used by the scheduling sub-module. */ + private get schedulingCtx(): Parameters[0] { + return { + sql: this.sql, + env: this.env, + storage: this.ctx.storage, + townId: this.townId, + getTownConfig: () => this.getTownConfig(), + getRigConfig: (rigId: string) => this.getRigConfig(rigId), + resolveKilocodeToken: () => this.resolveKilocodeToken(), + emitEvent: data => this.emitEvent(data), + }; + } + + /** Build the context object used by the reconciler's applyAction. */ + private get applyActionCtx(): ApplyActionContext { + const schedulingCtx = this.schedulingCtx; + return { + sql: this.sql, + townId: this.townId, + dispatchAgent: async (agentId, beadId, rigId) => { + const agent = agents.getAgent(this.sql, agentId); + const bead = beadOps.getBead(this.sql, beadId); + if (!agent || !bead) return false; + + // Build refinery-specific system prompt with branch/target info + let systemPromptOverride: string | undefined; + if (agent.role === 'refinery' && bead.type === 'merge_request') { + const reviewMeta = reviewQueue.getReviewMetadata(this.sql, beadId); + const sourceBeadId = + typeof bead.metadata?.source_bead_id === 'string' ? 
bead.metadata.source_bead_id : null; + const townConfig = await this.getTownConfig(); + systemPromptOverride = buildRefinerySystemPrompt({ + identity: agent.identity, + rigId, + townId: this.townId, + gates: townConfig.refinery?.gates ?? [], + branch: reviewMeta?.branch ?? 'unknown', + targetBranch: reviewMeta?.target_branch ?? 'main', + polecatAgentId: + typeof bead.metadata?.source_agent_id === 'string' + ? bead.metadata.source_agent_id + : 'unknown', + mergeStrategy: townConfig.merge_strategy ?? 'direct', + }); + } + + return scheduling.dispatchAgent(schedulingCtx, agent, bead, { systemPromptOverride }); + }, + stopAgent: async agentId => { + await dispatch.stopAgentInContainer(this.env, this.townId, agentId); + }, + checkPRStatus: async prUrl => { + const townConfig = await this.getTownConfig(); + return this.checkPRStatus(prUrl, townConfig); + }, + queueNudge: async (agentId, message, _tier) => { + await this.queueNudge(agentId, message, { + mode: 'immediate', + priority: 'urgent', + source: 'reconciler', + }); + }, + insertEvent: (eventType, params) => { + events.insertEvent(this.sql, eventType as Parameters[1], params); + }, + emitEvent: data => { + if (typeof data.event === 'string') { + this.emitEvent(data as Parameters[0]); + } + }, + }; + } + // ── WebSocket: status broadcast ────────────────────────────────────── /** @@ -252,7 +329,6 @@ export class TownDO extends DurableObject { url.pathname.endsWith('/status/ws') && request.headers.get('Upgrade')?.toLowerCase() === 'websocket' ) { - await this.ensureInitialized(); const pair = new WebSocketPair(); const [client, server] = [pair[0], pair[1]]; this.ctx.acceptWebSocket(server, ['status']); @@ -393,7 +469,6 @@ export class TownDO extends DurableObject { * Called by the mayor via the /mayor/ui-action HTTP route. 
*/ async broadcastUiAction(action: UiAction): Promise { - await this.ensureInitialized(); const sockets = this.ctx.getWebSockets('status'); if (sockets.length === 0) return; const frame = JSON.stringify({ channel: 'ui_action', action, ts: now() }); @@ -443,6 +518,9 @@ export class TownDO extends DurableObject { query(this.sql, idx, []); } + // Reconciler event log + events.initTownEventsTable(this.sql); + // Ensure the alarm loop is running. After a deploy/restart, the // Cloudflare runtime normally delivers missed alarms, but if the alarm // was never set or was deleted by destroy(), the loop is dead. Re-arm @@ -452,6 +530,7 @@ export class TownDO extends DurableObject { } private _townId: string | null = null; + private _lastReconcilerMetrics: reconciler.ReconcilerMetrics | null = null; private _dashboardContext: string | null = null; private get townId(): string { @@ -557,12 +636,10 @@ export class TownDO extends DurableObject { gitUrl: string; defaultBranch: string; }): Promise { - await this.ensureInitialized(); return rigs.addRig(this.sql, input); } async removeRig(rigId: string): Promise { - await this.ensureInitialized(); rigs.removeRig(this.sql, rigId); await this.ctx.storage.delete(`rig:${rigId}:config`); // Delete all beads belonging to this rig (cascades to satellite tables via deleteBead) @@ -581,12 +658,10 @@ export class TownDO extends DurableObject { } async listRigs(): Promise { - await this.ensureInitialized(); return rigs.listRigs(this.sql); } async getRigAsync(rigId: string): Promise { - await this.ensureInitialized(); return rigs.getRig(this.sql, rigId); } @@ -596,9 +671,6 @@ export class TownDO extends DurableObject { console.log( `${TOWN_LOG} configureRig: rigId=${rigConfig.rigId} hasKilocodeToken=${!!rigConfig.kilocodeToken}` ); - if (rigConfig.townId) { - await this.setTownId(rigConfig.townId); - } await this.ctx.storage.put(`rig:${rigConfig.rigId}:config`, rigConfig); if (rigConfig.kilocodeToken) { @@ -696,7 +768,6 @@ export class TownDO 
extends DurableObject { // ══════════════════════════════════════════════════════════════════ async createBead(input: CreateBeadInput): Promise { - await this.ensureInitialized(); const bead = beadOps.createBead(this.sql, input); this.emitEvent({ event: 'bead.created', @@ -716,17 +787,23 @@ export class TownDO extends DurableObject { } async getBeadAsync(beadId: string): Promise { - await this.ensureInitialized(); return beadOps.getBead(this.sql, beadId); } async listBeads(filter: BeadFilter): Promise { - await this.ensureInitialized(); return beadOps.listBeads(this.sql, filter); } async updateBeadStatus(beadId: string, status: string, agentId: string): Promise { - await this.ensureInitialized(); + // Record terminal transitions as bead_cancelled events for the reconciler. + // Non-terminal transitions are normal lifecycle changes, not cancellations. + if (status === 'closed' || status === 'failed') { + events.insertEvent(this.sql, 'bead_cancelled', { + bead_id: beadId, + payload: { cancel_status: status }, + }); + } + // Convoy progress is updated automatically inside beadOps.updateBeadStatus // when the bead reaches a terminal status (closed/failed). 
const bead = beadOps.updateBeadStatus(this.sql, beadId, status, agentId); @@ -792,7 +869,6 @@ export class TownDO extends DurableObject { } async deleteBead(beadId: string): Promise { - await this.ensureInitialized(); beadOps.deleteBead(this.sql, beadId); } @@ -801,7 +877,6 @@ export class TownDO extends DurableObject { since?: string; limit?: number; }): Promise { - await this.ensureInitialized(); return beadOps.listBeadEvents(this.sql, options); } @@ -822,7 +897,6 @@ export class TownDO extends DurableObject { }>, actorId: string ): Promise { - await this.ensureInitialized(); const bead = beadOps.updateBeadFields(this.sql, beadId, fields, actorId); // When a bead closes via field update, check for newly unblocked beads @@ -833,80 +907,12 @@ export class TownDO extends DurableObject { return bead; } - // ── Bead Dependency Editing ────────────────────────────────────────── - - /** - * Add a dependency edge between two beads. - * Validates, detects cycles, and logs a bead event. - */ - async addBeadDependency( - beadId: string, - dependsOnBeadId: string, - type: 'blocks' | 'tracks' | 'parent-child' - ): Promise { - await this.ensureInitialized(); - beadOps.addBeadDependency(this.sql, beadId, dependsOnBeadId, type); - beadOps.logBeadEvent(this.sql, { - beadId, - agentId: null, - eventType: 'dependency_added', - metadata: { depends_on_bead_id: dependsOnBeadId, dependency_type: type }, - }); - } - - /** - * Remove a dependency edge between two beads. - * After removal, checks if any beads are now unblocked and arms the - * alarm so they get dispatched promptly. 
- */ - async removeBeadDependency(beadId: string, dependsOnBeadId: string): Promise { - await this.ensureInitialized(); - const deleted = beadOps.removeBeadDependency(this.sql, beadId, dependsOnBeadId); - if (deleted) { - beadOps.logBeadEvent(this.sql, { - beadId, - agentId: null, - eventType: 'dependency_removed', - metadata: { depends_on_bead_id: dependsOnBeadId }, - }); - // If beadId has no remaining unresolved blockers, arm the alarm so - // it gets dispatched promptly. - if (!beadOps.hasUnresolvedBlockers(this.sql, beadId)) { - await this.ctx.storage.setAlarm(Date.now()); - } - } - return deleted; - } - - // ── Convoy Membership ──────────────────────────────────────────────── - - /** - * Add a bead to an existing convoy. Creates the 'tracks' dependency, - * merges convoy metadata into the bead, and increments total_beads. - */ - async addBeadToConvoy(beadId: string, convoyId: string): Promise { - await this.ensureInitialized(); - beadOps.addBeadToConvoy(this.sql, beadId, convoyId); - } - - /** - * Remove a bead from its convoy. Deletes the 'tracks' dependency, - * strips convoy metadata, and decrements total_beads. - * Returns the convoy ID the bead was removed from, or null if not in a convoy. - */ - async removeBeadFromConvoy(beadId: string): Promise { - await this.ensureInitialized(); - return beadOps.removeBeadFromConvoy(this.sql, beadId); - } - /** * Force-reset an agent to idle, unhooking from its current bead if any. * Sets the bead status back to 'open' so it can be re-dispatched. * Writes a bead_event for auditability. 
*/ async resetAgent(agentId: string): Promise { - await this.ensureInitialized(); - const agent = agents.getAgent(this.sql, agentId); if (!agent) throw new Error(`Agent ${agentId} not found`); @@ -945,8 +951,6 @@ export class TownDO extends DurableObject { convoyId: string, fields: Partial<{ merge_mode: ConvoyMergeMode; feature_branch: string }> ): Promise { - await this.ensureInitialized(); - const convoy = this.getConvoy(convoyId); if (!convoy) return null; @@ -991,32 +995,26 @@ export class TownDO extends DurableObject { // ══════════════════════════════════════════════════════════════════ async registerAgent(input: RegisterAgentInput): Promise { - await this.ensureInitialized(); return agents.registerAgent(this.sql, input); } async getAgentAsync(agentId: string): Promise { - await this.ensureInitialized(); return agents.getAgent(this.sql, agentId); } async getAgentByIdentity(identity: string): Promise { - await this.ensureInitialized(); return agents.getAgentByIdentity(this.sql, identity); } async listAgents(filter?: AgentFilter): Promise { - await this.ensureInitialized(); return agents.listAgents(this.sql, filter); } async updateAgentStatus(agentId: string, status: string): Promise { - await this.ensureInitialized(); agents.updateAgentStatus(this.sql, agentId, status); } async deleteAgent(agentId: string): Promise { - await this.ensureInitialized(); agents.deleteAgent(this.sql, agentId); try { const agentDO = getAgentDOStub(this.env, agentId); @@ -1027,23 +1025,19 @@ export class TownDO extends DurableObject { } async hookBead(agentId: string, beadId: string): Promise { - await this.ensureInitialized(); agents.hookBead(this.sql, agentId, beadId); await this.armAlarmIfNeeded(); } async unhookBead(agentId: string): Promise { - await this.ensureInitialized(); agents.unhookBead(this.sql, agentId); } async getHookedBead(agentId: string): Promise { - await this.ensureInitialized(); return agents.getHookedBead(this.sql, agentId); } async getOrCreateAgent(role: 
AgentRole, rigId: string): Promise { - await this.ensureInitialized(); return agents.getOrCreateAgent(this.sql, role, rigId, this.townId); } @@ -1062,30 +1056,32 @@ export class TownDO extends DurableObject { // ── Prime & Checkpoint ──────────────────────────────────────────── async prime(agentId: string): Promise { - await this.ensureInitialized(); return agents.prime(this.sql, agentId); } async writeCheckpoint(agentId: string, data: unknown): Promise { - await this.ensureInitialized(); agents.writeCheckpoint(this.sql, agentId, data); } async readCheckpoint(agentId: string): Promise { - await this.ensureInitialized(); return agents.readCheckpoint(this.sql, agentId); } // ── Heartbeat ───────────────────────────────────────────────────── - async touchAgentHeartbeat(agentId: string): Promise { - await this.ensureInitialized(); - agents.touchAgent(this.sql, agentId); + async touchAgentHeartbeat( + agentId: string, + watermark?: { + lastEventType?: string | null; + lastEventAt?: string | null; + activeTools?: string[]; + } + ): Promise { + agents.touchAgent(this.sql, agentId, watermark); await this.armAlarmIfNeeded(); } async updateAgentStatusMessage(agentId: string, message: string): Promise { - await this.ensureInitialized(); agents.updateAgentStatusMessage(this.sql, agentId, message); const agent = agents.getAgent(this.sql, agentId); if (agent?.current_hook_bead_id) { @@ -1112,12 +1108,10 @@ export class TownDO extends DurableObject { // ══════════════════════════════════════════════════════════════════ async sendMail(input: SendMailInput): Promise { - await this.ensureInitialized(); mail.sendMail(this.sql, input); } async checkMail(agentId: string): Promise { - await this.ensureInitialized(); return mail.checkMail(this.sql, agentId); } @@ -1140,8 +1134,6 @@ export class TownDO extends DurableObject { ttlSeconds?: number; } ): Promise { - await this.ensureInitialized(); - const nudgeId = crypto.randomUUID(); const mode = options?.mode ?? 
'wait-idle'; const priority = options?.priority ?? 'normal'; @@ -1209,8 +1201,6 @@ export class TownDO extends DurableObject { ): Promise< { nudge_id: string; message: string; mode: string; priority: string; source: string }[] > { - await this.ensureInitialized(); - const rows = [ ...query( this.sql, @@ -1246,8 +1236,6 @@ export class TownDO extends DurableObject { /** Mark a nudge as delivered. */ async markNudgeDelivered(nudgeId: string): Promise { - await this.ensureInitialized(); - query( this.sql, /* sql */ ` @@ -1264,8 +1252,6 @@ export class TownDO extends DurableObject { * Called from the alarm loop. Returns the count of nudges expired. */ async expireStaleNudges(): Promise { - await this.ensureInitialized(); - const result = [ ...query( this.sql, @@ -1289,7 +1275,6 @@ export class TownDO extends DurableObject { // ══════════════════════════════════════════════════════════════════ async submitToReviewQueue(input: ReviewQueueInput): Promise { - await this.ensureInitialized(); reviewQueue.submitToReviewQueue(this.sql, input); this.emitEvent({ event: 'review.submitted', @@ -1301,12 +1286,10 @@ export class TownDO extends DurableObject { } async popReviewQueue(): Promise { - await this.ensureInitialized(); return reviewQueue.popReviewQueue(this.sql); } async completeReview(entryId: string, status: 'merged' | 'failed'): Promise { - await this.ensureInitialized(); reviewQueue.completeReview(this.sql, entryId, status); } @@ -1316,8 +1299,6 @@ export class TownDO extends DurableObject { message?: string; commit_sha?: string; }): Promise { - await this.ensureInitialized(); - // Resolve the source bead ID before completing the review, so we can // trigger dispatchUnblockedBeads for it after the MR closes. const mrBead = beadOps.getBead(this.sql, input.entry_id); @@ -1346,40 +1327,25 @@ export class TownDO extends DurableObject { }); } - // When a review fails or conflicts (rework), the source bead was - // returned to in_progress. 
Re-hook a polecat and re-dispatch so the - // rework starts automatically. The original polecat may already be - // working on something else, so fall back to getOrCreateAgent. - if ((input.status === 'failed' || input.status === 'conflict') && sourceBeadId) { - const sourceBead = beadOps.getBead(this.sql, sourceBeadId); - if (sourceBead?.rig_id) { - try { - const reworkAgent = agents.getOrCreateAgent( - this.sql, - 'polecat', - sourceBead.rig_id, - this.townId - ); - agents.hookBead(this.sql, reworkAgent.id, sourceBeadId); - this.dispatchAgent(reworkAgent, sourceBead).catch(err => - console.error( - `${TOWN_LOG} completeReviewWithResult: fire-and-forget rework dispatch failed for bead=${sourceBeadId}`, - err - ) - ); - } catch (err) { - console.warn( - `${TOWN_LOG} completeReviewWithResult: could not dispatch rework for bead=${sourceBeadId}:`, - err - ); - } - } - } + // Rework is handled by the normal scheduling path: the failed/conflict + // path in completeReviewWithResult sets the source bead to 'open' with + // assignee cleared. feedStrandedConvoys or rehookOrphanedBeads will + // hook a polecat, and schedulePendingWork will dispatch it. } async agentDone(agentId: string, input: AgentDoneInput): Promise { - await this.ensureInitialized(); - reviewQueue.agentDone(this.sql, agentId, input); + // Event-only: record the fact. The alarm's Phase 0 drains and + // applies all pending events before reconciliation runs. DO RPCs + // are serialized, so agentCompleted can't race with this — it + // waits for agentDone to finish before executing. + events.insertEvent(this.sql, 'agent_done', { + agent_id: agentId, + payload: { + branch: input.branch, + ...(input.pr_url ? { pr_url: input.pr_url } : {}), + ...(input.summary ? 
{ summary: input.summary } : {}), + }, + }); await this.armAlarmIfNeeded(); } @@ -1387,14 +1353,26 @@ export class TownDO extends DurableObject { agentId: string, input: { status: 'completed' | 'failed'; reason?: string } ): Promise { - await this.ensureInitialized(); + // Resolve empty agentId to mayor (backwards compat with container callback) let resolvedAgentId = agentId; if (!resolvedAgentId) { const mayor = agents.listAgents(this.sql, { role: 'mayor' })[0]; if (mayor) resolvedAgentId = mayor.id; } + + // Event-only: record the fact. The alarm's Phase 0 drains and + // applies all pending events. DO RPCs are serialized so there's + // no race with agentDone. + events.insertEvent(this.sql, 'agent_completed', { + agent_id: resolvedAgentId || agentId, + payload: { + status: input.status, + ...(input.reason ? { reason: input.reason } : {}), + }, + }); + + // Emit analytics event (not part of reconciler — UI/observability concern) if (resolvedAgentId) { - const result = reviewQueue.agentCompleted(this.sql, resolvedAgentId, input); const agent = agents.getAgent(this.sql, resolvedAgentId); this.emitEvent({ event: 'agent.exited', @@ -1402,36 +1380,79 @@ export class TownDO extends DurableObject { agentId: resolvedAgentId, role: agent?.role, }); + } + await this.armAlarmIfNeeded(); + // Rework dispatch is handled by the reconciler's reconcileBeads Rule 1: + // open beads with no assignee get agents on the next alarm tick. + } - // If the refinery exited without merging (rework), dispatch a - // polecat to re-work the source bead. This mirrors the rework - // dispatch in completeReviewWithResult. 
- if (result.reworkSourceBeadId) { - const sourceBead = beadOps.getBead(this.sql, result.reworkSourceBeadId); - if (sourceBead?.rig_id) { - try { - const reworkAgent = agents.getOrCreateAgent( - this.sql, - 'polecat', - sourceBead.rig_id, - this.townId - ); - agents.hookBead(this.sql, reworkAgent.id, result.reworkSourceBeadId); - this.dispatchAgent(reworkAgent, sourceBead).catch(err => - console.error( - `${TOWN_LOG} agentCompleted: rework dispatch failed for bead=${result.reworkSourceBeadId}`, - err - ) - ); - } catch (err) { - console.warn( - `${TOWN_LOG} agentCompleted: could not dispatch rework for bead=${result.reworkSourceBeadId}:`, - err - ); - } - } - } + /** + * Refinery requests changes on an in-progress MR bead. Creates a rework + * bead that blocks the MR bead. The refinery should call gt_done after + * this to release its session. The reconciler assigns a polecat to the + * rework bead; when it closes, the MR unblocks and the refinery re-reviews. + */ + async requestChanges( + agentId: string, + input: { feedback: string; files?: string[] } + ): Promise<{ rework_bead_id: string }> { + const agent = agents.getAgent(this.sql, agentId); + if (!agent) throw new Error(`Agent ${agentId} not found`); + if (agent.role !== 'refinery') throw new Error(`Only refineries can request changes`); + if (!agent.current_hook_bead_id) throw new Error(`Agent ${agentId} is not hooked to a bead`); + + const mrBead = beadOps.getBead(this.sql, agent.current_hook_bead_id); + if (!mrBead || mrBead.type !== 'merge_request') { + throw new Error(`Agent ${agentId} is not hooked to a merge_request bead`); } + + // Find the source bead (the original issue the polecat worked on) + const sourceBeadId = + typeof mrBead.metadata?.source_bead_id === 'string' ? mrBead.metadata.source_bead_id : null; + const sourceBead = sourceBeadId ? 
beadOps.getBead(this.sql, sourceBeadId) : null; + + // Get branch info from review_metadata + const reviewMeta = reviewQueue.getReviewMetadata(this.sql, mrBead.bead_id); + + const reworkBead = beadOps.createBead(this.sql, { + type: 'issue', + title: `Rework: ${sourceBead?.title ?? mrBead.title}`, + body: input.feedback, + priority: sourceBead?.priority ?? 'medium', + rig_id: mrBead.rig_id ?? undefined, + labels: ['gt:rework'], + metadata: { + rework_for: sourceBeadId, + mr_bead_id: mrBead.bead_id, + branch: reviewMeta?.branch ?? null, + target_branch: reviewMeta?.target_branch ?? null, + files: input.files ?? [], + }, + }); + + // Rework bead blocks the MR bead — MR can't proceed until rework is done + beadOps.insertDependency(this.sql, mrBead.bead_id, reworkBead.bead_id, 'blocks'); + + // Record event so the reconciler picks up the rework bead + events.insertEvent(this.sql, 'bead_created', { + bead_id: reworkBead.bead_id, + payload: { bead_type: 'issue', rig_id: mrBead.rig_id }, + }); + + beadOps.logBeadEvent(this.sql, { + beadId: mrBead.bead_id, + agentId, + eventType: 'rework_requested', + newValue: reworkBead.bead_id, + metadata: { feedback: input.feedback.slice(0, 500), files: input.files }, + }); + + console.log( + `${TOWN_LOG} requestChanges: refinery=${agentId} mr=${mrBead.bead_id} rework=${reworkBead.bead_id}` + ); + + await this.armAlarmIfNeeded(); + return { rework_bead_id: reworkBead.bead_id }; } /** @@ -1445,7 +1466,6 @@ export class TownDO extends DurableObject { action: string; resolution_notes: string; }): Promise { - await this.ensureInitialized(); const triageBead = beadOps.getBead(this.sql, input.triage_request_bead_id); if (!triageBead) throw new Error(`Triage request bead ${input.triage_request_bead_id} not found`); @@ -1671,19 +1691,16 @@ export class TownDO extends DurableObject { } async createMolecule(beadId: string, formula: unknown): Promise { - await this.ensureInitialized(); return reviewQueue.createMolecule(this.sql, beadId, formula); 
} async getMoleculeCurrentStep( agentId: string ): Promise<{ molecule: Molecule; step: unknown } | null> { - await this.ensureInitialized(); return reviewQueue.getMoleculeCurrentStep(this.sql, agentId); } async advanceMoleculeStep(agentId: string, summary: string): Promise { - await this.ensureInitialized(); return reviewQueue.advanceMoleculeStep(this.sql, agentId, summary); } @@ -1697,21 +1714,7 @@ export class TownDO extends DurableObject { body?: string; priority?: string; metadata?: Record; - dependsOn?: string[]; - convoyId?: string; }): Promise<{ bead: Bead; agent: Agent }> { - await this.ensureInitialized(); - - // Validate the convoy exists before creating the bead so a bad - // convoy_id doesn't leave behind an orphan bead row. - if (input.convoyId) { - const convoyBead = beadOps.getBead(this.sql, input.convoyId); - if (!convoyBead) throw new Error(`Convoy ${input.convoyId} not found`); - if (convoyBead.type !== 'convoy') { - throw new Error(`Bead ${input.convoyId} is not a convoy (type: ${convoyBead.type})`); - } - } - const createdBead = beadOps.createBead(this.sql, { type: 'issue', title: input.title, @@ -1721,21 +1724,14 @@ export class TownDO extends DurableObject { metadata: input.metadata, }); - // If a convoy_id was provided, add the bead to the convoy (tracks dep + metadata + counter). - // The convoy was already validated above, so addBeadToConvoy won't throw for a missing convoy. - if (input.convoyId) { - beadOps.addBeadToConvoy(this.sql, createdBead.bead_id, input.convoyId); - } - - // Insert dependency rows before hooking/dispatching so the bead's - // blocker set is complete before any agent can start work on it. - // This is atomic within the DO's synchronous SQLite transaction. 
- if (input.dependsOn && input.dependsOn.length > 0) { - for (const depBeadId of input.dependsOn) { - beadOps.addBeadDependency(this.sql, createdBead.bead_id, depBeadId, 'blocks'); - } - } + events.insertEvent(this.sql, 'bead_created', { + bead_id: createdBead.bead_id, + payload: { bead_type: 'issue', rig_id: input.rigId, has_blockers: false }, + }); + // Fast path: assign agent immediately for UX ("Toast is on it!") + // rather than waiting for the next alarm tick. Uses the same + // getOrCreateAgent + hookBead path the reconciler would use. const agent = agents.getOrCreateAgent(this.sql, 'polecat', input.rigId, this.townId); agents.hookBead(this.sql, agent.id, createdBead.bead_id); @@ -1743,18 +1739,11 @@ export class TownDO extends DurableObject { const bead = beadOps.getBead(this.sql, createdBead.bead_id) ?? createdBead; const hookedAgent = agents.getAgent(this.sql, agent.id) ?? agent; - // Only dispatch if the bead has no unresolved blockers. Mirror the - // slingConvoy() guard so a bead with depends_on is not started before - // its blockers close. - if (!beadOps.hasUnresolvedBlockers(this.sql, bead.bead_id)) { - this.dispatchAgent(hookedAgent, bead).catch(err => - console.error(`${TOWN_LOG} slingBead: fire-and-forget dispatchAgent failed:`, err) - ); - } else { - console.log( - `${TOWN_LOG} slingBead: bead=${bead.bead_id} blocked, deferring dispatch until deps close` - ); - } + // Fire-and-forget dispatch so the sling call returns immediately. + // The alarm loop retries if this fails. 
+ this.dispatchAgent(hookedAgent, bead).catch(err => + console.error(`${TOWN_LOG} slingBead: fire-and-forget dispatchAgent failed:`, err) + ); await this.armAlarmIfNeeded(); return { bead, agent: hookedAgent }; } @@ -1786,7 +1775,6 @@ export class TownDO extends DurableObject { _model?: string, uiContext?: string ): Promise<{ agentId: string; sessionStatus: 'idle' | 'active' | 'starting' }> { - await this.ensureInitialized(); const townId = this.townId; let mayor = agents.listAgents(this.sql, { role: 'mayor' })[0] ?? null; @@ -1871,7 +1859,6 @@ export class TownDO extends DurableObject { * without requiring the user to send a message first. */ async ensureMayor(): Promise<{ agentId: string; sessionStatus: 'idle' | 'active' | 'starting' }> { - await this.ensureInitialized(); const townId = this.townId; let mayor = agents.listAgents(this.sql, { role: 'mayor' })[0] ?? null; @@ -1955,7 +1942,6 @@ export class TownDO extends DurableObject { lastActivityAt: string; } | null; }> { - await this.ensureInitialized(); const mayor = agents.listAgents(this.sql, { role: 'mayor' })[0] ?? null; const mapStatus = (agentStatus: string): 'idle' | 'active' | 'starting' => { @@ -2014,7 +2000,6 @@ export class TownDO extends DurableObject { beads: Array<{ bead_id: string; rig_id: string }>; created_by?: string; }): Promise { - await this.ensureInitialized(); const parsed = z .object({ title: z.string().min(1), @@ -2096,8 +2081,6 @@ export class TownDO extends DurableObject { } async onBeadClosed(input: { convoyId: string; beadId: string }): Promise { - await this.ensureInitialized(); - // Count closed tracked beads const closedRows = [ ...query( @@ -2163,8 +2146,6 @@ export class TownDO extends DurableObject { * still assigned to those beads so they return to the idle pool. 
*/ async closeConvoy(convoyId: string): Promise { - await this.ensureInitialized(); - const convoy = this.getConvoy(convoyId); if (!convoy) return null; @@ -2252,8 +2233,6 @@ export class TownDO extends DurableObject { merge_mode?: 'review-then-land' | 'review-and-merge'; staged?: boolean; }): Promise<{ convoy: ConvoyEntry; beads: Array<{ bead: Bead; agent: Agent | null }> }> { - await this.ensureInitialized(); - // Resolve staged: explicit request wins, otherwise fall back to town config default. const townConfig = await this.getTownConfig(); const isStaged = input.staged ?? townConfig.staged_convoys_default; @@ -2411,38 +2390,31 @@ export class TownDO extends DurableObject { } } - if (isStaged) { - // Staged mode: collect beads without hooking agents or dispatching. - for (const beadId of beadIds) { - const bead = beadOps.getBead(this.sql, beadId); - if (!bead) continue; - results.push({ bead, agent: null }); - } - } else { - // 4. For each bead: assign a polecat, but only dispatch if unblocked - for (let i = 0; i < beadIds.length; i++) { - const beadId = beadIds[i]; - const agent = agents.getOrCreateAgent(this.sql, 'polecat', input.rigId, this.townId); - agents.hookBead(this.sql, agent.id, beadId); - - const bead = beadOps.getBead(this.sql, beadId); - const hookedAgent = agents.getAgent(this.sql, agent.id) ?? agent; - if (!bead) continue; - - // Only dispatch beads with no unresolved blockers - if (!beadOps.hasUnresolvedBlockers(this.sql, beadId)) { - this.dispatchAgent(hookedAgent, bead).catch(err => - console.error(`${TOWN_LOG} slingConvoy: fire-and-forget dispatchAgent failed:`, err) - ); - } else { - console.log( - `${TOWN_LOG} slingConvoy: bead=${beadId} blocked, deferring dispatch until deps close` - ); - } + // Record bead_created events for reconciler (dual-write, no behavior change) + for (let i = 0; i < beadIds.length; i++) { + const hasBlockers = (input.tasks[i].depends_on ?? 
[]).length > 0; + events.insertEvent(this.sql, 'bead_created', { + bead_id: beadIds[i], + payload: { + bead_type: 'issue', + rig_id: input.rigId, + convoy_id: convoyId, + has_blockers: hasBlockers, + }, + }); + } - results.push({ bead, agent: hookedAgent }); - } + // Lazy assignment: beads are created with no assignee. The reconciler's + // reconcileBeads Rule 1 assigns agents to unblocked beads on the next + // alarm tick. This avoids creating N polecats upfront for a convoy + // where only 1-3 beads are unblocked initially (#1249). + for (const beadId of beadIds) { + const bead = beadOps.getBead(this.sql, beadId); + if (!bead) continue; + results.push({ bead, agent: null }); + } + if (!isStaged) { await this.armAlarmIfNeeded(); } @@ -2461,9 +2433,7 @@ export class TownDO extends DurableObject { */ async startConvoy( convoyId: string - ): Promise<{ convoy: ConvoyEntry; beads: Array<{ bead: Bead; agent: Agent }> }> { - await this.ensureInitialized(); - + ): Promise<{ convoy: ConvoyEntry; beads: Array<{ bead: Bead; agent: Agent | null }> }> { const convoy = this.getConvoy(convoyId); if (!convoy) throw new Error(`Convoy not found: ${convoyId}`); if (!convoy.staged) throw new Error(`Convoy is not staged: ${convoyId}`); @@ -2487,51 +2457,17 @@ export class TownDO extends DurableObject { .parse(trackedRows) .map(r => r.bead_id); - const results: Array<{ bead: Bead; agent: Agent }> = []; + const results: Array<{ bead: Bead; agent: Agent | null }> = []; + // Lazy assignment: just collect beads. The reconciler's reconcileBeads + // Rule 1 assigns agents to unblocked beads on the next alarm tick. for (const beadId of trackedBeadIds) { const bead = beadOps.getBead(this.sql, beadId); if (!bead) continue; - - const rigId = bead.rig_id; - if (!rigId) continue; - - // Skip beads already hooked from a prior partial attempt (retry-safe). 
- let hookedAgent: Agent; - if (bead.assignee_agent_bead_id) { - const existing = agents.getAgent(this.sql, bead.assignee_agent_bead_id); - if (existing) { - hookedAgent = existing; - } else { - // Orphaned assignee reference — re-hook with a fresh agent - const agent = agents.getOrCreateAgent(this.sql, 'polecat', rigId, this.townId); - agents.hookBead(this.sql, agent.id, beadId); - hookedAgent = agents.getAgent(this.sql, agent.id) ?? agent; - } - } else { - const agent = agents.getOrCreateAgent(this.sql, 'polecat', rigId, this.townId); - agents.hookBead(this.sql, agent.id, beadId); - hookedAgent = agents.getAgent(this.sql, agent.id) ?? agent; - } - - // Re-read bead after potential hookBead so assignee_agent_bead_id is up to date - const updatedBead = beadOps.getBead(this.sql, beadId) ?? bead; - - if (!beadOps.hasUnresolvedBlockers(this.sql, beadId)) { - this.dispatchAgent(hookedAgent, updatedBead).catch(err => - console.error(`${TOWN_LOG} startConvoy: fire-and-forget dispatchAgent failed:`, err) - ); - } else { - console.log( - `${TOWN_LOG} startConvoy: bead=${beadId} blocked, deferring dispatch until deps close` - ); - } - - results.push({ bead: updatedBead, agent: hookedAgent }); + results.push({ bead, agent: null }); } - // Clear the staged flag only after all agents are successfully hooked. - // If the loop above throws, the convoy stays staged so the caller can retry. + // Clear the staged flag so the reconciler sees these beads as active. query( this.sql, /* sql */ ` @@ -2542,6 +2478,10 @@ export class TownDO extends DurableObject { [convoyId] ); + events.insertEvent(this.sql, 'convoy_started', { + payload: { convoy_id: convoyId }, + }); + await this.armAlarmIfNeeded(); const updatedConvoy = this.getConvoy(convoyId); @@ -2558,7 +2498,6 @@ export class TownDO extends DurableObject { * List active convoys with progress counts. 
*/ async listConvoys(): Promise { - await this.ensureInitialized(); const rows = [ ...query( this.sql, @@ -2592,7 +2531,6 @@ export class TownDO extends DurableObject { } > > { - await this.ensureInitialized(); const convoys = await this.listConvoys(); const detailed = []; for (const convoy of convoys) { @@ -2621,7 +2559,6 @@ export class TownDO extends DurableObject { }) | null > { - await this.ensureInitialized(); const convoy = this.getConvoy(convoyId); if (!convoy) return null; @@ -2679,7 +2616,6 @@ export class TownDO extends DurableObject { // ══════════════════════════════════════════════════════════════════ async acknowledgeEscalation(escalationId: string): Promise { - await this.ensureInitialized(); query( this.sql, /* sql */ ` @@ -2706,7 +2642,6 @@ export class TownDO extends DurableObject { } async listEscalations(filter?: { acknowledged?: boolean }): Promise { - await this.ensureInitialized(); const rows = filter?.acknowledged !== undefined ? [ @@ -2734,7 +2669,6 @@ export class TownDO extends DurableObject { category?: string; message: string; }): Promise { - await this.ensureInitialized(); const beadId = generateId(); const timestamp = now(); @@ -2887,7 +2821,6 @@ export class TownDO extends DurableObject { return; } - await this.ensureInitialized(); const townId = this.townId; console.log(`${TOWN_LOG} alarm: fired for town=${townId}`); @@ -2912,70 +2845,183 @@ export class TownDO extends DurableObject { } } - // Process reviews FIRST so the refinery gets assigned before the - // scheduler dispatches new polecats. This prevents downstream beads - // from starting before upstream reviews are merged. + // ── Pre-phase: Observe container status for working agents ──────── + // Replaces witnessPatrol's zombie detection. Poll the container for + // each working/stalled agent and emit container_status events. These + // are drained in Phase 0 and applied before reconciliation. 
try { - await this.processReviewQueue(); + const workingAgentRows = z + .object({ bead_id: z.string() }) + .array() + .parse([ + ...query( + this.sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id} + FROM ${agent_metadata} + WHERE ${agent_metadata.status} IN ('working', 'stalled') + `, + [] + ), + ]); + + if (workingAgentRows.length > 0) { + const statusChecks = workingAgentRows.map(async row => { + try { + const containerInfo = await dispatch.checkAgentContainerStatus( + this.env, + townId, + row.bead_id + ); + events.upsertContainerStatus(this.sql, row.bead_id, { + status: containerInfo.status, + exit_reason: containerInfo.exitReason, + }); + } catch (err) { + console.warn( + `${TOWN_LOG} alarm: container status check failed for agent=${row.bead_id}`, + err + ); + } + }); + await Promise.allSettled(statusChecks); + } } catch (err) { - console.error(`${TOWN_LOG} alarm: processReviewQueue failed`, err); - Sentry.captureException(err); - } + console.error(`${TOWN_LOG} alarm: container observation failed`, err); + } + + // ── Reconciler loop (Phase 0-2) with metrics ───────────────────── + const reconcilerStart = Date.now(); + const metrics: reconciler.ReconcilerMetrics = { + eventsDrained: 0, + actionsEmitted: 0, + actionsByType: {}, + sideEffectsAttempted: 0, + sideEffectsSucceeded: 0, + sideEffectsFailed: 0, + invariantViolations: 0, + wallClockMs: 0, + pendingEventCount: 0, + }; + + // Phase 0: Drain events and apply state transitions try { - await this.processConvoyLandings(); + const pending = events.drainEvents(this.sql); + metrics.eventsDrained = pending.length; + if (pending.length > 0) { + console.log(`${TOWN_LOG} [reconciler] town=${townId} draining ${pending.length} event(s)`); + } + for (const event of pending) { + try { + reconciler.applyEvent(this.sql, event); + events.markProcessed(this.sql, event.event_id); + } catch (err) { + console.error( + `${TOWN_LOG} [reconciler] town=${townId} applyEvent failed: event=${event.event_id} 
type=${event.event_type}`, + err + ); + // Event stays unprocessed — will be retried on the next alarm tick. + // Mark it processed anyway after 3 consecutive failures to prevent + // a poison event from blocking the entire queue forever. + // For now, we skip it and let the next tick retry. + } + } } catch (err) { - console.error(`${TOWN_LOG} alarm: processConvoyLandings failed`, err); + console.error(`${TOWN_LOG} [reconciler] town=${townId} event drain failed`, err); Sentry.captureException(err); } + + // Phase 1: Reconcile — compute desired state vs actual state + const sideEffects: Array<() => Promise> = []; try { - await this.schedulePendingWork(); + const actions = reconciler.reconcile(this.sql); + metrics.actionsEmitted = actions.length; + for (const a of actions) { + metrics.actionsByType[a.type] = (metrics.actionsByType[a.type] ?? 0) + 1; + } + if (actions.length > 0) { + console.log( + `${TOWN_LOG} [reconciler] town=${townId} actions=${actions.length} types=${[...new Set(actions.map(a => a.type))].join(',')}` + ); + } + const ctx = this.applyActionCtx; + for (const action of actions) { + try { + const effect = applyAction(ctx, action); + if (effect) sideEffects.push(effect); + } catch (err) { + console.error( + `${TOWN_LOG} [reconciler] town=${townId} applyAction failed: type=${action.type}`, + err + ); + } + } } catch (err) { - console.error(`${TOWN_LOG} alarm: schedulePendingWork failed`, err); + console.error(`${TOWN_LOG} [reconciler] town=${townId} reconcile failed`, err); Sentry.captureException(err); } - try { - await this.witnessPatrol(); - } catch (err) { - console.error(`${TOWN_LOG} alarm: witnessPatrol failed`, err); - Sentry.captureException(err); + + // Phase 2: Execute side effects (async, best-effort) + metrics.sideEffectsAttempted = sideEffects.length; + if (sideEffects.length > 0) { + const results = await Promise.allSettled(sideEffects.map(fn => fn())); + for (const r of results) { + if (r.status === 'fulfilled') 
metrics.sideEffectsSucceeded++; + else metrics.sideEffectsFailed++; + } } + + // Post-reconcile: Invariant checker try { - this.deaconPatrol(); + const violations = reconciler.checkInvariants(this.sql); + metrics.invariantViolations = violations.length; + if (violations.length > 0) { + console.error( + `${TOWN_LOG} [reconciler:invariants] town=${townId} ${violations.length} violation(s): ${JSON.stringify(violations)}` + ); + } } catch (err) { - console.error(`${TOWN_LOG} alarm: deaconPatrol failed`, err); - Sentry.captureException(err); + console.warn(`${TOWN_LOG} [reconciler:invariants] town=${townId} check failed`, err); } + + metrics.wallClockMs = Date.now() - reconcilerStart; + metrics.pendingEventCount = events.pendingEventCount(this.sql); + this._lastReconcilerMetrics = metrics; + + // ── Phase 3: Housekeeping (independent, all parallelizable) ──── + await Promise.allSettled([ + this.deliverPendingMail().catch(err => + console.warn(`${TOWN_LOG} alarm: deliverPendingMail failed`, err) + ), + this.expireStaleNudges().catch(err => + console.warn(`${TOWN_LOG} alarm: expireStaleNudges failed`, err) + ), + this.reEscalateStaleEscalations().catch(err => + console.warn(`${TOWN_LOG} alarm: reEscalation failed`, err) + ), + this.maybeDispatchTriageAgent().catch(err => + console.warn(`${TOWN_LOG} alarm: maybeDispatchTriageAgent failed`, err) + ), + // Prune processed reconciler events older than 7 days + Promise.resolve().then(() => { + try { + events.pruneOldEvents(this.sql, 7 * 24 * 60 * 60 * 1000); + } catch (err) { + console.warn(`${TOWN_LOG} alarm: event pruning failed`, err); + } + }), + ]); + // Re-arm: fast when active, slow when idle + const active = this.hasActiveWork(); + const interval = active ? 
ACTIVE_ALARM_INTERVAL_MS : IDLE_ALARM_INTERVAL_MS; + await this.ctx.storage.setAlarm(Date.now() + interval); + + // Broadcast status snapshot to connected WebSocket clients try { - await this.deliverPendingMail(); + const snapshot = await this.getAlarmStatus(); + this.broadcastAlarmStatus(snapshot); } catch (err) { - console.warn(`${TOWN_LOG} alarm: deliverPendingMail failed`, err); - } - try { - await this.expireStaleNudges(); - } catch (err) { - console.warn(`${TOWN_LOG} alarm: expireStaleNudges failed`, err); - } - try { - await this.reEscalateStaleEscalations(); - } catch (err) { - console.warn(`${TOWN_LOG} alarm: reEscalation failed`, err); - } - try { - await this.maybeDispatchTriageAgent(); - } catch (err) { - console.warn(`${TOWN_LOG} alarm: maybeDispatchTriageAgent failed`, err); - } - // Re-arm: fast when active, slow when idle - const active = this.hasActiveWork(); - const interval = active ? ACTIVE_ALARM_INTERVAL_MS : IDLE_ALARM_INTERVAL_MS; - await this.ctx.storage.setAlarm(Date.now() + interval); - - // Broadcast status snapshot to connected WebSocket clients - try { - const snapshot = await this.getAlarmStatus(); - this.broadcastAlarmStatus(snapshot); - } catch (err) { - console.warn(`${TOWN_LOG} alarm: status broadcast failed`, err); + console.warn(`${TOWN_LOG} alarm: status broadcast failed`, err); } } @@ -3002,431 +3048,21 @@ export class TownDO extends DurableObject { } private hasActiveWork(): boolean { - const activeAgentRows = [ - ...query( - this.sql, - /* sql */ `SELECT COUNT(*) as cnt FROM ${agent_metadata} WHERE ${agent_metadata.status} IN ('working', 'stalled')`, - [] - ), - ]; - const pendingBeadRows = [ - ...query( - this.sql, - /* sql */ `SELECT COUNT(*) as cnt FROM ${agent_metadata} WHERE ${agent_metadata.status} = 'idle' AND ${agent_metadata.current_hook_bead_id} IS NOT NULL`, - [] - ), - ]; - const pendingReviewRows = [ - ...query( - this.sql, - /* sql */ `SELECT COUNT(*) as cnt FROM ${beads} WHERE ${beads.type} = 'merge_request' 
AND ${beads.status} IN ('open', 'in_progress')`, - [] - ), - ]; - const pendingTriageRows = [ - ...query( - this.sql, - /* sql */ `SELECT COUNT(*) as cnt FROM ${beads} WHERE ${beads.type} = 'issue' AND ${beads.labels} LIKE ? AND ${beads.status} = 'open'`, - [patrol.TRIAGE_LABEL_LIKE] - ), - ]; - return ( - Number(activeAgentRows[0]?.cnt ?? 0) > 0 || - Number(pendingBeadRows[0]?.cnt ?? 0) > 0 || - Number(pendingReviewRows[0]?.cnt ?? 0) > 0 || - Number(pendingTriageRows[0]?.cnt ?? 0) > 0 - ); + return scheduling.hasActiveWork(this.sql); } - /** - * Dispatch a single agent to the container. Used for eager dispatch from - * slingBead (so agents start immediately) and from schedulePendingWork - * (periodic recovery). Returns true if the agent was started. - */ - private async dispatchAgent(agent: Agent, bead: Bead): Promise { - try { - const rigId = agent.rig_id ?? rigs.listRigs(this.sql)[0]?.id ?? ''; - const rigConfig = rigId ? await this.getRigConfig(rigId) : null; - if (!rigConfig) { - console.warn(`${TOWN_LOG} dispatchAgent: no rig config for agent=${agent.id} rig=${rigId}`); - return false; - } - - const townConfig = await this.getTownConfig(); - const kilocodeToken = await this.resolveKilocodeToken(); - - // Check if this bead belongs to a convoy and resolve its feature branch. - // Convoy beads branch from the feature branch, not from defaultBranch. - const convoyId = beadOps.getConvoyForBead(this.sql, bead.bead_id); - const convoyFeatureBranch = convoyId - ? beadOps.getConvoyFeatureBranch(this.sql, convoyId) - : null; - - // Transition the bead to in_progress BEFORE starting the container. - // This must happen synchronously within the DO's I/O gate — the - // fire-and-forget pattern used by slingBead/slingConvoy means the - // calling RPC may return before startAgentInContainer completes, - // closing the I/O gate and preventing further SQL writes. 
- const currentBead = beadOps.getBead(this.sql, bead.bead_id); - if ( - currentBead && - currentBead.status !== 'in_progress' && - currentBead.status !== 'closed' && - currentBead.status !== 'failed' - ) { - beadOps.updateBeadStatus(this.sql, bead.bead_id, 'in_progress', agent.id); - } - - // Set status to 'working' BEFORE the async container start. This - // must happen synchronously so the SQL write executes while the I/O - // gate is still open. When dispatchAgent is called fire-and-forget - // (from slingBead, slingConvoy, dispatchUnblockedBeads), any SQL - // writes after the first `await` may be silently dropped because - // the DO's RPC response closes the I/O gate. If the container fails - // to start, we roll back to 'idle'. - const timestamp = now(); - query( - this.sql, - /* sql */ ` - UPDATE ${agent_metadata} - SET ${agent_metadata.columns.status} = 'working', - ${agent_metadata.columns.dispatch_attempts} = ${agent_metadata.columns.dispatch_attempts} + 1, - ${agent_metadata.columns.last_activity_at} = ? - WHERE ${agent_metadata.bead_id} = ? - `, - [timestamp, agent.id] - ); - - const started = await dispatch.startAgentInContainer(this.env, this.ctx.storage, { - townId: this.townId, - rigId, - userId: rigConfig.userId, - agentId: agent.id, - agentName: agent.name, - role: agent.role, - identity: agent.identity, - beadId: bead.bead_id, - beadTitle: bead.title, - beadBody: bead.body ?? '', - checkpoint: agent.checkpoint, - gitUrl: rigConfig.gitUrl, - defaultBranch: rigConfig.defaultBranch, - kilocodeToken, - townConfig, - platformIntegrationId: rigConfig.platformIntegrationId, - convoyFeatureBranch: convoyFeatureBranch ?? undefined, - }); - - if (started) { - // Reset dispatch_attempts on success (best-effort — may be - // dropped if the I/O gate is already closed, but that's fine - // because the agent is already 'working'). 
- query( - this.sql, - /* sql */ ` - UPDATE ${agent_metadata} - SET ${agent_metadata.columns.dispatch_attempts} = 0 - WHERE ${agent_metadata.bead_id} = ? - `, - [agent.id] - ); - console.log(`${TOWN_LOG} dispatchAgent: started agent=${agent.name}(${agent.id})`); - this.emitEvent({ - event: 'agent.spawned', - townId: this.townId, - rigId, - agentId: agent.id, - beadId: bead.bead_id, - role: agent.role, - }); - } else { - // Container failed to start — roll back to idle - query( - this.sql, - /* sql */ ` - UPDATE ${agent_metadata} - SET ${agent_metadata.columns.status} = 'idle' - WHERE ${agent_metadata.bead_id} = ? - `, - [agent.id] - ); - this.emitEvent({ - event: 'agent.dispatch_failed', - townId: this.townId, - rigId, - agentId: agent.id, - beadId: bead.bead_id, - role: agent.role, - }); - } - return started; - } catch (err) { - console.error(`${TOWN_LOG} dispatchAgent: failed for agent=${agent.id}:`, err); - Sentry.captureException(err, { extra: { agentId: agent.id, beadId: bead.bead_id } }); - // Roll back agent and bead to prevent them from being stuck in - // working/in_progress state when the container call throws. - try { - query( - this.sql, - /* sql */ ` - UPDATE ${agent_metadata} - SET ${agent_metadata.columns.status} = 'idle' - WHERE ${agent_metadata.bead_id} = ? - `, - [agent.id] - ); - if (agent.current_hook_bead_id) { - beadOps.updateBeadStatus(this.sql, agent.current_hook_bead_id, 'open', agent.id); - } - } catch (rollbackErr) { - console.error(`${TOWN_LOG} dispatchAgent: rollback also failed:`, rollbackErr); - } - this.emitEvent({ - event: 'agent.dispatch_failed', - townId: this.townId, - agentId: agent.id, - beadId: bead.bead_id, - role: agent.role, - }); - return false; - } + /** Dispatch a single agent to the container. Delegates to scheduling module. 
*/ + private dispatchAgent( + agent: Agent, + bead: Bead, + options?: { systemPromptOverride?: string } + ): Promise { + return scheduling.dispatchAgent(this.schedulingCtx, agent, bead, options); } - /** - * When a bead closes, find beads that were blocked by it and are now - * fully unblocked (all 'blocks' dependencies resolved). Dispatch their - * assigned agents. - */ + /** When a bead closes, dispatch any beads it was blocking. */ private dispatchUnblockedBeads(closedBeadId: string): void { - const unblockedIds = beadOps.getNewlyUnblockedBeads(this.sql, closedBeadId); - if (unblockedIds.length === 0) return; - - console.log( - `${TOWN_LOG} dispatchUnblockedBeads: ${unblockedIds.length} beads unblocked by ${closedBeadId}` - ); - - for (const beadId of unblockedIds) { - const bead = beadOps.getBead(this.sql, beadId); - if (!bead || bead.status === 'closed' || bead.status === 'failed') continue; - - // Find the agent hooked to this bead - if (!bead.assignee_agent_bead_id) continue; - const agent = agents.getAgent(this.sql, bead.assignee_agent_bead_id); - if (!agent || agent.status !== 'idle') continue; - - this.dispatchAgent(agent, bead).catch(err => - console.error( - `${TOWN_LOG} dispatchUnblockedBeads: fire-and-forget dispatch failed for bead=${beadId}`, - err - ) - ); - } - } - - /** - * Find idle agents with hooked beads and dispatch them to the container. - * Agents whose last_activity_at is within the dispatch cooldown are - * skipped — they have a fire-and-forget dispatch already in flight. 
- */ - private async schedulePendingWork(): Promise { - const cooldownCutoff = new Date(Date.now() - DISPATCH_COOLDOWN_MS).toISOString(); - const rows = [ - ...query( - this.sql, - /* sql */ ` - SELECT ${beads}.*, - ${agent_metadata.role}, ${agent_metadata.identity}, - ${agent_metadata.container_process_id}, - ${agent_metadata.status} AS status, - ${agent_metadata.current_hook_bead_id}, - ${agent_metadata.dispatch_attempts}, ${agent_metadata.last_activity_at}, - ${agent_metadata.checkpoint}, - ${agent_metadata.agent_status_message}, ${agent_metadata.agent_status_updated_at} - FROM ${beads} - INNER JOIN ${agent_metadata} ON ${beads.bead_id} = ${agent_metadata.bead_id} - WHERE ${agent_metadata.status} = 'idle' - AND ${agent_metadata.current_hook_bead_id} IS NOT NULL - AND (${agent_metadata.last_activity_at} IS NULL OR ${agent_metadata.last_activity_at} < ?) - `, - [cooldownCutoff] - ), - ]; - const pendingAgents: Agent[] = AgentBeadRecord.array() - .parse(rows) - .map(row => ({ - id: row.bead_id, - rig_id: row.rig_id, - role: row.role, - name: row.title, - identity: row.identity, - status: row.status, - current_hook_bead_id: row.current_hook_bead_id, - dispatch_attempts: row.dispatch_attempts, - last_activity_at: row.last_activity_at, - checkpoint: row.checkpoint, - created_at: row.created_at, - agent_status_message: row.agent_status_message, - agent_status_updated_at: row.agent_status_updated_at, - })); - - console.log(`${TOWN_LOG} schedulePendingWork: found ${pendingAgents.length} pending agents`); - if (pendingAgents.length === 0) return; - - const dispatchTasks: Array<() => Promise> = []; - - for (const agent of pendingAgents) { - const beadId = agent.current_hook_bead_id; - if (!beadId) continue; - const bead = beadOps.getBead(this.sql, beadId); - if (!bead) continue; - - if (agent.dispatch_attempts >= MAX_DISPATCH_ATTEMPTS) { - beadOps.updateBeadStatus(this.sql, beadId, 'failed', agent.id); - agents.unhookBead(this.sql, agent.id); - continue; - } - - // Skip 
beads that still have unresolved 'blocks' dependencies — - // they'll be dispatched by dispatchUnblockedBeads when their - // blockers close. - if (beadOps.hasUnresolvedBlockers(this.sql, beadId)) { - continue; - } - - dispatchTasks.push(async () => { - await this.dispatchAgent(agent, bead); - }); - } - - if (dispatchTasks.length > 0) { - await Promise.allSettled(dispatchTasks.map(fn => fn())); - } - } - - /** - * Witness patrol: detect dead/stale agents, GUPP violations with - * tiered escalation, orphaned work, agent GC, per-bead timeouts. - * - * Mechanical checks run as deterministic code. Ambiguous situations - * produce triage_request beads for the on-demand triage agent. - * See #442. - */ - private async witnessPatrol(): Promise { - const townId = this.townId; - - // ── Zombie detection (container status reconciliation) ────────── - const WorkingAgentRow = AgentMetadataRecord.pick({ - bead_id: true, - current_hook_bead_id: true, - last_activity_at: true, - }); - const workingAgents = WorkingAgentRow.array().parse([ - ...query( - this.sql, - /* sql */ ` - SELECT ${agent_metadata.bead_id}, ${agent_metadata.current_hook_bead_id}, ${agent_metadata.last_activity_at} - FROM ${agent_metadata} - WHERE ${agent_metadata.status} IN ('working', 'stalled') - `, - [] - ), - ]); - - for (const working of workingAgents) { - const agentId = working.bead_id; - - const containerInfo = await dispatch.checkAgentContainerStatus(this.env, townId, agentId); - - if (containerInfo.status === 'not_found' || containerInfo.status === 'exited') { - if (containerInfo.exitReason === 'completed') { - reviewQueue.agentCompleted(this.sql, agentId, { status: 'completed' }); - continue; - } - query( - this.sql, - /* sql */ ` - UPDATE ${agent_metadata} - SET ${agent_metadata.columns.status} = 'idle', - ${agent_metadata.columns.last_activity_at} = ? - WHERE ${agent_metadata.bead_id} = ? 
- `, - [now(), agentId] - ); - continue; - } - } - - // ── Tiered GUPP violation handling ────────────────────────────── - // Re-query to get the current set of working agents (some may have - // been reset to idle by zombie detection above). - const currentWorking = WorkingAgentRow.array().parse([ - ...query( - this.sql, - /* sql */ ` - SELECT ${agent_metadata.bead_id}, ${agent_metadata.current_hook_bead_id}, ${agent_metadata.last_activity_at} - FROM ${agent_metadata} - WHERE ${agent_metadata.status} IN ('working', 'stalled') - `, - [] - ), - ]); - - const forceStopIds = patrol.detectGUPPViolations( - this.sql, - currentWorking, - (agentId, message, opts) => this.queueNudge(agentId, message, opts) - ); - - // Force-stop agents in the container (best-effort) - for (const agentId of forceStopIds) { - dispatch - .stopAgentInContainer(this.env, townId, agentId) - .catch(err => - console.warn(`${TOWN_LOG} witnessPatrol: force-stop failed for agent=${agentId}`, err) - ); - } - - // ── Orphaned work detection ──────────────────────────────────── - patrol.detectOrphanedWork(this.sql); - - // ── Agent garbage collection ─────────────────────────────────── - const gcCount = patrol.agentGC(this.sql); - if (gcCount > 0) { - // Clean up AgentDO storage for GC'd agents (best-effort) - console.log(`${TOWN_LOG} witnessPatrol: GC'd ${gcCount} agent(s)`); - } - - // ── Per-bead timeout enforcement ─────────────────────────────── - const timedOut = patrol.checkTimerGates(this.sql); - for (const { agentId } of timedOut) { - if (agentId) { - dispatch - .stopAgentInContainer(this.env, townId, agentId) - .catch(err => - console.warn( - `${TOWN_LOG} witnessPatrol: failed to stop timed-out agent=${agentId}`, - err - ) - ); - } - } - } - - /** - * Deacon patrol: stale hook detection, stranded convoy feeding, - * crash loop detection. - * - * Mechanical checks that complement the witness patrol. See #442. 
- */ - private deaconPatrol(): void { - // ── Stale hook detection ─────────────────────────────────────── - patrol.detectStaleHooks(this.sql); - - // ── Stranded convoy feeding ──────────────────────────────────── - patrol.feedStrandedConvoys(this.sql, this.townId); - - // ── Crash loop detection ─────────────────────────────────────── - patrol.detectCrashLoops(this.sql); + scheduling.dispatchUnblockedBeads(this.schedulingCtx, closedBeadId); } /** @@ -3447,7 +3083,7 @@ export class TownDO extends DurableObject { patrol.TRIAGE_REQUEST_LABEL, patrol.TRIAGE_BATCH_LABEL ); - const cooldownCutoff = new Date(Date.now() - DISPATCH_COOLDOWN_MS).toISOString(); + const cooldownCutoff = new Date(Date.now() - scheduling.DISPATCH_COOLDOWN_MS).toISOString(); const existingBatch = [ ...query( this.sql, @@ -3598,358 +3234,6 @@ export class TownDO extends DurableObject { await Promise.allSettled(deliveries); } - /** - * Process the review queue: pop pending entries and trigger merge. - */ - private async processReviewQueue(): Promise { - reviewQueue.recoverStuckReviews(this.sql); - reviewQueue.closeOrphanedReviewBeads(this.sql); - - // Poll open PRs created by the 'pr' strategy - await this.pollPendingPRs(); - - const entry = reviewQueue.popReviewQueue(this.sql); - if (!entry) return; - - // Resolve rig from the merge_request bead — not rigList[0] which would - // pick the wrong rig in multi-rig towns. - const rigId = entry.rig_id; - if (!rigId) { - console.error(`${TOWN_LOG} processReviewQueue: entry ${entry.id} has no rig_id, skipping`); - reviewQueue.completeReview(this.sql, entry.id, 'failed'); - return; - } - const rigConfig = await this.getRigConfig(rigId); - if (!rigConfig) { - reviewQueue.completeReview(this.sql, entry.id, 'failed'); - return; - } - - const townConfig = await this.getTownConfig(); - const mergeStrategy = config.resolveMergeStrategy(townConfig, rigConfig.merge_strategy); - const gates = townConfig.refinery?.gates ?? 
[]; - - // Resolve the target branch from review_metadata. For convoy beads - // this will be the convoy's feature branch; for standalone beads it's - // the rig's default branch. For convoy landing MRs it's back to default. - const targetBranchRows = z - .object({ target_branch: z.string() }) - .array() - .parse([ - ...query( - this.sql, - /* sql */ ` - SELECT ${review_metadata.target_branch} - FROM ${review_metadata} - WHERE ${review_metadata.bead_id} = ? - `, - [entry.id] - ), - ]); - const targetBranch = targetBranchRows[0]?.target_branch ?? rigConfig.defaultBranch; - - // Check if this MR belongs to a convoy and what the merge mode is. - // For 'review-then-land' convoys, the refinery only reviews and merges - // into the feature branch (using direct strategy regardless of town config), - // because the final land to main happens once ALL beads are done. - // For 'review-and-merge' convoys (and standalone beads), use the normal strategy. - const sourceBeadId = typeof entry.bead_id === 'string' ? entry.bead_id : null; - const convoyId = sourceBeadId ? beadOps.getConvoyForBead(this.sql, sourceBeadId) : null; - const convoyMergeMode = convoyId ? beadOps.getConvoyMergeMode(this.sql, convoyId) : null; - - // For review-then-land convoys targeting the feature branch, always use - // direct merge strategy (the refinery merges the polecat's work into the - // feature branch directly, no PR needed for intermediate steps). - const isConvoyIntermediateMerge = - convoyMergeMode === 'review-then-land' && targetBranch !== rigConfig.defaultBranch; - const effectiveMergeStrategy = isConvoyIntermediateMerge ? 'direct' : mergeStrategy; - - console.log( - `${TOWN_LOG} processReviewQueue: entry=${entry.id} branch=${entry.branch} ` + - `targetBranch=${targetBranch} mergeStrategy=${effectiveMergeStrategy} ` + - `convoyMode=${convoyMergeMode ?? 'standalone'} gates=${gates.length}` - ); - - // Get or create the per-rig refinery. 
If it already exists and is busy - // (processing another review), put the entry back to 'open' so it gets - // retried on the next alarm cycle. - const refineryAgent = agents.getOrCreateAgent(this.sql, 'refinery', rigId, this.townId); - if (refineryAgent.status !== 'idle') { - console.log( - `${TOWN_LOG} processReviewQueue: refinery for rig=${rigId} is ${refineryAgent.status}, re-queuing entry=${entry.id}` - ); - beadOps.updateBeadStatus(this.sql, entry.id, 'open', 'system'); - return; - } - - const { buildRefinerySystemPrompt } = await import('../prompts/refinery-system.prompt'); - const systemPrompt = buildRefinerySystemPrompt({ - identity: refineryAgent.identity, - rigId, - townId: this.townId, - gates, - branch: entry.branch, - targetBranch, - polecatAgentId: entry.agent_id, - mergeStrategy: effectiveMergeStrategy, - convoyContext: convoyMergeMode - ? { - mergeMode: convoyMergeMode, - isIntermediateStep: isConvoyIntermediateMerge, - } - : undefined, - }); - - // Hook the refinery to the MR bead (entry.id), not the source bead - // (entry.bead_id). The source bead stays closed with its original - // polecat assignee preserved. - agents.hookBead(this.sql, refineryAgent.id, entry.id); - - // Mark as working before the async container start (same I/O gate - // rationale as dispatchAgent — see comment there). - agents.updateAgentStatus(this.sql, refineryAgent.id, 'working'); - - const started = await dispatch.startAgentInContainer(this.env, this.ctx.storage, { - townId: this.townId, - rigId, - userId: rigConfig.userId, - agentId: refineryAgent.id, - agentName: refineryAgent.name, - role: 'refinery', - identity: refineryAgent.identity, - beadId: entry.id, - beadTitle: `Review merge: ${entry.branch} → ${targetBranch}`, - beadBody: entry.summary ?? '', - checkpoint: null, - gitUrl: rigConfig.gitUrl, - // Always clone from the rig's real default branch. The targetBranch - // may be a convoy feature branch that doesn't exist on the remote yet. 
- // The refinery's system prompt tells it which branch to merge into. - defaultBranch: rigConfig.defaultBranch, - kilocodeToken: rigConfig.kilocodeToken, - townConfig, - systemPromptOverride: systemPrompt, - platformIntegrationId: rigConfig.platformIntegrationId, - }); - - if (!started) { - agents.unhookBead(this.sql, refineryAgent.id); - agents.updateAgentStatus(this.sql, refineryAgent.id, 'idle'); - console.error( - `${TOWN_LOG} processReviewQueue: refinery agent failed to start for entry=${entry.id}` - ); - reviewQueue.completeReview(this.sql, entry.id, 'failed'); - } - } - - /** - * Process convoys whose tracked beads are all closed and that have a - * feature branch waiting to be landed. Creates a final merge_request bead - * to merge the convoy's feature branch into the default branch. - */ - private async processConvoyLandings(): Promise { - // Find convoys with ready_to_land flag in metadata that are still open - const ReadyConvoyRow = z.object({ - bead_id: z.string(), - metadata: z - .string() - .transform(v => { - try { - return JSON.parse(v) as Record; - } catch { - return {}; - } - }) - .pipe(z.record(z.string(), z.any())), - }); - const readyRows = ReadyConvoyRow.array().parse([ - ...query( - this.sql, - /* sql */ ` - SELECT ${beads.bead_id}, ${beads.metadata} - FROM ${beads} - WHERE ${beads.type} = 'convoy' - AND ${beads.status} = 'open' - AND json_extract(${beads.metadata}, '$.ready_to_land') = 1 - `, - [] - ), - ]); - - for (const row of readyRows) { - const convoyId = row.bead_id; - const featureBranch = beadOps.getConvoyFeatureBranch(this.sql, convoyId); - if (!featureBranch) continue; - - // Check if there's already a pending landing MR for this convoy - const existingLanding = [ - ...query( - this.sql, - /* sql */ ` - SELECT ${beads.bead_id} - FROM ${beads} - WHERE ${beads.type} = 'merge_request' - AND ${beads.status} IN ('open', 'in_progress') - AND json_extract(${beads.metadata}, '$.convoy_landing') = 1 - AND json_extract(${beads.metadata}, 
'$.convoy_id') = ? - LIMIT 1 - `, - [convoyId] - ), - ]; - if (existingLanding.length > 0) continue; - - // Find which rig this convoy's beads belong to - const rigRow = z - .object({ rig_id: z.string().nullable() }) - .array() - .parse([ - ...query( - this.sql, - /* sql */ ` - SELECT ${beads.rig_id} - FROM ${bead_dependencies} - INNER JOIN ${beads} ON ${bead_dependencies.bead_id} = ${beads.bead_id} - WHERE ${bead_dependencies.depends_on_bead_id} = ? - AND ${bead_dependencies.dependency_type} = 'tracks' - AND ${beads.rig_id} IS NOT NULL - LIMIT 1 - `, - [convoyId] - ), - ]); - const rigId = rigRow[0]?.rig_id; - if (!rigId) continue; - - const rigConfig = await this.getRigConfig(rigId); - if (!rigConfig) continue; - - console.log( - `${TOWN_LOG} processConvoyLandings: creating landing MR for convoy=${convoyId} branch=${featureBranch} → ${rigConfig.defaultBranch}` - ); - - // Submit a landing MR: feature branch → defaultBranch - reviewQueue.submitToReviewQueue(this.sql, { - agent_id: 'system', - bead_id: convoyId, - rig_id: rigId, - branch: featureBranch, - summary: `Landing convoy: merge ${featureBranch} → ${rigConfig.defaultBranch}`, - }); - - // Patch the just-created MR bead's metadata to mark it as a convoy landing - // and set the target_branch to the default branch (not the convoy feature branch). - const mrRows = z - .object({ bead_id: z.string() }) - .array() - .parse([ - ...query( - this.sql, - /* sql */ ` - SELECT ${beads.bead_id} - FROM ${beads} - WHERE ${beads.type} = 'merge_request' - AND ${beads.created_by} = 'system' - AND json_extract(${beads.metadata}, '$.source_bead_id') = ? - ORDER BY ${beads.created_at} DESC - LIMIT 1 - `, - [convoyId] - ), - ]); - if (mrRows.length > 0) { - const mrBeadId = mrRows[0].bead_id; - query( - this.sql, - /* sql */ ` - UPDATE ${beads} - SET ${beads.columns.metadata} = json_set( - COALESCE(${beads.metadata}, '{}'), - '$.convoy_landing', 1, - '$.convoy_id', ? - ) - WHERE ${beads.bead_id} = ? 
- `, - [convoyId, mrBeadId] - ); - // Override the target_branch to the default branch for the landing MR - query( - this.sql, - /* sql */ ` - UPDATE ${review_metadata} - SET ${review_metadata.columns.target_branch} = ? - WHERE ${review_metadata.bead_id} = ? - `, - [rigConfig.defaultBranch, mrBeadId] - ); - } - - // Clear the ready_to_land flag - query( - this.sql, - /* sql */ ` - UPDATE ${beads} - SET ${beads.columns.metadata} = json_remove(COALESCE(${beads.metadata}, '{}'), '$.ready_to_land'), - ${beads.columns.updated_at} = ? - WHERE ${beads.bead_id} = ? - `, - [now(), convoyId] - ); - } - } - - /** - * Poll external PRs created by the 'pr' merge strategy. - * Checks if PRs have been merged or closed and updates the MR bead status. - */ - private async pollPendingPRs(): Promise { - const pendingReviews = reviewQueue.listPendingPRReviews(this.sql); - if (pendingReviews.length === 0) return; - - console.log(`${TOWN_LOG} pollPendingPRs: checking ${pendingReviews.length} pending PR(s)`); - - const townConfig = await this.getTownConfig(); - - // Cap the number of PRs polled per alarm tick to avoid exhausting - // GitHub/GitLab API rate limits when many PRs are pending. - const MAX_POLLS_PER_TICK = 10; - for (const review of pendingReviews.slice(0, MAX_POLLS_PER_TICK)) { - const prUrl = review.pr_url; - if (!prUrl) continue; - // review.bead_id is the MR bead's own ID (not the source bead). - // MergeRequestBeadRecord.bead_id == the merge_request bead PK. - - try { - const status = await this.checkPRStatus(prUrl, townConfig); - console.log( - `${TOWN_LOG} pollPendingPRs: entry=${review.bead_id} url=${prUrl} status=${status ?? 
'null (could not determine)'}` - ); - if (!status) continue; - - if (status === 'merged') { - reviewQueue.completeReviewWithResult(this.sql, { - entry_id: review.bead_id, - status: 'merged', - message: 'PR merged externally', - }); - console.log(`${TOWN_LOG} pollPendingPRs: PR merged for entry=${review.bead_id}`); - } else if (status === 'closed') { - reviewQueue.completeReviewWithResult(this.sql, { - entry_id: review.bead_id, - status: 'failed', - message: 'PR closed without merge', - }); - console.log( - `${TOWN_LOG} pollPendingPRs: PR closed without merge for entry=${review.bead_id}` - ); - } - // 'open' — still waiting, do nothing - } catch (err) { - console.warn(`${TOWN_LOG} pollPendingPRs: failed to check PR status for ${prUrl}:`, err); - } - } - } - /** * Check the status of a PR/MR via its URL. * Returns 'open', 'merged', or 'closed' (null if cannot determine). @@ -4127,7 +3411,9 @@ export class TownDO extends DurableObject { try { const container = getTownContainerStub(this.env, townId); - await container.fetch('http://container/health'); + await container.fetch('http://container/health', { + signal: AbortSignal.timeout(5_000), + }); } catch { // Container is starting up or unavailable — alarm will retry } @@ -4162,7 +3448,6 @@ export class TownDO extends DurableObject { activeAgents: number; pendingBeads: number; }> { - await this.ensureInitialized(); const townId = this.townId; // Check if alarm is set @@ -4228,14 +3513,13 @@ export class TownDO extends DurableObject { stalledAgents: number; orphanedHooks: number; }; + reconciler: reconciler.ReconcilerMetrics | null; recentEvents: Array<{ time: string; type: string; message: string; }>; }> { - await this.ensureInitialized(); - const currentAlarm = await this.ctx.storage.getAlarm(); const active = this.hasActiveWork(); const intervalMs = active ? 
ACTIVE_ALARM_INTERVAL_MS : IDLE_ALARM_INTERVAL_MS; @@ -4377,10 +3661,52 @@ export class TownDO extends DurableObject { stalledAgents, orphanedHooks, }, + reconciler: this._lastReconcilerMetrics, recentEvents, }; } + // DEBUG: concise non-terminal bead summary — remove after debugging + async debugBeadSummary(): Promise { + return [ + ...query( + this.sql, + /* sql */ ` + SELECT ${beads.bead_id}, + ${beads.type}, + ${beads.status}, + ${beads.title}, + ${beads.assignee_agent_bead_id}, + ${beads.updated_at} + FROM ${beads} + WHERE ${beads.status} NOT IN ('closed', 'failed') + AND ${beads.type} != 'agent' + ORDER BY ${beads.type}, ${beads.status} + `, + [] + ), + ]; + } + + // DEBUG: raw agent_metadata dump — remove after debugging + async debugAgentMetadata(): Promise { + return [ + ...query( + this.sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id}, + ${agent_metadata.role}, + ${agent_metadata.status}, + ${agent_metadata.current_hook_bead_id}, + ${agent_metadata.dispatch_attempts}, + ${agent_metadata.last_activity_at} + FROM ${agent_metadata} + `, + [] + ), + ]; + } + async destroy(): Promise { console.log(`${TOWN_LOG} destroy: clearing all storage and alarms`); diff --git a/cloudflare-gastown/src/dos/town/actions.ts b/cloudflare-gastown/src/dos/town/actions.ts new file mode 100644 index 0000000000..49e91091d2 --- /dev/null +++ b/cloudflare-gastown/src/dos/town/actions.ts @@ -0,0 +1,639 @@ +/** + * Reconciler action types and application. + * + * Actions are the reconciler's outputs — they describe mutations to apply + * and side effects to execute. Nothing mutates bead/agent/convoy state + * directly; all mutations flow through applyAction(). + * + * See reconciliation-spec.md §4. 
+ */ + +import { z } from 'zod'; +import { beads } from '../../db/tables/beads.table'; +import { agent_metadata } from '../../db/tables/agent-metadata.table'; +import { convoy_metadata } from '../../db/tables/convoy-metadata.table'; +import { bead_dependencies } from '../../db/tables/bead-dependencies.table'; +import { agent_nudges } from '../../db/tables/agent-nudges.table'; +import { query } from '../../util/query.util'; +import * as beadOps from './beads'; +import * as agentOps from './agents'; +import * as reviewQueue from './review-queue'; +import * as patrol from './patrol'; + +// ── Bead mutations ────────────────────────────────────────────────── + +const TransitionBead = z.object({ + type: z.literal('transition_bead'), + bead_id: z.string(), + from: z.string().nullable(), + to: z.string(), + reason: z.string(), + actor: z.string(), +}); + +const AssignBead = z.object({ + type: z.literal('assign_bead'), + bead_id: z.string(), + agent_id: z.string(), +}); + +const ClearBeadAssignee = z.object({ + type: z.literal('clear_bead_assignee'), + bead_id: z.string(), +}); + +const CreateMrBead = z.object({ + type: z.literal('create_mr_bead'), + source_bead_id: z.string(), + agent_id: z.string(), + rig_id: z.string(), + branch: z.string(), + target_branch: z.string(), + pr_url: z.string().optional(), + summary: z.string().optional(), +}); + +const CreateLandingMr = z.object({ + type: z.literal('create_landing_mr'), + convoy_id: z.string(), + rig_id: z.string(), + feature_branch: z.string(), + target_branch: z.string(), +}); + +const CloseSiblingMrs = z.object({ + type: z.literal('close_sibling_mrs'), + source_bead_id: z.string(), + exclude_mr_id: z.string(), +}); + +const SetReviewPrUrl = z.object({ + type: z.literal('set_review_pr_url'), + bead_id: z.string(), + pr_url: z.string(), +}); + +// ── Agent mutations ───────────────────────────────────────────────── + +const TransitionAgent = z.object({ + type: z.literal('transition_agent'), + agent_id: z.string(), + from: 
z.string().nullable(), + to: z.string(), + reason: z.string(), +}); + +const HookAgent = z.object({ + type: z.literal('hook_agent'), + agent_id: z.string(), + bead_id: z.string(), +}); + +const UnhookAgent = z.object({ + type: z.literal('unhook_agent'), + agent_id: z.string(), + reason: z.string(), +}); + +const ClearAgentCheckpoint = z.object({ + type: z.literal('clear_agent_checkpoint'), + agent_id: z.string(), +}); + +const DeleteAgent = z.object({ + type: z.literal('delete_agent'), + agent_id: z.string(), + reason: z.string(), +}); + +// ── Convoy mutations ──────────────────────────────────────────────── + +const UpdateConvoyProgress = z.object({ + type: z.literal('update_convoy_progress'), + convoy_id: z.string(), + closed_beads: z.number(), +}); + +const SetConvoyReadyToLand = z.object({ + type: z.literal('set_convoy_ready_to_land'), + convoy_id: z.string(), +}); + +const CloseConvoy = z.object({ + type: z.literal('close_convoy'), + convoy_id: z.string(), +}); + +// ── Side effects (deferred) ───────────────────────────────────────── + +const DispatchAgent = z.object({ + type: z.literal('dispatch_agent'), + agent_id: z.string(), + bead_id: z.string(), + rig_id: z.string(), +}); + +const StopAgent = z.object({ + type: z.literal('stop_agent'), + agent_id: z.string(), + reason: z.string(), +}); + +const PollPr = z.object({ + type: z.literal('poll_pr'), + bead_id: z.string(), + pr_url: z.string(), +}); + +const SendNudge = z.object({ + type: z.literal('send_nudge'), + agent_id: z.string(), + message: z.string(), + tier: z.enum(['warn', 'escalate', 'force_stop']), +}); + +const CreateTriageRequest = z.object({ + type: z.literal('create_triage_request'), + agent_id: z.string(), + triage_type: z.string(), + reason: z.string(), +}); + +const NotifyMayor = z.object({ + type: z.literal('notify_mayor'), + message: z.string(), +}); + +const EmitEvent = z.object({ + type: z.literal('emit_event'), + event_name: z.string(), + data: z.record(z.string(), z.unknown()), +}); + 
+// ── Union ─────────────────────────────────────────────────────────── + +export const Action = z.discriminatedUnion('type', [ + // Bead mutations + TransitionBead, + AssignBead, + ClearBeadAssignee, + CreateMrBead, + CreateLandingMr, + CloseSiblingMrs, + SetReviewPrUrl, + // Agent mutations + TransitionAgent, + HookAgent, + UnhookAgent, + ClearAgentCheckpoint, + DeleteAgent, + // Convoy mutations + UpdateConvoyProgress, + SetConvoyReadyToLand, + CloseConvoy, + // Side effects + DispatchAgent, + StopAgent, + PollPr, + SendNudge, + CreateTriageRequest, + NotifyMayor, + EmitEvent, +]); + +export type Action = z.infer; + +// ── Per-type exports for construction ─────────────────────────────── +// These aren't validated at construction time (they're built by the +// reconciler itself), so we export plain type aliases for convenience. + +export type TransitionBead = z.infer; +export type AssignBead = z.infer; +export type ClearBeadAssignee = z.infer; +export type CreateMrBead = z.infer; +export type CreateLandingMr = z.infer; +export type CloseSiblingMrs = z.infer; +export type SetReviewPrUrl = z.infer; +export type TransitionAgent = z.infer; +export type HookAgent = z.infer; +export type UnhookAgent = z.infer; +export type ClearAgentCheckpoint = z.infer; +export type DeleteAgent = z.infer; +export type UpdateConvoyProgress = z.infer; +export type SetConvoyReadyToLand = z.infer; +export type CloseConvoy = z.infer; +export type DispatchAgent = z.infer; +export type StopAgent = z.infer; +export type PollPr = z.infer; +export type SendNudge = z.infer; +export type CreateTriageRequest = z.infer; +export type NotifyMayor = z.infer; +export type EmitEvent = z.infer; + +// ── Action application context ────────────────────────────────────── +// applyAction needs access to TownDO-level resources for side effects. +// The SQL handle is for synchronous mutations; the rest are for async +// side effects (dispatch, stop, poll, nudge). 
+ +export type ApplyActionContext = { + sql: SqlStorage; + townId: string; + /** Dispatch an agent to its container. Returns true if container accepted. */ + dispatchAgent: (agentId: string, beadId: string, rigId: string) => Promise; + /** Stop an agent's container process. */ + stopAgent: (agentId: string) => Promise; + /** Check a PR's status via GitHub/GitLab API. Returns 'open'|'merged'|'closed'|null. */ + checkPRStatus: (prUrl: string) => Promise<'open' | 'merged' | 'closed' | null>; + /** Queue a nudge message for an agent. */ + queueNudge: (agentId: string, message: string, tier: string) => Promise; + /** Insert a town_event for deferred processing (e.g. pr_status_changed). */ + insertEvent: ( + eventType: string, + params: { agent_id?: string | null; bead_id?: string | null; payload?: Record } + ) => void; + /** Emit an analytics/WebSocket event. */ + emitEvent: (data: Record) => void; +}; + +const LOG = '[actions]'; + +function now(): string { + return new Date().toISOString(); +} + +// ── applyAction ───────────────────────────────────────────────────── + +/** + * Apply a single action. Synchronous SQL mutations happen inline. + * Async side effects (container dispatch, PR polling, etc.) are returned + * as a deferred function to be executed after all SQL is committed. + * + * See reconciliation-spec.md §5.4. + */ +export function applyAction(ctx: ApplyActionContext, action: Action): (() => Promise) | null { + const { sql, townId } = ctx; + + switch (action.type) { + // ── Bead mutations ────────────────────────────────────────── + + case 'transition_bead': { + try { + beadOps.updateBeadStatus(sql, action.bead_id, action.to, action.actor); + } catch (err) { + console.warn(`${LOG} transition_bead failed: bead=${action.bead_id} to=${action.to}`, err); + } + return null; + } + + case 'assign_bead': { + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.assignee_agent_bead_id} = ?, + ${beads.columns.updated_at} = ? 
+ WHERE ${beads.bead_id} = ? + `, + [action.agent_id, now(), action.bead_id] + ); + return null; + } + + case 'clear_bead_assignee': { + // Clear the assignee on the bead + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.assignee_agent_bead_id} = NULL, + ${beads.columns.updated_at} = ? + WHERE ${beads.bead_id} = ? + `, + [now(), action.bead_id] + ); + // Also unhook any agents still pointing at this bead, to prevent + // split-brain where the bead looks unassigned but agents still hold hooks. + const hookedAgents = z + .object({ bead_id: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id} + FROM ${agent_metadata} + WHERE ${agent_metadata.current_hook_bead_id} = ? + `, + [action.bead_id] + ), + ]); + for (const row of hookedAgents) { + agentOps.unhookBead(sql, row.bead_id); + } + return null; + } + + case 'create_mr_bead': { + reviewQueue.submitToReviewQueue(sql, { + agent_id: action.agent_id, + bead_id: action.source_bead_id, + rig_id: action.rig_id, + branch: action.branch, + pr_url: action.pr_url, + summary: action.summary, + }); + return null; + } + + case 'create_landing_mr': { + // Create an MR bead for the landing merge (feature branch → main) + reviewQueue.submitToReviewQueue(sql, { + agent_id: 'system', + bead_id: action.convoy_id, + rig_id: action.rig_id, + branch: action.feature_branch, + default_branch: action.target_branch, + }); + return null; + } + + case 'close_sibling_mrs': { + // Find sibling MR beads, then close each via updateBeadStatus for + // proper terminal guard + bead event logging. + const siblingRows = z + .object({ bead_id: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${beads.bead_id} + FROM ${beads} + WHERE ${beads.type} = 'merge_request' + AND ${beads.bead_id} != ? 
+ AND ${beads.status} NOT IN ('closed', 'failed') + AND ${beads.bead_id} IN ( + SELECT dep.${bead_dependencies.columns.bead_id} + FROM ${bead_dependencies} AS dep + WHERE dep.${bead_dependencies.columns.depends_on_bead_id} = ? + AND dep.${bead_dependencies.columns.dependency_type} = 'tracks' + ) + `, + [action.exclude_mr_id, action.source_bead_id] + ), + ]); + for (const row of siblingRows) { + beadOps.updateBeadStatus(sql, row.bead_id, 'closed', 'system'); + } + return null; + } + + case 'set_review_pr_url': { + reviewQueue.setReviewPrUrl(sql, action.bead_id, action.pr_url); + return null; + } + + // ── Agent mutations ───────────────────────────────────────── + + case 'transition_agent': { + try { + agentOps.updateAgentStatus(sql, action.agent_id, action.to); + } catch (err) { + console.warn( + `${LOG} transition_agent failed: agent=${action.agent_id} to=${action.to}`, + err + ); + } + return null; + } + + case 'hook_agent': { + try { + agentOps.hookBead(sql, action.agent_id, action.bead_id); + } catch (err) { + console.warn( + `${LOG} hook_agent failed: agent=${action.agent_id} bead=${action.bead_id}`, + err + ); + } + return null; + } + + case 'unhook_agent': { + agentOps.unhookBead(sql, action.agent_id); + return null; + } + + case 'clear_agent_checkpoint': { + agentOps.writeCheckpoint(sql, action.agent_id, null); + return null; + } + + case 'delete_agent': { + try { + agentOps.deleteAgent(sql, action.agent_id); + } catch (err) { + console.warn(`${LOG} delete_agent failed: agent=${action.agent_id}`, err); + } + return null; + } + + // ── Convoy mutations ──────────────────────────────────────── + + case 'update_convoy_progress': { + query( + sql, + /* sql */ ` + UPDATE ${convoy_metadata} + SET ${convoy_metadata.columns.closed_beads} = ? + WHERE ${convoy_metadata.columns.bead_id} = ? 
+ `, + [action.closed_beads, action.convoy_id] + ); + return null; + } + + case 'set_convoy_ready_to_land': { + const timestamp = now(); + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.metadata} = json_set(COALESCE(${beads.metadata}, '{}'), '$.ready_to_land', 1), + ${beads.columns.updated_at} = ? + WHERE ${beads.bead_id} = ? + `, + [timestamp, action.convoy_id] + ); + return null; + } + + case 'close_convoy': { + // Use updateBeadStatus for terminal state guard + bead event logging + beadOps.updateBeadStatus(sql, action.convoy_id, 'closed', 'system'); + query( + sql, + /* sql */ ` + UPDATE ${convoy_metadata} + SET ${convoy_metadata.columns.landed_at} = ? + WHERE ${convoy_metadata.columns.bead_id} = ? + `, + [now(), action.convoy_id] + ); + return null; + } + + // ── Side effects (deferred) ───────────────────────────────── + + case 'dispatch_agent': { + // Resolve agent if not yet assigned (agent_id is '' for Rule 1 dispatches) + let agentId = action.agent_id; + const beadId = action.bead_id; + const rigId = action.rig_id; + + if (!agentId) { + // Need to get-or-create an agent for this bead. + // Infer role from bead type: MR beads need refineries, issue beads need polecats. + const targetBead = beadOps.getBead(sql, beadId); + const role = targetBead?.type === 'merge_request' ? 'refinery' : 'polecat'; + try { + const agent = agentOps.getOrCreateAgent(sql, role, rigId, townId); + agentOps.hookBead(sql, agent.id, beadId); + agentId = agent.id; + } catch (err) { + console.warn(`${LOG} dispatch_agent: failed to hook agent for bead=${beadId}`, err); + return null; + } + } + + // Set agent to working and bead to in_progress synchronously + agentOps.updateAgentStatus(sql, agentId, 'working'); + query( + sql, + /* sql */ ` + UPDATE ${agent_metadata} + SET ${agent_metadata.columns.dispatch_attempts} = ${agent_metadata.columns.dispatch_attempts} + 1 + WHERE ${agent_metadata.bead_id} = ? 
+ `, + [agentId] + ); + beadOps.updateBeadStatus(sql, beadId, 'in_progress', agentId); + + const capturedAgentId = agentId; + return async () => { + // Best-effort dispatch. If it fails, the agent stays 'working' + // and the bead stays 'in_progress'. The reconciler detects the + // mismatch on the next tick (idle agent hooked to in_progress + // bead) and retries dispatch. + await ctx.dispatchAgent(capturedAgentId, beadId, rigId).catch(err => { + console.warn( + `${LOG} dispatch_agent: container start failed for agent=${capturedAgentId} bead=${beadId}`, + err + ); + }); + }; + } + + case 'stop_agent': { + return async () => { + try { + await ctx.stopAgent(action.agent_id); + } catch (err) { + console.warn(`${LOG} stop_agent failed: agent=${action.agent_id}`, err); + } + }; + } + + case 'poll_pr': { + // Touch updated_at synchronously so the bead doesn't look stale + // to Rule 4 (orphaned PR review, 30 min timeout). Without this, + // active polling keeps the PR alive but updated_at was set once + // at PR creation and never refreshed, causing a false "orphaned" + // failure after 30 minutes. + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.updated_at} = ? + WHERE ${beads.bead_id} = ? 
+ `, + [now(), action.bead_id] + ); + + return async () => { + try { + const status = await ctx.checkPRStatus(action.pr_url); + if (status && status !== 'open') { + ctx.insertEvent('pr_status_changed', { + bead_id: action.bead_id, + payload: { pr_url: action.pr_url, pr_state: status }, + }); + } + } catch (err) { + console.warn(`${LOG} poll_pr failed: bead=${action.bead_id} url=${action.pr_url}`, err); + } + }; + } + + case 'send_nudge': { + // Insert nudge record synchronously + const nudgeId = crypto.randomUUID(); + query( + sql, + /* sql */ ` + INSERT INTO ${agent_nudges} ( + ${agent_nudges.columns.nudge_id}, + ${agent_nudges.columns.agent_bead_id}, + ${agent_nudges.columns.message}, + ${agent_nudges.columns.mode}, + ${agent_nudges.columns.priority}, + ${agent_nudges.columns.source}, + ${agent_nudges.columns.expires_at} + ) VALUES (?, ?, ?, 'immediate', 'urgent', ?, ?) + `, + [nudgeId, action.agent_id, action.message, `reconciler:${action.tier}`, null] + ); + + return async () => { + try { + await ctx.queueNudge(action.agent_id, action.message, action.tier); + } catch (err) { + console.warn(`${LOG} send_nudge failed: agent=${action.agent_id}`, err); + } + }; + } + + case 'create_triage_request': { + try { + patrol.createTriageRequest(sql, { + triageType: action.triage_type as patrol.TriageType, + agentBeadId: action.agent_id, + title: `Triage: ${action.reason}`, + context: { reason: action.reason }, + options: ['RESTART', 'CLOSE', 'ESCALATE'], + }); + } catch (err) { + console.warn(`${LOG} create_triage_request failed: agent=${action.agent_id}`, err); + } + return null; + } + + case 'notify_mayor': { + // Mayor notifications are informational — log for now + console.log(`${LOG} notify_mayor: town=${townId} msg=${action.message}`); + return null; + } + + case 'emit_event': { + ctx.emitEvent({ event: action.event_name, townId, ...action.data }); + return null; + } + + default: { + // Exhaustiveness check via never + const _exhaustive: never = action; + 
console.warn(`${LOG} applyAction: unknown action type`, _exhaustive); + return null; + } + } +} diff --git a/cloudflare-gastown/src/dos/town/agents.ts b/cloudflare-gastown/src/dos/town/agents.ts index e5a34166ba..62d68acdcb 100644 --- a/cloudflare-gastown/src/dos/town/agents.ts +++ b/cloudflare-gastown/src/dos/town/agents.ts @@ -5,6 +5,7 @@ * joined with agent_metadata for operational state. */ +import { z } from 'zod'; import { beads, BeadRecord, AgentBeadRecord } from '../../db/tables/beads.table'; import { agent_metadata } from '../../db/tables/agent-metadata.table'; import { query } from '../../util/query.util'; @@ -256,6 +257,31 @@ export function hookBead(sql: SqlStorage, agentId: string, beadId: string): void ); } + // Mutual exclusion: unhook any other agents already hooked to this bead. + // This prevents multi-agent assignment when reconcileBeads Rule 1 fires + // while an idle agent still holds a stale hook from a previous cycle. + const staleHooks = z + .object({ bead_id: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id} + FROM ${agent_metadata} + WHERE ${agent_metadata.current_hook_bead_id} = ? + AND ${agent_metadata.bead_id} != ? + `, + [beadId, agentId] + ), + ]); + for (const stale of staleHooks) { + console.warn( + `[agents] hookBead: unhooking stale agent ${stale.bead_id} from bead ${beadId} (replaced by ${agentId})` + ); + unhookBead(sql, stale.bead_id); + } + query( sql, /* sql */ ` @@ -449,11 +475,28 @@ export function prime(sql: SqlStorage, agentId: string): PrimeContext { ]; const openBeads = BeadRecord.array().parse(openBeadRows); + // Build rework context if the hooked bead is a rework request + let rework_context: PrimeContext['rework_context'] = null; + if (hookedBead?.labels.includes('gt:rework') && hookedBead.metadata) { + const meta = hookedBead.metadata as Record; + const originalBeadId = typeof meta.rework_for === 'string' ? 
meta.rework_for : null; + const originalBead = originalBeadId ? getBead(sql, originalBeadId) : null; + rework_context = { + feedback: hookedBead.body ?? '', + branch: typeof meta.branch === 'string' ? meta.branch : null, + target_branch: typeof meta.target_branch === 'string' ? meta.target_branch : null, + files: Array.isArray(meta.files) ? (meta.files as string[]) : [], + original_bead_title: originalBead?.title ?? null, + mr_bead_id: typeof meta.mr_bead_id === 'string' ? meta.mr_bead_id : null, + }; + } + return { agent, hooked_bead: hookedBead, undelivered_mail: undeliveredMail, open_beads: openBeads, + rework_context, }; } @@ -494,14 +537,31 @@ export function updateAgentStatusMessage(sql: SqlStorage, agentId: string, messa // ── Touch (heartbeat helper) ──────────────────────────────────────── -export function touchAgent(sql: SqlStorage, agentId: string): void { +export function touchAgent( + sql: SqlStorage, + agentId: string, + watermark?: { + lastEventType?: string | null; + lastEventAt?: string | null; + activeTools?: string[]; + } +): void { query( sql, /* sql */ ` UPDATE ${agent_metadata} - SET ${agent_metadata.columns.last_activity_at} = ? + SET ${agent_metadata.columns.last_activity_at} = ?, + ${agent_metadata.columns.last_event_type} = COALESCE(?, ${agent_metadata.columns.last_event_type}), + ${agent_metadata.columns.last_event_at} = COALESCE(?, ${agent_metadata.columns.last_event_at}), + ${agent_metadata.columns.active_tools} = COALESCE(?, ${agent_metadata.columns.active_tools}) WHERE ${agent_metadata.bead_id} = ? `, - [now(), agentId] + [ + now(), + watermark?.lastEventType ?? null, + watermark?.lastEventAt ?? null, + watermark?.activeTools ? 
JSON.stringify(watermark.activeTools) : null, + agentId, + ] ); } diff --git a/cloudflare-gastown/src/dos/town/beads.ts b/cloudflare-gastown/src/dos/town/beads.ts index 37218b542e..5fd9463fa9 100644 --- a/cloudflare-gastown/src/dos/town/beads.ts +++ b/cloudflare-gastown/src/dos/town/beads.ts @@ -13,7 +13,6 @@ import { } from '../../db/tables/bead-events.table'; import { bead_dependencies, - BeadDependencyRecord, createTableBeadDependencies, getIndexesBeadDependencies, } from '../../db/tables/bead-dependencies.table'; @@ -51,10 +50,6 @@ function now(): string { return new Date().toISOString(); } -function cloneBeadMetadata(metadata: Bead['metadata'] | null | undefined): Record { - return metadata ? { ...metadata } : {}; -} - export function initBeadTables(sql: SqlStorage): void { // Create all tables first (IF NOT EXISTS — safe for existing DOs) query(sql, createTableBeads(), []); @@ -263,6 +258,17 @@ export function updateBeadStatus( // No-op if already in the target status — avoids redundant events if (bead.status === status) return bead; + // HARD INVARIANT: terminal states (closed/failed) are immutable. + // Once a bead reaches a terminal state, no recovery function, stale MR + // failure, or race condition should ever change its status. Return the + // bead as-is (no-op, not an error) so callers don't need to pre-check. + if (bead.status === 'closed' || bead.status === 'failed') { + console.warn( + `[beads] updateBeadStatus: blocked ${bead.status} → ${status} for bead=${beadId} — terminal state is immutable` + ); + return bead; + } + const oldStatus = bead.status; const timestamp = now(); const closedAt = status === 'closed' ? timestamp : bead.closed_at; @@ -486,6 +492,26 @@ export function hasUnresolvedBlockers(sql: SqlStorage, beadId: string): boolean return z.object({ count: z.number() }).parse(rows[0]).count > 0; } +/** Insert a dependency between two beads. 
*/ +export function insertDependency( + sql: SqlStorage, + beadId: string, + dependsOnBeadId: string, + dependencyType: 'blocks' | 'tracks' | 'parent-child' +): void { + query( + sql, + /* sql */ ` + INSERT OR IGNORE INTO ${bead_dependencies} ( + ${bead_dependencies.columns.bead_id}, + ${bead_dependencies.columns.depends_on_bead_id}, + ${bead_dependencies.columns.dependency_type} + ) VALUES (?, ?, ?) + `, + [beadId, dependsOnBeadId, dependencyType] + ); +} + /** * Find beads that were blocked by `closedBeadId` and are now fully unblocked * (all their 'blocks' dependencies are resolved). @@ -913,335 +939,3 @@ export function getConvoyFeatureBranch(sql: SqlStorage, convoyId: string): strin if (rows.length === 0) return null; return z.object({ feature_branch: z.string().nullable() }).parse(rows[0]).feature_branch; } - -/** - * Recount closed_beads for a convoy using the same logic as - * updateConvoyProgress: a tracked bead counts as closed only when - * it is closed/failed AND has no pending merge_request child beads. - */ -function recountConvoyClosedBeads(sql: SqlStorage, convoyId: string): void { - const countRows = [ - ...query( - sql, - /* sql */ ` - SELECT COUNT(1) AS count FROM ${bead_dependencies} AS tracked - INNER JOIN ${beads} AS tracked_bead - ON tracked.${bead_dependencies.columns.bead_id} = tracked_bead.${beads.columns.bead_id} - WHERE tracked.${bead_dependencies.columns.depends_on_bead_id} = ? 
- AND tracked.${bead_dependencies.columns.dependency_type} = 'tracks' - AND tracked_bead.${beads.columns.status} IN ('closed', 'failed') - AND NOT EXISTS ( - SELECT 1 FROM ${bead_dependencies} AS mr_dep - INNER JOIN ${beads} AS mr_bead - ON mr_dep.${bead_dependencies.columns.bead_id} = mr_bead.${beads.columns.bead_id} - WHERE mr_dep.${bead_dependencies.columns.depends_on_bead_id} = tracked_bead.${beads.columns.bead_id} - AND mr_dep.${bead_dependencies.columns.dependency_type} = 'tracks' - AND mr_bead.${beads.columns.type} = 'merge_request' - AND mr_bead.${beads.columns.status} IN ('open', 'in_progress') - ) - `, - [convoyId] - ), - ]; - const closedCount = z.object({ count: z.number() }).parse(countRows[0]).count; - - query( - sql, - /* sql */ ` - UPDATE ${convoy_metadata} - SET ${convoy_metadata.columns.closed_beads} = ? - WHERE ${convoy_metadata.bead_id} = ? - `, - [closedCount, convoyId] - ); -} - -// ── Convoy Membership ─────────────────────────────────────────────── - -/** - * Add a bead to an existing convoy. Creates the 'tracks' dependency, - * merges convoy_id + feature_branch into the bead's metadata, and - * increments the convoy's total_beads counter. - * - * No-ops if the bead already tracks this convoy. - */ -export function addBeadToConvoy(sql: SqlStorage, beadId: string, convoyId: string): void { - // Verify both exist - const bead = getBead(sql, beadId); - if (!bead) throw new Error(`Bead ${beadId} not found`); - - const convoyBead = getBead(sql, convoyId); - if (!convoyBead) throw new Error(`Convoy ${convoyId} not found`); - if (convoyBead.type !== 'convoy') { - throw new Error(`Bead ${convoyId} is not a convoy (type: ${convoyBead.type})`); - } - - // Check if already tracked - const existing = getConvoyForBead(sql, beadId); - if (existing === convoyId) return; // already a member - if (existing) { - throw new Error( - `Bead ${beadId} already belongs to convoy ${existing}. 
Remove it first before adding to a different convoy.` - ); - } - - // Insert 'tracks' dependency - query( - sql, - /* sql */ ` - INSERT INTO ${bead_dependencies} ( - ${bead_dependencies.columns.bead_id}, - ${bead_dependencies.columns.depends_on_bead_id}, - ${bead_dependencies.columns.dependency_type} - ) VALUES (?, ?, 'tracks') - ON CONFLICT DO NOTHING - `, - [beadId, convoyId] - ); - - // Merge convoy_id + feature_branch into bead metadata - const featureBranch = getConvoyFeatureBranch(sql, convoyId); - const timestamp = now(); - const metadataPatch: Record = { convoy_id: convoyId }; - if (featureBranch) metadataPatch.feature_branch = featureBranch; - - const existingMetadata = cloneBeadMetadata(bead.metadata); - const merged = { ...existingMetadata, ...metadataPatch }; - - query( - sql, - /* sql */ ` - UPDATE ${beads} - SET ${beads.columns.metadata} = ?, - ${beads.columns.updated_at} = ? - WHERE ${beads.bead_id} = ? - `, - [JSON.stringify(merged), timestamp, beadId] - ); - - // Increment total_beads and recount closed_beads (the bead may already - // be closed/failed, so a naive +1 on total_beads alone would leave - // closed_beads stale). - query( - sql, - /* sql */ ` - UPDATE ${convoy_metadata} - SET ${convoy_metadata.columns.total_beads} = ${convoy_metadata.columns.total_beads} + 1 - WHERE ${convoy_metadata.bead_id} = ? - `, - [convoyId] - ); - recountConvoyClosedBeads(sql, convoyId); - - // If the bead is still open, clear the ready_to_land flag on the convoy - // in case it was already set — a new open bead means the convoy is not - // complete and must not submit the final landing MR. - if (bead.status !== 'closed' && bead.status !== 'failed') { - query( - sql, - /* sql */ ` - UPDATE ${beads} - SET ${beads.columns.metadata} = json_remove(COALESCE(${beads.metadata}, '{}'), '$.ready_to_land'), - ${beads.columns.updated_at} = ? - WHERE ${beads.bead_id} = ? - `, - [timestamp, convoyId] - ); - } -} - -/** - * Remove a bead from its convoy. 
Deletes the 'tracks' dependency, - * strips convoy_id + feature_branch from metadata, and decrements - * the convoy's total_beads counter. - * - * No-ops if the bead is not in any convoy. - */ -export function removeBeadFromConvoy(sql: SqlStorage, beadId: string): string | null { - const convoyId = getConvoyForBead(sql, beadId); - if (!convoyId) return null; - - // Remove 'tracks' dependency - query( - sql, - /* sql */ ` - DELETE FROM ${bead_dependencies} - WHERE ${bead_dependencies.bead_id} = ? - AND ${bead_dependencies.depends_on_bead_id} = ? - AND ${bead_dependencies.dependency_type} = 'tracks' - `, - [beadId, convoyId] - ); - - // Strip convoy_id + feature_branch from metadata - const bead = getBead(sql, beadId); - if (bead) { - const existingMetadata = cloneBeadMetadata(bead.metadata); - delete existingMetadata.convoy_id; - delete existingMetadata.feature_branch; - const timestamp = now(); - - query( - sql, - /* sql */ ` - UPDATE ${beads} - SET ${beads.columns.metadata} = ?, - ${beads.columns.updated_at} = ? - WHERE ${beads.bead_id} = ? - `, - [JSON.stringify(existingMetadata), timestamp, beadId] - ); - } - - // Decrement total_beads and recount closed_beads. A naive decrement of - // closed_beads is unreliable because updateConvoyProgress excludes beads - // with pending MR children from the count — a bead that is closed but - // mid-review was never counted, so decrementing would undercount. - query( - sql, - /* sql */ ` - UPDATE ${convoy_metadata} - SET ${convoy_metadata.columns.total_beads} = MAX(${convoy_metadata.columns.total_beads} - 1, 0) - WHERE ${convoy_metadata.bead_id} = ? - `, - [convoyId] - ); - recountConvoyClosedBeads(sql, convoyId); - - return convoyId; -} - -// ── Bead Dependency Editing ───────────────────────────────────────── - -/** - * Add a dependency edge between two beads. 
- * - * - Validates self-reference (`beadId !== dependsOnBeadId`) - * - Checks both beads exist - * - Runs cycle detection for 'blocks' dependencies (DFS from `dependsOnBeadId` - * — if you can reach `beadId`, adding the edge would create a cycle) - * - Uses `ON CONFLICT DO NOTHING` so duplicate adds are a no-op - */ -export function addBeadDependency( - sql: SqlStorage, - beadId: string, - dependsOnBeadId: string, - type: 'blocks' | 'tracks' | 'parent-child' -): void { - if (beadId === dependsOnBeadId) { - throw new Error('A bead cannot depend on itself'); - } - - // Verify both beads exist - const existCheck = [ - ...query( - sql, - /* sql */ ` - SELECT ${beads.bead_id} - FROM ${beads} - WHERE ${beads.bead_id} IN (?, ?) - `, - [beadId, dependsOnBeadId] - ), - ]; - const foundIds = new Set( - z - .object({ bead_id: z.string() }) - .array() - .parse(existCheck) - .map(r => r.bead_id) - ); - if (!foundIds.has(beadId)) throw new Error(`Bead ${beadId} not found`); - if (!foundIds.has(dependsOnBeadId)) throw new Error(`Bead ${dependsOnBeadId} not found`); - - // Cycle detection for 'blocks' dependencies: DFS from dependsOnBeadId - // following existing 'blocks' edges. If we can reach beadId, adding - // this edge would create a cycle. - if (type === 'blocks') { - const adjacency = new Map(); - const edgeRows = [ - ...query( - sql, - /* sql */ ` - SELECT ${bead_dependencies.bead_id}, ${bead_dependencies.depends_on_bead_id} - FROM ${bead_dependencies} - WHERE ${bead_dependencies.dependency_type} = 'blocks' - `, - [] - ), - ]; - const edges = BeadDependencyRecord.pick({ bead_id: true, depends_on_bead_id: true }) - .array() - .parse(edgeRows); - for (const edge of edges) { - const neighbors = adjacency.get(edge.bead_id) ?? 
[]; - neighbors.push(edge.depends_on_bead_id); - adjacency.set(edge.bead_id, neighbors); - } - - // DFS from dependsOnBeadId following the direction: bead_id → depends_on_bead_id - // We want to check: can dependsOnBeadId reach beadId through existing edges? - // The graph direction is: beadId depends on dependsOnBeadId. - // A cycle means: dependsOnBeadId already (transitively) depends on beadId. - // So we follow edges from dependsOnBeadId: check dependsOnBeadId's own - // depends_on edges to see if beadId is reachable. - const visited = new Set(); - const stack = [dependsOnBeadId]; - while (stack.length > 0) { - const current = stack.pop(); - if (current === undefined) break; - if (current === beadId) { - throw new Error( - `Adding dependency would create a cycle: ${beadId} → ${dependsOnBeadId} → ... → ${beadId}` - ); - } - if (visited.has(current)) continue; - visited.add(current); - const neighbors = adjacency.get(current); - if (neighbors) { - for (const neighbor of neighbors) { - if (!visited.has(neighbor)) stack.push(neighbor); - } - } - } - } - - query( - sql, - /* sql */ ` - INSERT INTO ${bead_dependencies} ( - ${bead_dependencies.columns.bead_id}, - ${bead_dependencies.columns.depends_on_bead_id}, - ${bead_dependencies.columns.dependency_type} - ) VALUES (?, ?, ?) - ON CONFLICT DO NOTHING - `, - [beadId, dependsOnBeadId, type] - ); -} - -/** - * Remove a dependency edge between two beads. - * Does NOT allow removing 'tracks' dependencies (system-managed convoy edges). - * Returns true if a row was actually deleted, false otherwise. - */ -export function removeBeadDependency( - sql: SqlStorage, - beadId: string, - dependsOnBeadId: string -): boolean { - const result = [ - ...query( - sql, - /* sql */ ` - DELETE FROM ${bead_dependencies} - WHERE ${bead_dependencies.bead_id} = ? - AND ${bead_dependencies.depends_on_bead_id} = ? 
- AND ${bead_dependencies.dependency_type} != 'tracks' - RETURNING ${bead_dependencies.bead_id} - `, - [beadId, dependsOnBeadId] - ), - ]; - return result.length > 0; -} diff --git a/cloudflare-gastown/src/dos/town/container-dispatch.ts b/cloudflare-gastown/src/dos/town/container-dispatch.ts index 5f56a0608c..4ada979758 100644 --- a/cloudflare-gastown/src/dos/town/container-dispatch.ts +++ b/cloudflare-gastown/src/dos/town/container-dispatch.ts @@ -12,6 +12,13 @@ import { buildContainerConfig, resolveModel, resolveSmallModel } from './config' const TOWN_LOG = '[Town.do]'; +// Module-level diagnostic: stores the last container start error so +// callers can surface it via the admin API. Reset on each call. +let lastStartError: string | null = null; +export function getLastStartError(): string | null { + return lastStartError; +} + /** * Resolve the GASTOWN_JWT_SECRET binding to a string. */ @@ -101,6 +108,7 @@ export async function ensureContainerToken( try { const resp = await container.fetch('http://container/refresh-token', { method: 'POST', + signal: AbortSignal.timeout(10_000), headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ token }), }); @@ -303,6 +311,7 @@ export async function startAgentInContainer( }>; } ): Promise { + lastStartError = null; console.log( `${TOWN_LOG} startAgentInContainer: agentId=${params.agentId} role=${params.role} name=${params.agentName}` ); @@ -376,6 +385,7 @@ export async function startAgentInContainer( const response = await container.fetch('http://container/agents/start', { method: 'POST', + signal: AbortSignal.timeout(60_000), headers: { 'Content-Type': 'application/json', 'X-Town-Config': JSON.stringify(containerConfig), @@ -427,11 +437,27 @@ export async function startAgentInContainer( if (!response.ok) { const text = await response.text().catch(() => '(unreadable)'); - console.error(`${TOWN_LOG} startAgentInContainer: error response: ${text.slice(0, 500)}`); + // "Already running" means a previous 
dispatch succeeded — the agent + // IS alive in the container. Treat as success so the DO marks the + // agent as working and stops retrying. + if (response.status === 500 && text.includes('already running')) { + console.log( + `${TOWN_LOG} startAgentInContainer: agent ${params.agentId} already running — treating as success` + ); + return true; + } + const errorMsg = `(${response.status}) ${text.slice(0, 300)}`; + console.error( + `${TOWN_LOG} startAgentInContainer: error response for ` + + `agent=${params.agentId} role=${params.role}: ${errorMsg}` + ); + lastStartError = errorMsg; } return response.ok; } catch (err) { + const message = err instanceof Error ? err.message : String(err); console.error(`${TOWN_LOG} startAgentInContainer: EXCEPTION for agent ${params.agentId}:`, err); + lastStartError = `EXCEPTION: ${message.slice(0, 300)}`; return false; } } @@ -533,13 +559,17 @@ export async function checkAgentContainerStatus( ): Promise<{ status: string; exitReason?: string }> { try { const container = getTownContainerStub(env, townId); - // TODO: Generally you should use containerFetch which waits for ports to be available - const response = await container.fetch(`http://container/agents/${agentId}/status`); + const response = await container.fetch(`http://container/agents/${agentId}/status`, { + signal: AbortSignal.timeout(10_000), + }); // 404 means the container is running but has no record of this agent // (e.g. after container eviction). Report as 'not_found' so // witnessPatrol can immediately reset and redispatch the agent // instead of waiting for the 2-hour GUPP timeout. if (response.status === 404) return { status: 'not_found' }; + // Non-OK but not 404 — container is having issues but may still + // have the agent running. Return 'unknown' so witnessPatrol doesn't + // falsely reset a working agent. 
if (!response.ok) return { status: 'unknown' }; const data: unknown = await response.json(); if (typeof data === 'object' && data !== null && 'status' in data) { @@ -553,6 +583,10 @@ export async function checkAgentContainerStatus( } return { status: 'unknown' }; } catch { + // Timeout, network error, or container starting up — return + // 'unknown' so witnessPatrol doesn't falsely reset working agents. + // True zombies will be caught after repeated 'unknown' results + // once the GIPP/heartbeat timeout expires. return { status: 'unknown' }; } } diff --git a/cloudflare-gastown/src/dos/town/events.ts b/cloudflare-gastown/src/dos/town/events.ts new file mode 100644 index 0000000000..ea95388be3 --- /dev/null +++ b/cloudflare-gastown/src/dos/town/events.ts @@ -0,0 +1,213 @@ +/** + * Town event recording and draining for the reconciler. + * + * Events are facts recorded by RPC handlers. The reconciler drains them + * on each alarm tick and applies state transitions. See reconciliation-spec.md §3. + */ + +import { z } from 'zod'; +import { + town_events, + TownEventRecord, + createTableTownEvents, + getIndexesTownEvents, +} from '../../db/tables/town-events.table'; +import type { TownEventType } from '../../db/tables/town-events.table'; +import { query } from '../../util/query.util'; + +function generateId(): string { + return crypto.randomUUID(); +} + +function now(): string { + return new Date().toISOString(); +} + +/** Create the town_events table and indexes. Idempotent. */ +export function initTownEventsTable(sql: SqlStorage): void { + query(sql, createTableTownEvents(), []); + for (const idx of getIndexesTownEvents()) { + query(sql, idx, []); + } +} + +/** + * Insert a new event into the town_events table. + * Events start with processed_at = NULL and are consumed by drainEvents(). 
+ */ +export function insertEvent( + sql: SqlStorage, + eventType: TownEventType, + params: { + agent_id?: string | null; + bead_id?: string | null; + payload?: Record; + } = {} +): string { + const eventId = generateId(); + query( + sql, + /* sql */ ` + INSERT INTO ${town_events} ( + ${town_events.columns.event_id}, + ${town_events.columns.event_type}, + ${town_events.columns.agent_id}, + ${town_events.columns.bead_id}, + ${town_events.columns.payload}, + ${town_events.columns.created_at}, + ${town_events.columns.processed_at} + ) VALUES (?, ?, ?, ?, ?, ?, ?) + `, + [ + eventId, + eventType, + params.agent_id ?? null, + params.bead_id ?? null, + JSON.stringify(params.payload ?? {}), + now(), + null, + ] + ); + return eventId; +} + +/** + * Upsert a container_status event for an agent. Instead of inserting a new + * event every tick (which floods the table at 5s intervals × N agents), + * this reuses an existing unprocessed container_status event for the same + * agent if the status hasn't changed — just bumping the timestamp. A fresh + * event is only inserted when the status actually changes or no prior + * unprocessed event exists. + */ +export function upsertContainerStatus( + sql: SqlStorage, + agentId: string, + payload: { status: string; exit_reason?: string | null } +): void { + // Check for an existing unprocessed container_status event for this agent + const existing = z + .object({ event_id: z.string(), payload: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${town_events.columns.event_id} as event_id, + ${town_events.columns.payload} as payload + FROM ${town_events} + WHERE ${town_events.columns.event_type} = 'container_status' + AND ${town_events.columns.agent_id} = ? 
+ AND ${town_events.columns.processed_at} IS NULL + ORDER BY ${town_events.columns.created_at} DESC + LIMIT 1 + `, + [agentId] + ), + ]); + + if (existing.length > 0) { + let prevPayload: Record = {}; + try { + prevPayload = JSON.parse(existing[0].payload) as Record; + } catch { + /* ignore */ + } + + if (prevPayload.status === payload.status) { + // Same status — just bump the timestamp, don't create a new event + query( + sql, + /* sql */ ` + UPDATE ${town_events} + SET ${town_events.columns.created_at} = ? + WHERE ${town_events.columns.event_id} = ? + `, + [now(), existing[0].event_id] + ); + return; + } + } + + // Status changed or no prior event — insert a new one + insertEvent(sql, 'container_status', { + agent_id: agentId, + payload: { + status: payload.status, + ...(payload.exit_reason ? { exit_reason: payload.exit_reason } : {}), + }, + }); +} + +/** + * Drain all unprocessed events, ordered by creation time. + * Returns events with processed_at = NULL, oldest first. + */ +export function drainEvents(sql: SqlStorage): TownEventRecord[] { + const rows = [ + ...query( + sql, + /* sql */ ` + SELECT ${town_events.event_id}, ${town_events.event_type}, + ${town_events.agent_id}, ${town_events.bead_id}, + ${town_events.payload}, ${town_events.created_at}, + ${town_events.processed_at} + FROM ${town_events} + WHERE ${town_events.processed_at} IS NULL + ORDER BY ${town_events.created_at} ASC + `, + [] + ), + ]; + return TownEventRecord.array().parse(rows); +} + +/** Mark an event as processed so it won't be returned by drainEvents again. */ +export function markProcessed(sql: SqlStorage, eventId: string): void { + query( + sql, + /* sql */ ` + UPDATE ${town_events} + SET ${town_events.columns.processed_at} = ? + WHERE ${town_events.event_id} = ? + `, + [now(), eventId] + ); +} + +/** + * Delete old processed events beyond the retention window. 
+ * Only deletes events that have been processed (processed_at IS NOT NULL) + * and whose created_at is older than the cutoff. + */ +export function pruneOldEvents(sql: SqlStorage, retentionMs: number): number { + const cutoff = new Date(Date.now() - retentionMs).toISOString(); + const deleted = [ + ...query( + sql, + /* sql */ ` + DELETE FROM ${town_events} + WHERE ${town_events.processed_at} IS NOT NULL + AND ${town_events.created_at} < ? + RETURNING ${town_events.event_id} + `, + [cutoff] + ), + ]; + return deleted.length; +} + +/** Count unprocessed events (useful for metrics). */ +export function pendingEventCount(sql: SqlStorage): number { + const rows = [ + ...query( + sql, + /* sql */ ` + SELECT count(*) as cnt FROM ${town_events} + WHERE ${town_events.processed_at} IS NULL + `, + [] + ), + ]; + const row = rows[0]; + return typeof row?.cnt === 'number' ? row.cnt : 0; +} diff --git a/cloudflare-gastown/src/dos/town/patrol.ts b/cloudflare-gastown/src/dos/town/patrol.ts index 5fb78db442..b0e540ea53 100644 --- a/cloudflare-gastown/src/dos/town/patrol.ts +++ b/cloudflare-gastown/src/dos/town/patrol.ts @@ -30,14 +30,7 @@ export const GUPP_ESCALATE_MS = 60 * 60_000; // 1h export const GUPP_FORCE_STOP_MS = 2 * 60 * 60_000; // 2h /** Agents dead/completed for longer than this are GC'd */ export const AGENT_GC_RETENTION_MS = 24 * 60 * 60_000; // 24h -/** Per-bead timeout (if metadata.timeout_ms is set) */ -export const DEFAULT_BEAD_TIMEOUT_MS = 4 * 60 * 60_000; // 4h fallback -/** Hook considered stale after this duration with no dispatch activity */ -export const STALE_HOOK_MS = 30 * 60_000; // 30 min -/** Agent failing repeatedly within this window is a crash loop */ -export const CRASH_LOOP_WINDOW_MS = 30 * 60_000; // 30 min -/** Minimum failures within the window to flag a crash loop */ -export const CRASH_LOOP_THRESHOLD = 3; + /** Maximum number of open triage request beads allowed at once */ export const MAX_OPEN_TRIAGE_REQUESTS = 5; @@ -159,507 +152,6 @@ 
export function createTriageRequest( ); } -// ── Witness patrol sub-checks ─────────────────────────────────────── - -/** - * Tiered GUPP violation handling: - * - 30 min: nudge agent with GUPP_CHECK warning - * - 1h: nudge agent with GUPP_ESCALATION, create triage request - * - 2h: force-stop agent, create triage request for dirty polecat - * - * Returns agent IDs that were force-stopped (caller should stop them - * in the container). - * - * The `queueNudge` callback sends a time-sensitive message to the agent. - * It is fire-and-forget (returns a Promise that the caller ignores). - */ -export function detectGUPPViolations( - sql: SqlStorage, - workingAgents: Array<{ - bead_id: string; - current_hook_bead_id: string | null; - last_activity_at: string | null; - }>, - queueNudge: ( - agentId: string, - message: string, - opts: { mode: 'immediate'; source: string; priority: 'urgent' } - ) => Promise -): string[] { - const nowMs = Date.now(); - const forceStopIds: string[] = []; - - for (const agent of workingAgents) { - if (!agent.last_activity_at) continue; - const staleMs = nowMs - new Date(agent.last_activity_at).getTime(); - - if (staleMs >= GUPP_FORCE_STOP_MS) { - // Tier 3: force-stop and flag for triage - forceStopIds.push(agent.bead_id); - - createTriageRequest(sql, { - triageType: 'stuck_agent', - agentBeadId: agent.bead_id, - hookedBeadId: agent.current_hook_bead_id, - title: `Force-stopped agent after ${Math.round(staleMs / 60_000)}min GUPP violation`, - context: { - last_activity_at: agent.last_activity_at, - stale_minutes: Math.round(staleMs / 60_000), - action_taken: 'force_stop', - }, - options: ['RESTART', 'ESCALATE_TO_MAYOR', 'CLOSE_BEAD'], - }); - - // Mark agent as stalled - query( - sql, - /* sql */ ` - UPDATE ${agent_metadata} - SET ${agent_metadata.columns.status} = 'stalled' - WHERE ${agent_metadata.bead_id} = ? 
- `, - [agent.bead_id] - ); - - console.log( - `${LOG} GUPP force-stop: agent=${agent.bead_id} stale=${Math.round(staleMs / 60_000)}min` - ); - } else if (staleMs >= GUPP_ESCALATE_MS) { - // Tier 2: create a triage request for the stuck agent. The triage - // agent (or mayor, if escalated) will decide whether to restart, - // nudge, or force-stop. Also nudge the stuck agent directly. - const existingEsc = [ - ...query( - sql, - /* sql */ ` - SELECT ${agent_nudges.nudge_id} FROM ${agent_nudges} - WHERE ${agent_nudges.agent_bead_id} = ? - AND ${agent_nudges.source} = 'witness' - AND ${agent_nudges.message} LIKE '%GUPP_ESCALATION%' - AND (${agent_nudges.delivered_at} IS NULL OR ${agent_nudges.delivered_at} > datetime('now', '-60 minutes')) - LIMIT 1 - `, - [agent.bead_id] - ), - ]; - if (existingEsc.length === 0) { - // Nudge the stuck agent — time-sensitive, deliver immediately - queueNudge( - agent.bead_id, - `GUPP_ESCALATION: You have been inactive for ${Math.round(staleMs / 60_000)} minutes. This has been escalated. You will be force-stopped if inactivity continues.`, - { mode: 'immediate', source: 'witness', priority: 'urgent' } - ).catch(() => {}); - - // Create a triage request so the triage agent (or mayor) is aware - createTriageRequest(sql, { - triageType: 'stuck_agent', - agentBeadId: agent.bead_id, - hookedBeadId: agent.current_hook_bead_id, - title: `Agent inactive for ${Math.round(staleMs / 60_000)}min — GUPP escalation`, - context: { - last_activity_at: agent.last_activity_at, - stale_minutes: Math.round(staleMs / 60_000), - tier: 'escalation', - }, - options: ['RESTART', 'NUDGE', 'ESCALATE_TO_MAYOR', 'CLOSE_BEAD'], - }); - - console.log(`${LOG} GUPP escalation: agent=${agent.bead_id}`); - } - } else if (staleMs >= GUPP_WARN_MS) { - // Tier 1: nudge agent with GUPP_CHECK warning (idempotent) - const existingGupp = [ - ...query( - sql, - /* sql */ ` - SELECT ${agent_nudges.nudge_id} FROM ${agent_nudges} - WHERE ${agent_nudges.agent_bead_id} = ? 
- AND ${agent_nudges.source} = 'witness' - AND ${agent_nudges.message} LIKE '%GUPP_CHECK%' - AND (${agent_nudges.delivered_at} IS NULL OR ${agent_nudges.delivered_at} > datetime('now', '-60 minutes')) - LIMIT 1 - `, - [agent.bead_id] - ), - ]; - if (existingGupp.length === 0) { - queueNudge( - agent.bead_id, - 'GUPP_CHECK: You have had work hooked for 30+ minutes with no activity. Are you stuck? If so, call gt_escalate.', - { mode: 'immediate', source: 'witness', priority: 'urgent' } - ).catch(() => {}); - console.log( - `${LOG} GUPP warn: agent=${agent.bead_id} stale=${Math.round(staleMs / 60_000)}min` - ); - } - } - } - - return forceStopIds; -} - -/** - * Detect orphaned work: idle agents with a hooked bead but no recent - * dispatch activity. These agents were assigned work but never started. - * - * Different from schedulePendingWork which handles the cooldown/retry - * loop — this catches agents that have been idle+hooked for an - * unreasonably long time (beyond what the scheduler would tolerate). - */ -export function detectOrphanedWork(sql: SqlStorage): void { - const cutoff = new Date(Date.now() - STALE_HOOK_MS).toISOString(); - - const rows = AgentMetadataRecord.pick({ - bead_id: true, - current_hook_bead_id: true, - dispatch_attempts: true, - last_activity_at: true, - }) - .array() - .parse([ - ...query( - sql, - /* sql */ ` - SELECT ${agent_metadata.bead_id}, - ${agent_metadata.current_hook_bead_id}, - ${agent_metadata.dispatch_attempts}, - ${agent_metadata.last_activity_at} - FROM ${agent_metadata} - WHERE ${agent_metadata.status} = 'idle' - AND ${agent_metadata.current_hook_bead_id} IS NOT NULL - AND ${agent_metadata.dispatch_attempts} >= 5 - AND (${agent_metadata.last_activity_at} IS NULL OR ${agent_metadata.last_activity_at} < ?) - `, - [cutoff] - ), - ]); - - for (const row of rows) { - // These agents have exhausted dispatch attempts AND are still hooked. - // schedulePendingWork should have failed the bead — this is a safety net. 
- console.log( - `${LOG} orphaned work detected: agent=${row.bead_id} hook=${row.current_hook_bead_id} attempts=${row.dispatch_attempts}` - ); - - // Actually fail the bead and unhook the agent (matching schedulePendingWork behavior) - if (row.current_hook_bead_id) { - updateBeadStatus(sql, row.current_hook_bead_id, 'failed', row.bead_id); - unhookBead(sql, row.bead_id); - } - } -} - -/** - * Garbage-collect dead/completed agents past the retention period. - * Agents in 'idle' status with no hooked bead whose creation time - * exceeds the retention threshold and that have been idle for longer - * than the retention period are deleted. - * - * Only targets polecats and refinery agents — the mayor singleton - * is never GC'd. - */ -export function agentGC(sql: SqlStorage): number { - const cutoff = new Date(Date.now() - AGENT_GC_RETENTION_MS).toISOString(); - - // Find agents eligible for GC: idle polecats/refinery with no hook, - // whose last activity is older than the retention period - const rows = AgentMetadataRecord.pick({ bead_id: true }) - .array() - .parse([ - ...query( - sql, - /* sql */ ` - SELECT ${agent_metadata.bead_id} - FROM ${agent_metadata} - WHERE ${agent_metadata.status} IN ('idle', 'dead') - AND ${agent_metadata.current_hook_bead_id} IS NULL - AND ${agent_metadata.role} IN ('polecat', 'refinery') - AND ( - ${agent_metadata.last_activity_at} IS NOT NULL - AND ${agent_metadata.last_activity_at} < ? - ) - `, - [cutoff] - ), - ]); - - for (const row of rows) { - console.log(`${LOG} agentGC: deleting agent=${row.bead_id}`); - deleteAgent(sql, row.bead_id); - } - - return rows.length; -} - -/** - * Enforce per-bead timeouts. Beads with metadata.timeout_ms that have - * been in_progress for longer than their timeout are failed. - * - * Returns timed-out bead IDs and their assigned agent IDs (so the - * caller can stop the agent processes in the container). 
- */ -export function checkTimerGates( - sql: SqlStorage -): Array<{ beadId: string; agentId: string | null }> { - const nowMs = Date.now(); - const timedOut: Array<{ beadId: string; agentId: string | null }> = []; - - // Find in_progress beads with a timeout_ms in metadata - const rows = BeadRecordSchema.pick({ - bead_id: true, - metadata: true, - updated_at: true, - assignee_agent_bead_id: true, - }) - .array() - .parse([ - ...query( - sql, - /* sql */ ` - SELECT ${beads.bead_id}, ${beads.metadata}, ${beads.updated_at}, ${beads.assignee_agent_bead_id} - FROM ${beads} - WHERE ${beads.status} = 'in_progress' - AND ${beads.type} IN ('issue', 'molecule') - AND json_extract(${beads.metadata}, '$.timeout_ms') IS NOT NULL - `, - [] - ), - ]); - - for (const row of rows) { - const timeoutMs = Number(row.metadata?.timeout_ms ?? DEFAULT_BEAD_TIMEOUT_MS); - if (!timeoutMs || isNaN(timeoutMs) || timeoutMs <= 0) continue; - - const elapsedMs = nowMs - new Date(row.updated_at).getTime(); - if (elapsedMs > timeoutMs) { - // Fail the bead and unhook the assigned agent so the scheduler - // can recover the slot (matching schedulePendingWork's failure path). - // updateBeadStatus already logs a status_changed event internally, - // so no additional logBeadEvent call is needed here. - updateBeadStatus(sql, row.bead_id, 'failed', row.assignee_agent_bead_id ?? 'patrol'); - - if (row.assignee_agent_bead_id) { - unhookBead(sql, row.assignee_agent_bead_id); - } - - timedOut.push({ beadId: row.bead_id, agentId: row.assignee_agent_bead_id ?? null }); - console.log( - `${LOG} checkTimerGates: bead=${row.bead_id} timed out after ${Math.round(elapsedMs / 60_000)}min (limit=${Math.round(timeoutMs / 60_000)}min)` - ); - } - } - - return timedOut; -} - -// ── Deacon patrol sub-checks ──────────────────────────────────────── - -/** - * Detect stale hooks: agents that have been idle with a hook for an - * extended period without any dispatch activity. 
This catches cases - * where schedulePendingWork's cooldown/retry loop failed silently. - * - * Different from detectOrphanedWork (which catches exhausted retries) — - * this catches agents that are hooked+idle but haven't even been - * attempted recently. - */ -export function detectStaleHooks(sql: SqlStorage): void { - const cutoff = new Date(Date.now() - STALE_HOOK_MS).toISOString(); - - const rows = AgentMetadataRecord.pick({ - bead_id: true, - current_hook_bead_id: true, - dispatch_attempts: true, - last_activity_at: true, - }) - .array() - .parse([ - ...query( - sql, - /* sql */ ` - SELECT ${agent_metadata.bead_id}, - ${agent_metadata.current_hook_bead_id}, - ${agent_metadata.dispatch_attempts}, - ${agent_metadata.last_activity_at} - FROM ${agent_metadata} - WHERE ${agent_metadata.status} = 'idle' - AND ${agent_metadata.current_hook_bead_id} IS NOT NULL - AND ${agent_metadata.dispatch_attempts} < 5 - AND (${agent_metadata.last_activity_at} IS NULL OR ${agent_metadata.last_activity_at} < ?) - `, - [cutoff] - ), - ]); - - for (const row of rows) { - // Reset last_activity_at to trigger schedulePendingWork to pick it up - // on the next alarm cycle (it skips agents with recent activity). - query( - sql, - /* sql */ ` - UPDATE ${agent_metadata} - SET ${agent_metadata.columns.last_activity_at} = NULL - WHERE ${agent_metadata.bead_id} = ? - `, - [row.bead_id] - ); - - console.log( - `${LOG} stale hook nudge: agent=${row.bead_id} hook=${row.current_hook_bead_id} attempts=${row.dispatch_attempts}` - ); - } -} - -/** - * Feed stranded convoys: find active (non-staged) convoys that have open - * beads with no assigned agent. Auto-sling by assigning idle polecats. - * Staged convoys are excluded — their beads remain unassigned until - * the convoy is explicitly started via startConvoy(). - */ -export function feedStrandedConvoys(sql: SqlStorage, townId: string): void { - // Find open issue beads that: - // 1. Belong to an active convoy (tracked by a convoy bead) - // 2. 
Have no assigned agent - const StrandedBeadRow = z.object({ - bead_id: z.string(), - rig_id: z.string().nullable(), - convoy_bead_id: z.string(), - }); - - const rows = StrandedBeadRow.array().parse([ - ...query( - sql, - /* sql */ ` - SELECT ${beads.bead_id}, - ${beads.rig_id}, - ${bead_dependencies.depends_on_bead_id} AS convoy_bead_id - FROM ${bead_dependencies} - INNER JOIN ${beads} ON ${bead_dependencies.bead_id} = ${beads.bead_id} - INNER JOIN ${beads} AS convoy ON ${bead_dependencies.depends_on_bead_id} = convoy.${beads.columns.bead_id} - INNER JOIN ${convoy_metadata} ON ${convoy_metadata.bead_id} = convoy.${beads.columns.bead_id} - WHERE ${bead_dependencies.dependency_type} = 'tracks' - AND convoy.${beads.columns.type} = 'convoy' - AND convoy.${beads.columns.status} = 'open' - AND ${convoy_metadata.staged} = 0 - AND ${beads.status} = 'open' - AND ${beads.type} = 'issue' - AND ${beads.assignee_agent_bead_id} IS NULL - `, - [] - ), - ]); - - if (rows.length === 0) return; - - console.log(`${LOG} feedStrandedConvoys: found ${rows.length} unassigned convoy bead(s)`); - - // For each stranded bead, find or create an idle polecat in the same rig - // and hook it. The next schedulePendingWork cycle will dispatch it. - // We import getOrCreateAgent inline to avoid circular dependency issues. - for (const row of rows) { - const rigId = row.rig_id; - if (!rigId) continue; - - try { - const agent = getOrCreateAgent(sql, 'polecat', rigId, townId); - hookBead(sql, agent.id, row.bead_id); - // Clear last_activity_at so schedulePendingWork picks this up on - // the next alarm tick instead of waiting for the dispatch cooldown. - query( - sql, - /* sql */ ` - UPDATE ${agent_metadata} - SET ${agent_metadata.columns.last_activity_at} = NULL - WHERE ${agent_metadata.bead_id} = ? 
- `, - [agent.id] - ); - console.log( - `${LOG} feedStrandedConvoys: assigned agent=${agent.id} to bead=${row.bead_id} in convoy=${row.convoy_bead_id}` - ); - } catch (err) { - console.warn( - `${LOG} feedStrandedConvoys: failed to assign agent to bead=${row.bead_id}:`, - err - ); - } - } -} - -/** - * Detect crash loops: agents that have failed repeatedly within a - * short window. Creates a triage request for LLM assessment. - * - * Crash loop detection uses the bead_events table to count recent - * status_changed events to 'failed' for each agent. - */ -export function detectCrashLoops(sql: SqlStorage): void { - const windowCutoff = new Date(Date.now() - CRASH_LOOP_WINDOW_MS).toISOString(); - - // Count recent failure events per agent - const CrashRow = z.object({ - agent_id: z.string(), - fail_count: z.number(), - }); - - // Exclude triage agents from crash loop detection — their failures must - // not create new triage requests, which would feed the feedback loop. - // Two complementary checks: - // 1. The failed bead itself carries a triage label (covers triage batch - // bead failures, stable after unhook clears current_hook_bead_id). - // 2. The agent is currently hooked to a triage-labeled bead (covers - // resolveTriage actions like CLOSE_BEAD that fail ordinary beads - // while the triage agent is still working its batch). - const TRIAGE_LABEL_ANY = `%"gt:triage%`; - - const rows = CrashRow.array().parse([ - ...query( - sql, - /* sql */ ` - SELECT be.agent_id, COUNT(*) AS fail_count - FROM bead_events AS be - WHERE be.event_type = 'status_changed' - AND be.new_value = 'failed' - AND be.agent_id IS NOT NULL - AND be.created_at > ? - AND NOT EXISTS ( - SELECT 1 FROM ${beads} AS failed_bead - WHERE failed_bead.${beads.columns.bead_id} = be.bead_id - AND failed_bead.${beads.columns.labels} LIKE ? 
- ) - AND NOT EXISTS ( - SELECT 1 FROM ${agent_metadata} - INNER JOIN ${beads} AS hooked - ON ${agent_metadata.current_hook_bead_id} = hooked.${beads.columns.bead_id} - WHERE ${agent_metadata.bead_id} = be.agent_id - AND hooked.${beads.columns.labels} LIKE ? - ) - GROUP BY be.agent_id - HAVING fail_count >= ? - `, - [windowCutoff, TRIAGE_LABEL_ANY, TRIAGE_LABEL_ANY, CRASH_LOOP_THRESHOLD] - ), - ]); - - for (const row of rows) { - createTriageRequest(sql, { - triageType: 'crash_loop', - agentBeadId: row.agent_id, - title: `Crash loop detected: ${row.fail_count} failures in ${CRASH_LOOP_WINDOW_MS / 60_000}min`, - context: { - agent_id: row.agent_id, - fail_count: row.fail_count, - window_minutes: CRASH_LOOP_WINDOW_MS / 60_000, - }, - options: ['RESTART_WITH_BACKOFF', 'REASSIGN_BEAD', 'ESCALATE_TO_MAYOR'], - }); - - console.log( - `${LOG} crash loop: agent=${row.agent_id} failures=${row.fail_count} in ${CRASH_LOOP_WINDOW_MS / 60_000}min` - ); - } -} - // ── Pending triage requests ───────────────────────────────────────── /** Count open triage request beads (issue beads with gt:triage-request label). */ diff --git a/cloudflare-gastown/src/dos/town/reconciler.ts b/cloudflare-gastown/src/dos/town/reconciler.ts new file mode 100644 index 0000000000..c70e33d51a --- /dev/null +++ b/cloudflare-gastown/src/dos/town/reconciler.ts @@ -0,0 +1,1582 @@ +/** + * Town Reconciler — read-only state reconciliation engine. + * + * Each reconcile function examines current state and returns Action[] + * describing what mutations are needed to bring the system toward its + * desired state. Rules are checked on every alarm tick. + * + * In Phase 2 (shadow mode), actions are logged but not applied. + * In Phase 3+, actions are applied via applyAction(). + * + * See reconciliation-spec.md §5.3. 
+ */ + +import { z } from 'zod'; +import { beads, BeadRecord } from '../../db/tables/beads.table'; +import { agent_metadata, AgentMetadataRecord } from '../../db/tables/agent-metadata.table'; +import { review_metadata, ReviewMetadataRecord } from '../../db/tables/review-metadata.table'; +import { convoy_metadata, ConvoyMetadataRecord } from '../../db/tables/convoy-metadata.table'; +import { bead_dependencies } from '../../db/tables/bead-dependencies.table'; +import { agent_nudges } from '../../db/tables/agent-nudges.table'; +import { query } from '../../util/query.util'; +import { + GUPP_WARN_MS, + GUPP_ESCALATE_MS, + GUPP_FORCE_STOP_MS, + AGENT_GC_RETENTION_MS, + TRIAGE_LABEL_LIKE, +} from './patrol'; +import { DISPATCH_COOLDOWN_MS, MAX_DISPATCH_ATTEMPTS } from './scheduling'; +import * as reviewQueue from './review-queue'; +import * as agents from './agents'; +import * as beadOps from './beads'; +import { getRig } from './rigs'; +import type { Action } from './actions'; +import type { TownEventRecord } from '../../db/tables/town-events.table'; + +const LOG = '[reconciler]'; + +// ── Timeouts (from spec §7) ───────────────────────────────────────── + +/** Reset non-PR MR beads stuck in_progress with no working agent */ +const STUCK_REVIEW_TIMEOUT_MS = 30 * 60_000; // 30 min + +/** Reset unhooked MR beads to open */ +const ABANDONED_MR_TIMEOUT_MS = 2 * 60_000; // 2 min + +/** Reset in_review beads with all-terminal MRs */ +const ORPHANED_SOURCE_TIMEOUT_MS = 5 * 60_000; // 5 min + +/** Fail PR-strategy beads with dead agents */ +const ORPHANED_PR_REVIEW_TIMEOUT_MS = 30 * 60_000; // 30 min + +/** In-progress issue bead with no working agent considered stale. + * Must be longer than AGENT_IDLE_TIMEOUT_MS (2 min) + one alarm tick (5s) + * to avoid racing with the idle-timer → agentCompleted → reconciler flow. 
+ */
+const STALE_IN_PROGRESS_TIMEOUT_MS = 5 * 60_000; // 5 min
+
+// ── Helper: staleness check ─────────────────────────────────────────
+
+function staleMs(timestamp: string | null, thresholdMs: number): boolean {
+  if (!timestamp) return true;
+  return Date.now() - new Date(timestamp).getTime() > thresholdMs;
+}
+
+// ── Row schemas for queries ─────────────────────────────────────────
+// Derived from table record schemas for traceability back to table defs.
+
+const AgentRow = AgentMetadataRecord.pick({
+  bead_id: true,
+  role: true,
+  status: true,
+  current_hook_bead_id: true,
+  dispatch_attempts: true,
+  last_activity_at: true,
+  last_event_type: true,
+  last_event_at: true,
+  active_tools: true,
+}).extend({
+  // Joined from beads table
+  rig_id: BeadRecord.shape.rig_id,
+});
+type AgentRow = z.infer<typeof AgentRow>;
+
+const BeadRow = BeadRecord.pick({
+  bead_id: true,
+  type: true,
+  status: true,
+  rig_id: true,
+  assignee_agent_bead_id: true,
+  updated_at: true,
+  labels: true,
+  created_by: true,
+});
+type BeadRow = z.infer<typeof BeadRow>;
+
+const MrBeadRow = BeadRecord.pick({
+  bead_id: true,
+  status: true,
+  rig_id: true,
+  updated_at: true,
+  assignee_agent_bead_id: true,
+}).extend({
+  // Joined from review_metadata
+  pr_url: ReviewMetadataRecord.shape.pr_url,
+});
+type MrBeadRow = z.infer<typeof MrBeadRow>;
+
+const ConvoyRow = BeadRecord.pick({
+  bead_id: true,
+  status: true,
+}).extend({
+  // Joined from convoy_metadata
+  total_beads: ConvoyMetadataRecord.shape.total_beads,
+  closed_beads: ConvoyMetadataRecord.shape.closed_beads,
+  feature_branch: ConvoyMetadataRecord.shape.feature_branch,
+  merge_mode: ConvoyMetadataRecord.shape.merge_mode,
+  staged: ConvoyMetadataRecord.shape.staged,
+  // Raw JSON string from beads.metadata
+  metadata: z.string(),
+});
+type ConvoyRow = z.infer<typeof ConvoyRow>;
+
+// ════════════════════════════════════════════════════════════════════
+// Event application — translates facts into state transitions
+// ════════════════════════════════════════════════════════════════════
+ +/** + * Apply a single event to the database. Events represent facts that + * have occurred; applying them updates state to reflect those facts. + * + * Delegates to existing module functions to ensure identical behavior + * to the pre-reconciler system. + * + * See reconciliation-spec.md §5.2. + */ +export function applyEvent(sql: SqlStorage, event: TownEventRecord): void { + const payload = event.payload; + + switch (event.event_type) { + case 'agent_done': { + if (!event.agent_id) { + console.warn(`${LOG} applyEvent: agent_done missing agent_id`); + return; + } + const branch = typeof payload.branch === 'string' ? payload.branch : ''; + const pr_url = typeof payload.pr_url === 'string' ? payload.pr_url : undefined; + const summary = typeof payload.summary === 'string' ? payload.summary : undefined; + + reviewQueue.agentDone(sql, event.agent_id, { branch, pr_url, summary }); + return; + } + + case 'agent_completed': { + if (!event.agent_id) { + console.warn(`${LOG} applyEvent: agent_completed missing agent_id`); + return; + } + const status = + payload.status === 'completed' || payload.status === 'failed' ? payload.status : 'failed'; + const reason = typeof payload.reason === 'string' ? payload.reason : undefined; + + reviewQueue.agentCompleted(sql, event.agent_id, { status, reason }); + return; + } + + case 'pr_status_changed': { + if (!event.bead_id) { + console.warn(`${LOG} applyEvent: pr_status_changed missing bead_id`); + return; + } + const pr_state = payload.pr_state; + if (pr_state === 'merged') { + reviewQueue.completeReviewWithResult(sql, { + entry_id: event.bead_id, + status: 'merged', + message: 'PR merged (detected by polling)', + }); + } else if (pr_state === 'closed') { + reviewQueue.completeReviewWithResult(sql, { + entry_id: event.bead_id, + status: 'failed', + message: 'PR closed without merge', + }); + } + return; + } + + case 'bead_created': { + // No state change needed — bead already exists in DB. 
+ // Reconciler will pick it up as unassigned on next pass. + return; + } + + case 'bead_cancelled': { + if (!event.bead_id) { + console.warn(`${LOG} applyEvent: bead_cancelled missing bead_id`); + return; + } + const cancelStatus = + payload.cancel_status === 'closed' || payload.cancel_status === 'failed' + ? payload.cancel_status + : 'failed'; + + beadOps.updateBeadStatus(sql, event.bead_id, cancelStatus, 'system'); + + // Unhook any agent hooked to this bead + const hookedAgentRows = z + .object({ bead_id: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id} + FROM ${agent_metadata} + WHERE ${agent_metadata.current_hook_bead_id} = ? + `, + [event.bead_id] + ), + ]); + for (const row of hookedAgentRows) { + agents.unhookBead(sql, row.bead_id); + } + return; + } + + case 'convoy_started': { + const convoyId = typeof payload.convoy_id === 'string' ? payload.convoy_id : null; + if (!convoyId) { + console.warn(`${LOG} applyEvent: convoy_started missing convoy_id`); + return; + } + query( + sql, + /* sql */ ` + UPDATE ${convoy_metadata} + SET ${convoy_metadata.columns.staged} = 0 + WHERE ${convoy_metadata.columns.bead_id} = ? 
+ `, + [convoyId] + ); + return; + } + + case 'container_status': { + if (!event.agent_id) return; + + const containerStatus = payload.status as string; + const agent = agents.getAgent(sql, event.agent_id); + if (!agent) return; + + // Only act on working/stalled agents whose container has stopped + if ( + (agent.status === 'working' || agent.status === 'stalled') && + (containerStatus === 'exited' || containerStatus === 'not_found') + ) { + if (agent.role === 'refinery') { + // Check if gt_done already completed the MR + if (agent.current_hook_bead_id) { + const mr = beadOps.getBead(sql, agent.current_hook_bead_id); + if (mr && (mr.status === 'closed' || mr.status === 'failed')) { + // MR already terminal — clean up the refinery + agents.unhookBead(sql, event.agent_id); + agents.updateAgentStatus(sql, event.agent_id, 'idle'); + agents.writeCheckpoint(sql, event.agent_id, null); + } else { + // Refinery died without completing — set idle, keep hook. + // reconcileReviewQueue Rule 6 will retry dispatch. + agents.updateAgentStatus(sql, event.agent_id, 'idle'); + } + } else { + agents.updateAgentStatus(sql, event.agent_id, 'idle'); + } + } else { + // Non-refinery died — set idle. Bead stays in_progress. + // reconcileBeads Rule 3 will reset it to open after 5 min. + agents.updateAgentStatus(sql, event.agent_id, 'idle'); + } + } + return; + } + + case 'nudge_timeout': { + // GUPP violations are handled by reconcileGUPP on the next pass. + // The event just records the fact for audit trail. 
+ return; + } + + default: { + console.warn(`${LOG} applyEvent: unknown event type: ${event.event_type}`); + } + } +} + +// ════════════════════════════════════════════════════════════════════ +// Top-level reconcile +// ════════════════════════════════════════════════════════════════════ + +export function reconcile(sql: SqlStorage): Action[] { + const actions: Action[] = []; + actions.push(...reconcileAgents(sql)); + actions.push(...reconcileBeads(sql)); + actions.push(...reconcileReviewQueue(sql)); + actions.push(...reconcileConvoys(sql)); + actions.push(...reconcileGUPP(sql)); + actions.push(...reconcileGC(sql)); + return actions; +} + +// ════════════════════════════════════════════════════════════════════ +// reconcileAgents — detect working agents with dead containers, +// idle agents with stale hooks to terminal beads +// ════════════════════════════════════════════════════════════════════ + +export function reconcileAgents(sql: SqlStorage): Action[] { + const actions: Action[] = []; + + // Working agents with stale or missing heartbeat — container probably dead. + // This is a safety net: the container status observation pre-phase + // emits container_status events which are applied in Phase 0, but + // if that fails (e.g. container DO unreachable), this catches agents + // whose heartbeat stopped. 3 missed heartbeats (90s) = container dead. + // Agents with NULL last_activity_at never received a heartbeat at all + // (container may have failed to start). 
+ const workingAgents = AgentRow.array().parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id}, ${agent_metadata.role}, + ${agent_metadata.status}, ${agent_metadata.current_hook_bead_id}, + ${agent_metadata.dispatch_attempts}, + ${agent_metadata.last_activity_at}, + b.${beads.columns.rig_id} + FROM ${agent_metadata} + LEFT JOIN ${beads} b ON b.${beads.columns.bead_id} = ${agent_metadata.bead_id} + WHERE ${agent_metadata.status} = 'working' + `, + [] + ), + ]); + + for (const agent of workingAgents) { + if (!agent.last_activity_at) { + // No heartbeat ever received — container may have failed to start + actions.push({ + type: 'transition_agent', + agent_id: agent.bead_id, + from: 'working', + to: 'idle', + reason: 'no heartbeat received since dispatch', + }); + } else if (staleMs(agent.last_activity_at, 90_000)) { + actions.push({ + type: 'transition_agent', + agent_id: agent.bead_id, + from: 'working', + to: 'idle', + reason: 'heartbeat lost (3 missed cycles)', + }); + } + } + + // Idle agents hooked to terminal beads — clean up stale hooks + const idleHooked = AgentRow.array().parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id}, ${agent_metadata.role}, + ${agent_metadata.status}, ${agent_metadata.current_hook_bead_id}, + ${agent_metadata.dispatch_attempts}, + ${agent_metadata.last_activity_at}, + b.${beads.columns.rig_id} + FROM ${agent_metadata} + LEFT JOIN ${beads} b ON b.${beads.columns.bead_id} = ${agent_metadata.bead_id} + WHERE ${agent_metadata.status} = 'idle' + AND ${agent_metadata.current_hook_bead_id} IS NOT NULL + `, + [] + ), + ]); + + for (const agent of idleHooked) { + if (!agent.current_hook_bead_id) continue; + + const hookedRows = z + .object({ status: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${beads.status} + FROM ${beads} + WHERE ${beads.bead_id} = ? 
+ `, + [agent.current_hook_bead_id] + ), + ]); + + if (hookedRows.length === 0) { + // Hooked bead doesn't exist — stale reference + actions.push({ + type: 'unhook_agent', + agent_id: agent.bead_id, + reason: 'hooked bead does not exist', + }); + actions.push({ + type: 'clear_agent_checkpoint', + agent_id: agent.bead_id, + }); + continue; + } + + const hookedStatus = hookedRows[0].status; + if (hookedStatus === 'closed' || hookedStatus === 'failed') { + actions.push({ + type: 'unhook_agent', + agent_id: agent.bead_id, + reason: 'hooked bead is terminal', + }); + actions.push({ + type: 'clear_agent_checkpoint', + agent_id: agent.bead_id, + }); + } + } + + return actions; +} + +// ════════════════════════════════════════════════════════════════════ +// reconcileBeads — handle unassigned beads, lost agents, stale reviews +// ════════════════════════════════════════════════════════════════════ + +export function reconcileBeads(sql: SqlStorage): Action[] { + const actions: Action[] = []; + + // Rule 1: Open issue beads with no assignee, no blockers, not staged, not triage + const unassigned = BeadRow.array().parse([ + ...query( + sql, + /* sql */ ` + SELECT b.${beads.columns.bead_id}, b.${beads.columns.type}, + b.${beads.columns.status}, b.${beads.columns.rig_id}, + b.${beads.columns.assignee_agent_bead_id}, + b.${beads.columns.updated_at}, + b.${beads.columns.labels}, + b.${beads.columns.created_by} + FROM ${beads} b + WHERE b.${beads.columns.type} = 'issue' + AND b.${beads.columns.status} = 'open' + AND b.${beads.columns.assignee_agent_bead_id} IS NULL + AND b.${beads.columns.rig_id} IS NOT NULL + AND b.${beads.columns.labels} NOT LIKE ? 
+ AND NOT EXISTS ( + SELECT 1 FROM ${bead_dependencies} bd + INNER JOIN ${beads} blocker ON blocker.${beads.columns.bead_id} = bd.${bead_dependencies.columns.depends_on_bead_id} + WHERE bd.${bead_dependencies.columns.bead_id} = b.${beads.columns.bead_id} + AND bd.${bead_dependencies.columns.dependency_type} = 'blocks' + AND blocker.${beads.columns.status} NOT IN ('closed', 'failed') + ) + AND NOT EXISTS ( + SELECT 1 FROM ${bead_dependencies} bd2 + INNER JOIN ${convoy_metadata} cm ON cm.${convoy_metadata.columns.bead_id} = bd2.${bead_dependencies.columns.depends_on_bead_id} + WHERE bd2.${bead_dependencies.columns.bead_id} = b.${beads.columns.bead_id} + AND bd2.${bead_dependencies.columns.dependency_type} = 'tracks' + AND cm.${convoy_metadata.columns.staged} = 1 + ) + `, + [TRIAGE_LABEL_LIKE] + ), + ]); + + for (const bead of unassigned) { + if (!bead.rig_id) continue; + // In shadow mode we can't call getOrCreateAgent, so we just note + // that a hook_agent + dispatch_agent is needed. + // The action includes rig_id so Phase 3's applyAction can resolve the agent. 
+ actions.push({ + type: 'dispatch_agent', + agent_id: '', // resolved at apply time + bead_id: bead.bead_id, + rig_id: bead.rig_id, + }); + } + + // Rule 2: Idle agents with hooks need dispatch (schedulePendingWork equivalent) + const idleHooked = AgentRow.array().parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id}, ${agent_metadata.role}, + ${agent_metadata.status}, ${agent_metadata.current_hook_bead_id}, + ${agent_metadata.dispatch_attempts}, + ${agent_metadata.last_activity_at}, + b.${beads.columns.rig_id} + FROM ${agent_metadata} + LEFT JOIN ${beads} b ON b.${beads.columns.bead_id} = ${agent_metadata.bead_id} + WHERE ${agent_metadata.status} = 'idle' + AND ${agent_metadata.current_hook_bead_id} IS NOT NULL + AND ${agent_metadata.columns.role} != 'refinery' + `, + [] + ), + ]); + + for (const agent of idleHooked) { + if (!agent.current_hook_bead_id) continue; + + // Check dispatch cooldown + if (!staleMs(agent.last_activity_at, DISPATCH_COOLDOWN_MS)) continue; + + // Check max dispatch attempts + if (agent.dispatch_attempts >= MAX_DISPATCH_ATTEMPTS) { + actions.push({ + type: 'transition_bead', + bead_id: agent.current_hook_bead_id, + from: null, + to: 'failed', + reason: 'max dispatch attempts exceeded', + actor: 'system', + }); + actions.push({ + type: 'unhook_agent', + agent_id: agent.bead_id, + reason: 'max dispatch attempts', + }); + continue; + } + + // Check if the hooked bead is open and unblocked + const hookedRows = z + .object({ status: z.string(), rig_id: z.string().nullable() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${beads.status}, ${beads.rig_id} + FROM ${beads} + WHERE ${beads.bead_id} = ? 
+ `, + [agent.current_hook_bead_id] + ), + ]); + + if (hookedRows.length === 0) continue; + const hooked = hookedRows[0]; + if (hooked.status !== 'open') continue; + + // Check blockers + const blockerCount = z + .object({ cnt: z.number() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT count(*) as cnt + FROM ${bead_dependencies} bd + INNER JOIN ${beads} blocker ON blocker.${beads.columns.bead_id} = bd.${bead_dependencies.columns.depends_on_bead_id} + WHERE bd.${bead_dependencies.columns.bead_id} = ? + AND bd.${bead_dependencies.columns.dependency_type} = 'blocks' + AND blocker.${beads.columns.status} NOT IN ('closed', 'failed') + `, + [agent.current_hook_bead_id] + ), + ]); + + if (blockerCount[0]?.cnt > 0) continue; + + actions.push({ + type: 'dispatch_agent', + agent_id: agent.bead_id, + bead_id: agent.current_hook_bead_id, + rig_id: hooked.rig_id ?? agent.rig_id ?? '', + }); + } + + // Rule 3: In-progress issue beads with no working/stalled agent + const staleInProgress = BeadRow.array().parse([ + ...query( + sql, + /* sql */ ` + SELECT b.${beads.columns.bead_id}, b.${beads.columns.type}, + b.${beads.columns.status}, b.${beads.columns.rig_id}, + b.${beads.columns.assignee_agent_bead_id}, + b.${beads.columns.updated_at}, + b.${beads.columns.labels}, + b.${beads.columns.created_by} + FROM ${beads} b + WHERE b.${beads.columns.type} = 'issue' + AND b.${beads.columns.status} = 'in_progress' + `, + [] + ), + ]); + + for (const bead of staleInProgress) { + if (!staleMs(bead.updated_at, STALE_IN_PROGRESS_TIMEOUT_MS)) continue; + + // Check if any agent is hooked AND working/stalled + const hookedAgent = z + .object({ status: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.status} + FROM ${agent_metadata} + WHERE ${agent_metadata.current_hook_bead_id} = ? 
+ AND ${agent_metadata.status} IN ('working', 'stalled') + `, + [bead.bead_id] + ), + ]); + + if (hookedAgent.length > 0) continue; + + actions.push({ + type: 'transition_bead', + bead_id: bead.bead_id, + from: 'in_progress', + to: 'open', + reason: 'agent lost', + actor: 'system', + }); + actions.push({ + type: 'clear_bead_assignee', + bead_id: bead.bead_id, + }); + } + + // Rule 4: In-review issue beads where all MR beads are terminal + const inReview = BeadRow.array().parse([ + ...query( + sql, + /* sql */ ` + SELECT b.${beads.columns.bead_id}, b.${beads.columns.type}, + b.${beads.columns.status}, b.${beads.columns.rig_id}, + b.${beads.columns.assignee_agent_bead_id}, + b.${beads.columns.updated_at}, + b.${beads.columns.labels}, + b.${beads.columns.created_by} + FROM ${beads} b + WHERE b.${beads.columns.type} = 'issue' + AND b.${beads.columns.status} = 'in_review' + `, + [] + ), + ]); + + for (const bead of inReview) { + if (!staleMs(bead.updated_at, ORPHANED_SOURCE_TIMEOUT_MS)) continue; + + // Get all MR beads tracking this source + const mrBeads = z + .object({ status: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT mr.${beads.columns.status} + FROM ${bead_dependencies} bd + INNER JOIN ${beads} mr ON mr.${beads.columns.bead_id} = bd.${bead_dependencies.columns.bead_id} + WHERE bd.${bead_dependencies.columns.depends_on_bead_id} = ? 
+ AND bd.${bead_dependencies.columns.dependency_type} = 'tracks' + AND mr.${beads.columns.type} = 'merge_request' + `, + [bead.bead_id] + ), + ]); + + if (mrBeads.length === 0) continue; + const allTerminal = mrBeads.every(mr => mr.status === 'closed' || mr.status === 'failed'); + if (!allTerminal) continue; + + const anyMerged = mrBeads.some(mr => mr.status === 'closed'); + + if (anyMerged) { + actions.push({ + type: 'transition_bead', + bead_id: bead.bead_id, + from: 'in_review', + to: 'closed', + reason: 'MR merged (reconciler safety net)', + actor: 'system', + }); + } else { + actions.push({ + type: 'transition_bead', + bead_id: bead.bead_id, + from: 'in_review', + to: 'open', + reason: 'all reviews failed', + actor: 'system', + }); + actions.push({ + type: 'clear_bead_assignee', + bead_id: bead.bead_id, + }); + } + } + + return actions; +} + +// ════════════════════════════════════════════════════════════════════ +// reconcileReviewQueue — PR polling, stuck/abandoned MR recovery, +// refinery dispatch +// ════════════════════════════════════════════════════════════════════ + +export function reconcileReviewQueue(sql: SqlStorage): Action[] { + const actions: Action[] = []; + + // Get all MR beads that need attention + const mrBeads = MrBeadRow.array().parse([ + ...query( + sql, + /* sql */ ` + SELECT b.${beads.columns.bead_id}, b.${beads.columns.status}, + b.${beads.columns.rig_id}, b.${beads.columns.updated_at}, + rm.${review_metadata.columns.pr_url}, + b.${beads.columns.assignee_agent_bead_id} + FROM ${beads} b + INNER JOIN ${review_metadata} rm ON rm.${review_metadata.columns.bead_id} = b.${beads.columns.bead_id} + WHERE b.${beads.columns.type} = 'merge_request' + AND b.${beads.columns.status} IN ('open', 'in_progress') + `, + [] + ), + ]); + + for (const mr of mrBeads) { + // Rule 1: PR-strategy MR beads in_progress need polling + if (mr.status === 'in_progress' && mr.pr_url) { + actions.push({ + type: 'poll_pr', + bead_id: mr.bead_id, + pr_url: mr.pr_url, 
+ }); + } + + // Rule 2: Stuck MR beads in_progress with no PR, no working agent, stale >30min + // Skip MR beads with unresolved rework blockers — they're waiting for + // a polecat to finish rework, which is a normal in-flight state. + if ( + mr.status === 'in_progress' && + !mr.pr_url && + staleMs(mr.updated_at, STUCK_REVIEW_TIMEOUT_MS) + ) { + if (hasUnresolvedReworkBlockers(sql, mr.bead_id)) continue; + const workingAgent = hasWorkingAgentHooked(sql, mr.bead_id); + if (!workingAgent) { + actions.push({ + type: 'transition_bead', + bead_id: mr.bead_id, + from: 'in_progress', + to: 'open', + reason: 'stuck review, no working agent', + actor: 'system', + }); + // Unhook any idle agent still pointing at this MR + const idleAgent = getIdleAgentHookedTo(sql, mr.bead_id); + if (idleAgent) { + actions.push({ + type: 'unhook_agent', + agent_id: idleAgent, + reason: 'stuck review cleanup', + }); + } + } + } + + // Rule 3: Abandoned MR beads in_progress, no PR, no agent hooked, stale >2min + // Skip MR beads with rework blockers (same reasoning as Rule 2). + if ( + mr.status === 'in_progress' && + !mr.pr_url && + staleMs(mr.updated_at, ABANDONED_MR_TIMEOUT_MS) + ) { + if (hasUnresolvedReworkBlockers(sql, mr.bead_id)) continue; + const anyAgent = hasAnyAgentHooked(sql, mr.bead_id); + if (!anyAgent) { + actions.push({ + type: 'transition_bead', + bead_id: mr.bead_id, + from: 'in_progress', + to: 'open', + reason: 'abandoned, no agent hooked', + actor: 'system', + }); + } + } + + // Rule 4: PR-strategy MR beads orphaned (refinery dispatched then died, stale >30min) + // Only in_progress — open beads are just waiting for the refinery to pop them. 
+ if ( + mr.status === 'in_progress' && + mr.pr_url && + staleMs(mr.updated_at, ORPHANED_PR_REVIEW_TIMEOUT_MS) + ) { + const workingAgent = hasWorkingAgentHooked(sql, mr.bead_id); + if (!workingAgent) { + actions.push({ + type: 'transition_bead', + bead_id: mr.bead_id, + from: mr.status, + to: 'failed', + reason: 'PR review orphaned', + actor: 'system', + }); + } + } + } + + // Rule 5: Pop open MR bead for idle refinery + // Get all rigs that have open MR beads + const rigsWithOpenMrs = z + .object({ rig_id: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT DISTINCT b.${beads.columns.rig_id} + FROM ${beads} b + WHERE b.${beads.columns.type} = 'merge_request' + AND b.${beads.columns.status} = 'open' + AND b.${beads.columns.rig_id} IS NOT NULL + `, + [] + ), + ]); + + for (const { rig_id } of rigsWithOpenMrs) { + // Check if rig already has an in_progress MR that needs the refinery. + // PR-strategy MR beads (pr_url IS NOT NULL) don't need the refinery — + // the merge is handled by the user/CI via the PR. Only direct-strategy + // MRs (no pr_url, refinery merges to main itself) block the queue. + const inProgressCount = z + .object({ cnt: z.number() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT count(*) as cnt FROM ${beads} b + INNER JOIN ${review_metadata} rm + ON rm.${review_metadata.columns.bead_id} = b.${beads.columns.bead_id} + WHERE b.${beads.columns.type} = 'merge_request' + AND b.${beads.columns.status} = 'in_progress' + AND b.${beads.columns.rig_id} = ? + AND rm.${review_metadata.columns.pr_url} IS NULL + `, + [rig_id] + ), + ]); + if ((inProgressCount[0]?.cnt ?? 
0) > 0) continue; + + // Check if the refinery for this rig is idle and unhooked + const refinery = AgentRow.array().parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id}, ${agent_metadata.role}, + ${agent_metadata.status}, ${agent_metadata.current_hook_bead_id}, + ${agent_metadata.dispatch_attempts}, + ${agent_metadata.last_activity_at}, + b.${beads.columns.rig_id} + FROM ${agent_metadata} + LEFT JOIN ${beads} b ON b.${beads.columns.bead_id} = ${agent_metadata.bead_id} + WHERE ${agent_metadata.columns.role} = 'refinery' + AND b.${beads.columns.rig_id} = ? + LIMIT 1 + `, + [rig_id] + ), + ]); + + // Get oldest open MR for this rig + const oldestMr = z + .object({ bead_id: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${beads.bead_id} + FROM ${beads} + WHERE ${beads.type} = 'merge_request' + AND ${beads.status} = 'open' + AND ${beads.rig_id} = ? + ORDER BY ${beads.columns.created_at} ASC + LIMIT 1 + `, + [rig_id] + ), + ]); + + if (oldestMr.length === 0) continue; + + // If no refinery exists or it's busy, emit a dispatch_agent with empty + // agent_id — applyAction will create the refinery via getOrCreateAgent. 
+ if (refinery.length === 0) { + actions.push({ + type: 'transition_bead', + bead_id: oldestMr[0].bead_id, + from: 'open', + to: 'in_progress', + reason: 'popped for review (creating refinery)', + actor: 'system', + }); + actions.push({ + type: 'dispatch_agent', + agent_id: '', + bead_id: oldestMr[0].bead_id, + rig_id, + }); + continue; + } + + const ref = refinery[0]; + if (ref.status !== 'idle' || ref.current_hook_bead_id) continue; + + actions.push({ + type: 'transition_bead', + bead_id: oldestMr[0].bead_id, + from: 'open', + to: 'in_progress', + reason: 'popped for review', + actor: 'system', + }); + actions.push({ + type: 'hook_agent', + agent_id: ref.bead_id, + bead_id: oldestMr[0].bead_id, + }); + actions.push({ + type: 'dispatch_agent', + agent_id: ref.bead_id, + bead_id: oldestMr[0].bead_id, + rig_id, + }); + } + + // Rule 6: Idle refinery hooked to in_progress MR — needs re-dispatch + const idleRefineries = AgentRow.array().parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id}, ${agent_metadata.role}, + ${agent_metadata.status}, ${agent_metadata.current_hook_bead_id}, + ${agent_metadata.dispatch_attempts}, + ${agent_metadata.last_activity_at}, + b.${beads.columns.rig_id} + FROM ${agent_metadata} + LEFT JOIN ${beads} b ON b.${beads.columns.bead_id} = ${agent_metadata.bead_id} + WHERE ${agent_metadata.columns.role} = 'refinery' + AND ${agent_metadata.status} = 'idle' + AND ${agent_metadata.current_hook_bead_id} IS NOT NULL + `, + [] + ), + ]); + + for (const ref of idleRefineries) { + if (!ref.current_hook_bead_id) continue; + + const mrRows = z + .object({ status: z.string(), type: z.string(), rig_id: z.string().nullable() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${beads.status}, ${beads.type}, ${beads.rig_id} + FROM ${beads} + WHERE ${beads.bead_id} = ? 
+ `, + [ref.current_hook_bead_id] + ), + ]); + + if (mrRows.length === 0) continue; + const mr = mrRows[0]; + if (mr.type !== 'merge_request' || mr.status !== 'in_progress') continue; + + // Container status is checked at apply time (async). In shadow mode, + // we just note that a dispatch is needed. + actions.push({ + type: 'dispatch_agent', + agent_id: ref.bead_id, + bead_id: ref.current_hook_bead_id, + rig_id: mr.rig_id ?? ref.rig_id ?? '', + }); + } + + return actions; +} + +// ════════════════════════════════════════════════════════════════════ +// reconcileConvoys — track convoy progress, trigger landing +// ════════════════════════════════════════════════════════════════════ + +export function reconcileConvoys(sql: SqlStorage): Action[] { + const actions: Action[] = []; + + const convoys = ConvoyRow.array().parse([ + ...query( + sql, + /* sql */ ` + SELECT b.${beads.columns.bead_id}, b.${beads.columns.status}, + cm.${convoy_metadata.columns.total_beads} as total_beads, + cm.${convoy_metadata.columns.closed_beads} as closed_beads, + cm.${convoy_metadata.columns.feature_branch} as feature_branch, + cm.${convoy_metadata.columns.merge_mode} as merge_mode, + cm.${convoy_metadata.columns.staged} as staged, + b.${beads.columns.metadata} as metadata + FROM ${beads} b + INNER JOIN ${convoy_metadata} cm ON cm.${convoy_metadata.columns.bead_id} = b.${beads.columns.bead_id} + WHERE b.${beads.columns.type} = 'convoy' + AND b.${beads.columns.status} = 'open' + `, + [] + ), + ]); + + for (const convoy of convoys) { + // Count actually closed tracked beads + const progressRows = z + .object({ closed_count: z.number(), total_count: z.number() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT + count(CASE WHEN tracked.${beads.columns.status} IN ('closed', 'failed') THEN 1 END) as closed_count, + count(*) as total_count + FROM ${bead_dependencies} bd + INNER JOIN ${beads} tracked ON tracked.${beads.columns.bead_id} = bd.${bead_dependencies.columns.bead_id} + 
WHERE bd.${bead_dependencies.columns.depends_on_bead_id} = ? + AND bd.${bead_dependencies.columns.dependency_type} = 'tracks' + AND tracked.${beads.columns.type} = 'issue' + `, + [convoy.bead_id] + ), + ]); + + if (progressRows.length === 0) continue; + const { closed_count, total_count } = progressRows[0]; + + // Update progress if stale + if (closed_count !== convoy.closed_beads) { + actions.push({ + type: 'update_convoy_progress', + convoy_id: convoy.bead_id, + closed_beads: closed_count, + }); + } + + // Check for in-flight MR beads (open or in_progress) for tracked issue beads + const inFlightMrCount = z + .object({ cnt: z.number() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT count(*) as cnt + FROM ${bead_dependencies} track_dep + INNER JOIN ${bead_dependencies} mr_dep + ON mr_dep.${bead_dependencies.columns.depends_on_bead_id} = track_dep.${bead_dependencies.columns.bead_id} + INNER JOIN ${beads} mr + ON mr.${beads.columns.bead_id} = mr_dep.${bead_dependencies.columns.bead_id} + WHERE track_dep.${bead_dependencies.columns.depends_on_bead_id} = ? + AND track_dep.${bead_dependencies.columns.dependency_type} = 'tracks' + AND mr_dep.${bead_dependencies.columns.dependency_type} = 'tracks' + AND mr.${beads.columns.type} = 'merge_request' + AND mr.${beads.columns.status} IN ('open', 'in_progress') + `, + [convoy.bead_id] + ), + ]); + + const hasInFlightReviews = (inFlightMrCount[0]?.cnt ?? 
0) > 0;
+
+		// Check if all beads done
+		if (closed_count >= total_count && total_count > 0 && !hasInFlightReviews) {
+			let parsedMeta: Record<string, unknown> = {};
+			try {
+				parsedMeta = JSON.parse(convoy.metadata) as Record<string, unknown>;
+			} catch {
+				/* ignore */
+			}
+
+			if (convoy.merge_mode === 'review-then-land' && convoy.feature_branch) {
+				if (!parsedMeta.ready_to_land) {
+					actions.push({
+						type: 'set_convoy_ready_to_land',
+						convoy_id: convoy.bead_id,
+					});
+				}
+
+				if (parsedMeta.ready_to_land) {
+					// Check if a landing MR already exists (any status)
+					const landingMrs = z
+						.object({ status: z.string() })
+						.array()
+						.parse([
+							...query(
+								sql,
+								/* sql */ `
+									SELECT mr.${beads.columns.status}
+									FROM ${bead_dependencies} bd
+									INNER JOIN ${beads} mr ON mr.${beads.columns.bead_id} = bd.${bead_dependencies.columns.bead_id}
+									WHERE bd.${bead_dependencies.columns.depends_on_bead_id} = ?
+										AND bd.${bead_dependencies.columns.dependency_type} = 'tracks'
+										AND mr.${beads.columns.type} = 'merge_request'
+								`,
+								[convoy.bead_id]
+							),
+						]);
+
+					// If a landing MR was already merged (closed), close the convoy
+					const hasMergedLanding = landingMrs.some(mr => mr.status === 'closed');
+					if (hasMergedLanding) {
+						actions.push({
+							type: 'close_convoy',
+							convoy_id: convoy.bead_id,
+						});
+						continue;
+					}
+
+					// If a landing MR is active (open or in_progress), wait for it
+					const hasActiveLanding = landingMrs.some(
+						mr => mr.status === 'open' || mr.status === 'in_progress'
+					);
+					if (hasActiveLanding) continue;
+
+					// No landing MR exists yet — create one
+					{
+						// Need rig_id from one of the tracked beads
+						const rigRows = z
+							.object({ rig_id: z.string() })
+							.array()
+							.parse([
+								...query(
+									sql,
+									/* sql */ `
+										SELECT DISTINCT tracked.${beads.columns.rig_id} as rig_id
+										FROM ${bead_dependencies} bd
+										INNER JOIN ${beads} tracked ON tracked.${beads.columns.bead_id} = bd.${bead_dependencies.columns.bead_id}
+										WHERE bd.${bead_dependencies.columns.depends_on_bead_id} = ?
+ AND bd.${bead_dependencies.columns.dependency_type} = 'tracks' + AND tracked.${beads.columns.rig_id} IS NOT NULL + LIMIT 1 + `, + [convoy.bead_id] + ), + ]); + + if (rigRows.length > 0) { + const rig = getRig(sql, rigRows[0].rig_id); + actions.push({ + type: 'create_landing_mr', + convoy_id: convoy.bead_id, + rig_id: rigRows[0].rig_id, + feature_branch: convoy.feature_branch, + target_branch: rig?.default_branch ?? 'main', + }); + } + } + } + } else { + // review-and-merge or no feature branch — auto-close + actions.push({ + type: 'close_convoy', + convoy_id: convoy.bead_id, + }); + } + } + } + + return actions; +} + +// ════════════════════════════════════════════════════════════════════ +// reconcileGUPP — detect agents exceeding activity thresholds +// ════════════════════════════════════════════════════════════════════ + +export function reconcileGUPP(sql: SqlStorage): Action[] { + const actions: Action[] = []; + + const workingAgents = AgentRow.array().parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id}, ${agent_metadata.role}, + ${agent_metadata.status}, ${agent_metadata.current_hook_bead_id}, + ${agent_metadata.dispatch_attempts}, + ${agent_metadata.last_activity_at}, + ${agent_metadata.last_event_type}, + ${agent_metadata.last_event_at}, + ${agent_metadata.active_tools}, + b.${beads.columns.rig_id} + FROM ${agent_metadata} + LEFT JOIN ${beads} b ON b.${beads.columns.bead_id} = ${agent_metadata.bead_id} + WHERE ${agent_metadata.status} IN ('working', 'stalled') + `, + [] + ), + ]); + + for (const agent of workingAgents) { + // Use last_event_at (SDK activity) as primary signal, fall back to + // last_activity_at (heartbeat). Agents with no heartbeat at all are + // handled by reconcileAgents (NULL-heartbeat check), so skip them here. + const activityTimestamp = agent.last_event_at ?? 
agent.last_activity_at; + if (!activityTimestamp) continue; + + const elapsed = Date.now() - new Date(activityTimestamp).getTime(); + if (Number.isNaN(elapsed) || elapsed < 0) continue; + + if (elapsed > GUPP_FORCE_STOP_MS) { + actions.push({ + type: 'transition_agent', + agent_id: agent.bead_id, + from: agent.status, + to: 'stalled', + reason: 'GUPP force stop — no SDK activity for 2h', + }); + actions.push({ + type: 'stop_agent', + agent_id: agent.bead_id, + reason: 'exceeded 2h GUPP limit', + }); + actions.push({ + type: 'create_triage_request', + agent_id: agent.bead_id, + triage_type: 'stuck_agent', + reason: 'GUPP force stop', + }); + } else if (elapsed > GUPP_ESCALATE_MS) { + if (!hasRecentNudge(sql, agent.bead_id, 'escalate')) { + actions.push({ + type: 'send_nudge', + agent_id: agent.bead_id, + message: + 'You have been working for over 1 hour without completing your task. Please wrap up or report if you are stuck.', + tier: 'escalate', + }); + actions.push({ + type: 'create_triage_request', + agent_id: agent.bead_id, + triage_type: 'stuck_agent', + reason: 'GUPP escalation', + }); + } + } else if (elapsed > 15 * 60_000) { + // Tighter warn threshold (15min vs old 30min) using SDK activity. + // Skip if agent is mid-tool-call — long-running tools like git clone are normal. + let tools: string[] = []; + try { + tools = JSON.parse(agent.active_tools ?? '[]') as string[]; + } catch { + /* ignore */ + } + + if (tools.length === 0 && !hasRecentNudge(sql, agent.bead_id, 'warn')) { + actions.push({ + type: 'send_nudge', + agent_id: agent.bead_id, + message: + 'You have been idle for 15 minutes with no tool activity. 
Please check your progress.', + tier: 'warn', + }); + } + } + } + + return actions; +} + +// ════════════════════════════════════════════════════════════════════ +// reconcileGC — garbage-collect idle agents with no hook +// ════════════════════════════════════════════════════════════════════ + +export function reconcileGC(sql: SqlStorage): Action[] { + const actions: Action[] = []; + + const gcCandidates = AgentRow.array().parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id}, ${agent_metadata.role}, + ${agent_metadata.status}, ${agent_metadata.current_hook_bead_id}, + ${agent_metadata.dispatch_attempts}, + ${agent_metadata.last_activity_at}, + b.${beads.columns.rig_id} + FROM ${agent_metadata} + LEFT JOIN ${beads} b ON b.${beads.columns.bead_id} = ${agent_metadata.bead_id} + WHERE ${agent_metadata.status} IN ('idle', 'dead') + AND ${agent_metadata.columns.role} IN ('polecat', 'refinery') + AND ${agent_metadata.current_hook_bead_id} IS NULL + `, + [] + ), + ]); + + for (const agent of gcCandidates) { + if (staleMs(agent.last_activity_at, AGENT_GC_RETENTION_MS)) { + actions.push({ + type: 'delete_agent', + agent_id: agent.bead_id, + reason: 'GC: idle > 24h', + }); + } + } + + return actions; +} + +// ── Helpers ───────────────────────────────────────────────────────── + +/** Check if an MR bead has open rework beads blocking it. */ +function hasUnresolvedReworkBlockers(sql: SqlStorage, mrBeadId: string): boolean { + const rows = [ + ...query( + sql, + /* sql */ ` + SELECT 1 FROM ${bead_dependencies} bd + INNER JOIN ${beads} rework ON rework.${beads.columns.bead_id} = bd.${bead_dependencies.columns.depends_on_bead_id} + WHERE bd.${bead_dependencies.columns.bead_id} = ? 
+ AND bd.${bead_dependencies.columns.dependency_type} = 'blocks' + AND rework.${beads.columns.status} NOT IN ('closed', 'failed') + LIMIT 1 + `, + [mrBeadId] + ), + ]; + return rows.length > 0; +} + +function hasWorkingAgentHooked(sql: SqlStorage, beadId: string): boolean { + const rows = [ + ...query( + sql, + /* sql */ ` + SELECT 1 FROM ${agent_metadata} + WHERE ${agent_metadata.current_hook_bead_id} = ? + AND ${agent_metadata.status} IN ('working', 'stalled') + LIMIT 1 + `, + [beadId] + ), + ]; + return rows.length > 0; +} + +function hasAnyAgentHooked(sql: SqlStorage, beadId: string): boolean { + const rows = [ + ...query( + sql, + /* sql */ ` + SELECT 1 FROM ${agent_metadata} + WHERE ${agent_metadata.current_hook_bead_id} = ? + LIMIT 1 + `, + [beadId] + ), + ]; + return rows.length > 0; +} + +function getIdleAgentHookedTo(sql: SqlStorage, beadId: string): string | null { + const rows = z + .object({ bead_id: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id} + FROM ${agent_metadata} + WHERE ${agent_metadata.current_hook_bead_id} = ? + AND ${agent_metadata.status} = 'idle' + LIMIT 1 + `, + [beadId] + ), + ]); + return rows.length > 0 ? rows[0].bead_id : null; +} + +function hasRecentNudge(sql: SqlStorage, agentId: string, tier: string): boolean { + // Check if a nudge with this exact tier source was created in the last 60 min. + // The source is set to `reconciler:${tier}` by applyAction('send_nudge'). + const cutoff = new Date(Date.now() - 60 * 60_000).toISOString(); + const rows = [ + ...query( + sql, + /* sql */ ` + SELECT 1 FROM ${agent_nudges} + WHERE ${agent_nudges.agent_bead_id} = ? + AND ${agent_nudges.source} = ? + AND ${agent_nudges.created_at} > ? 
+ LIMIT 1 + `, + [agentId, `reconciler:${tier}`, cutoff] + ), + ]; + return rows.length > 0; +} + +// ════════════════════════════════════════════════════════════════════ +// Invariant checker — runs after action application to detect +// violations of the system invariants from spec §6. +// ════════════════════════════════════════════════════════════════════ + +export type Violation = { + invariant: number; + message: string; +}; + +/** + * Check all system invariants. Returns violations found. + * Should run at the end of each alarm tick after actions are applied. + * See reconciliation-spec.md §6. + */ +export function checkInvariants(sql: SqlStorage): Violation[] { + const violations: Violation[] = []; + + // Invariant 7: Working agents must have hooks + const unhookedWorkers = z + .object({ bead_id: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id} + FROM ${agent_metadata} + WHERE ${agent_metadata.status} = 'working' + AND ${agent_metadata.current_hook_bead_id} IS NULL + `, + [] + ), + ]); + for (const a of unhookedWorkers) { + violations.push({ + invariant: 7, + message: `Working agent ${a.bead_id} has no hook`, + }); + } + + // Invariant 5: Convoy beads should not be in_progress + const inProgressConvoys = z + .object({ bead_id: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${beads.bead_id} + FROM ${beads} + WHERE ${beads.type} = 'convoy' + AND ${beads.status} = 'in_progress' + `, + [] + ), + ]); + for (const c of inProgressConvoys) { + violations.push({ + invariant: 5, + message: `Convoy bead ${c.bead_id} is in_progress (should only be open or closed)`, + }); + } + + // Invariant 3: Only one MR bead in_progress per rig (refinery is serial) + const duplicateMrPerRig = z + .object({ rig_id: z.string(), cnt: z.number() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${beads.rig_id} as rig_id, count(*) as cnt + FROM ${beads} + WHERE ${beads.type} = 
'merge_request' + AND ${beads.status} = 'in_progress' + AND ${beads.rig_id} IS NOT NULL + GROUP BY ${beads.rig_id} + HAVING count(*) > 1 + `, + [] + ), + ]); + for (const r of duplicateMrPerRig) { + violations.push({ + invariant: 3, + message: `Rig ${r.rig_id} has ${r.cnt} in_progress MR beads (should be at most 1)`, + }); + } + + // Invariant 6: At most one agent hooked per bead + const multiHooked = z + .object({ hook: z.string(), cnt: z.number() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.current_hook_bead_id} as hook, count(*) as cnt + FROM ${agent_metadata} + WHERE ${agent_metadata.current_hook_bead_id} IS NOT NULL + GROUP BY ${agent_metadata.current_hook_bead_id} + HAVING count(*) > 1 + `, + [] + ), + ]); + for (const m of multiHooked) { + violations.push({ + invariant: 6, + message: `Bead ${m.hook} has ${m.cnt} agents hooked (should be at most 1)`, + }); + } + + // Invariant 4: in_review beads must have at least one open/in_progress MR + const orphanedInReview = z + .object({ bead_id: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT b.${beads.columns.bead_id} + FROM ${beads} b + WHERE b.${beads.columns.type} = 'issue' + AND b.${beads.columns.status} = 'in_review' + AND NOT EXISTS ( + SELECT 1 + FROM ${bead_dependencies} bd + INNER JOIN ${beads} mr ON mr.${beads.columns.bead_id} = bd.${bead_dependencies.columns.bead_id} + WHERE bd.${bead_dependencies.columns.depends_on_bead_id} = b.${beads.columns.bead_id} + AND bd.${bead_dependencies.columns.dependency_type} = 'tracks' + AND mr.${beads.columns.type} = 'merge_request' + AND mr.${beads.columns.status} IN ('open', 'in_progress') + ) + `, + [] + ), + ]); + for (const b of orphanedInReview) { + violations.push({ + invariant: 4, + message: `Issue bead ${b.bead_id} is in_review but has no open/in_progress MR bead`, + }); + } + + return violations; +} + +// ════════════════════════════════════════════════════════════════════ +// Reconciler metrics 
— collected per alarm tick
+// ════════════════════════════════════════════════════════════════════
+
+export type ReconcilerMetrics = {
+	eventsDrained: number;
+	actionsEmitted: number;
+	actionsByType: Record<string, number>;
+	sideEffectsAttempted: number;
+	sideEffectsSucceeded: number;
+	sideEffectsFailed: number;
+	invariantViolations: number;
+	wallClockMs: number;
+	pendingEventCount: number;
+};
diff --git a/cloudflare-gastown/src/dos/town/review-queue.ts b/cloudflare-gastown/src/dos/town/review-queue.ts
index 26df5a6ddf..07db76062b 100644
--- a/cloudflare-gastown/src/dos/town/review-queue.ts
+++ b/cloudflare-gastown/src/dos/town/review-queue.ts
@@ -28,8 +28,11 @@ import { getAgent, unhookBead } from './agents';
 import { getRig } from './rigs';
 import type { ReviewQueueInput, ReviewQueueEntry, AgentDoneInput, Molecule } from '../../types';
 
-// Review entries stuck in 'running' past this timeout are reset to 'pending'
-const REVIEW_RUNNING_TIMEOUT_MS = 5 * 60 * 1000;
+// Review entries stuck in 'running' past this timeout are reset to 'pending'.
+// Only applies when no agent (working or idle) is hooked to the MR bead.
+// Set to 30 min — reviews can legitimately take 10-15 min for clone + build
+// + test + merge, and the refinery hook guard is the primary protection.
+const REVIEW_RUNNING_TIMEOUT_MS = 30 * 60 * 1000;
 
 function generateId(): string {
 	return crypto.randomUUID();
@@ -186,12 +189,26 @@ export function submitToReviewQueue(sql: SqlStorage, input: ReviewQueueInput): v
 }
 
 export function popReviewQueue(sql: SqlStorage): ReviewQueueEntry | null {
+	// Pop the oldest open MR bead, but skip any whose source bead already
+	// has another MR in_progress (i.e. a refinery is already reviewing it).
+	// This prevents popping stale MR beads and triggering failReviewWithRework
+	// while an active review is in flight for the same source.
+	//
+	// Note: the NOT EXISTS guard below is rig-scoped, not per-source-bead —
+	// it skips every open MR whose rig already has an in_progress MR, since
+	// the refinery processes reviews serially per rig (reconciler invariant 3).
 	const rows = [
 		...query(
 			sql,
 			/* sql */ `
 				${REVIEW_JOIN}
 				WHERE ${beads.status} = 'open'
+					AND NOT EXISTS (
+						SELECT 1 FROM ${beads} AS active_mr
+						WHERE active_mr.${beads.columns.type} = 'merge_request'
+							AND active_mr.${beads.columns.status} = 'in_progress'
+							AND active_mr.${beads.columns.rig_id} = ${beads.rig_id}
+					)
 				ORDER BY ${beads.created_at} ASC
 				LIMIT 1
 			`,
@@ -223,6 +240,16 @@ export function completeReview(
 	entryId: string,
 	status: 'merged' | 'failed'
 ): void {
+	// Guard: don't overwrite terminal states (closed MR bead that was
+	// already merged should never be set to 'failed' by a stale call)
+	const current = getBead(sql, entryId);
+	if (current && (current.status === 'closed' || current.status === 'failed')) {
+		console.warn(
+			`[review-queue] completeReview: bead ${entryId} already ${current.status}, skipping`
+		);
+		return;
+	}
+
 	const beadStatus = status === 'merged' ? 'closed' : 'failed';
 	const timestamp = now();
 	query(
@@ -275,8 +302,38 @@ export function completeReviewWithResult(
 	if (input.status === 'merged') {
 		const mergeTimestamp = now();
+		console.log(
+			`[review-queue] completeReviewWithResult MERGED: entry_id=${input.entry_id} ` +
+				`entry.bead_id (source)=${entry.bead_id} entry.id (MR)=${entry.id} — ` +
+				`calling closeBead on source`
+		);
 		closeBead(sql, entry.bead_id, entry.agent_id);
+
+		// Close ALL other open/in_progress/failed MR beads for the same
+		// source bead. During rework cycles, multiple MR beads accumulate.
+		// Without this cleanup, stale MR beads trigger failReviewWithRework
+		// on the next alarm tick, reopening the source bead that was just
+		// closed by this merge.
+		query(
+			sql,
+			/* sql */ `
+				UPDATE ${beads}
+				SET ${beads.columns.status} = 'closed',
+					${beads.columns.updated_at} = ?,
+					${beads.columns.closed_at} = ?
+ WHERE ${beads.type} = 'merge_request' + AND ${beads.bead_id} != ? + AND ${beads.status} NOT IN ('closed') + AND ${beads.bead_id} IN ( + SELECT dep.${bead_dependencies.columns.bead_id} + FROM ${bead_dependencies} AS dep + WHERE dep.${bead_dependencies.columns.depends_on_bead_id} = ? + AND dep.${bead_dependencies.columns.dependency_type} = 'tracks' + ) + `, + [mergeTimestamp, mergeTimestamp, input.entry_id, entry.bead_id] + ); + // closeBead → updateBeadStatus short-circuits when completeReview already // set the status to 'closed' via direct SQL, so updateConvoyProgress is // never reached transitively. Call it explicitly to ensure the convoy @@ -310,13 +367,45 @@ export function completeReviewWithResult( conflict: true, }, }); - // Return source bead to in_progress so the polecat can be re-dispatched - // to resolve the conflict (in_review → in_progress rework flow). - updateBeadStatus(sql, entry.bead_id, 'in_progress', entry.agent_id); + // Return source bead to open so the normal scheduling path handles + // rework. Clear assignee so feedStrandedConvoys can match. + const conflictSourceBead = getBead(sql, entry.bead_id); + if ( + conflictSourceBead && + conflictSourceBead.status !== 'closed' && + conflictSourceBead.status !== 'failed' + ) { + updateBeadStatus(sql, entry.bead_id, 'open', entry.agent_id); + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.assignee_agent_bead_id} = NULL + WHERE ${beads.bead_id} = ? + `, + [entry.bead_id] + ); + } } else if (input.status === 'failed') { - // Review failed (rework requested): return source bead to in_progress - // so it can be re-dispatched (in_review → in_progress rework flow). - updateBeadStatus(sql, entry.bead_id, 'in_progress', entry.agent_id); + // Review failed (rework requested): return source bead to open so + // the normal scheduling path (feedStrandedConvoys → hookBead → + // schedulePendingWork → dispatch) handles rework. 
Clear the stale + // assignee so feedStrandedConvoys can match (requires assignee IS NULL). + // This avoids the fire-and-forget rework dispatch race in TownDO + // where the dispatch fails and rehookOrphanedBeads churn. + const sourceBead = getBead(sql, entry.bead_id); + if (sourceBead && sourceBead.status !== 'closed' && sourceBead.status !== 'failed') { + updateBeadStatus(sql, entry.bead_id, 'open', entry.agent_id); + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.assignee_agent_bead_id} = NULL + WHERE ${beads.bead_id} = ? + `, + [entry.bead_id] + ); + } } } @@ -326,6 +415,34 @@ export function completeReviewWithResult( * Writes to both review_metadata.pr_url (for query) and beads.metadata.pr_url * (so the URL is available via the standard bead list endpoint). */ +/** Get review_metadata for an MR bead. */ +export function getReviewMetadata( + sql: SqlStorage, + mrBeadId: string +): { branch: string; target_branch: string; pr_url: string | null } | null { + const rows = z + .object({ + branch: z.string(), + target_branch: z.string(), + pr_url: z.string().nullable(), + }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${review_metadata.columns.branch} as branch, + ${review_metadata.columns.target_branch} as target_branch, + ${review_metadata.columns.pr_url} as pr_url + FROM ${review_metadata} + WHERE ${review_metadata.bead_id} = ? + `, + [mrBeadId] + ), + ]); + return rows[0] ?? null; +} + export function setReviewPrUrl(sql: SqlStorage, entryId: string, prUrl: string): boolean { // Reject non-HTTPS URLs to prevent storing garbage from LLM output. // Invalid URLs would cause pollPendingPRs to poll indefinitely. @@ -373,111 +490,68 @@ export function markReviewInReview(sql: SqlStorage, entryId: string): void { ); } -/** - * List MR beads that are in_progress and have a pr_url (PR-strategy merges - * waiting for external review). Used by the alarm to poll PR status. 
- */ -export function listPendingPRReviews(sql: SqlStorage): MergeRequestBeadRecord[] { - const rows = [ - ...query( - sql, - /* sql */ ` - ${REVIEW_JOIN} - WHERE ${beads.status} = 'in_progress' - AND ${review_metadata.pr_url} IS NOT NULL - `, - [] - ), - ]; - return MergeRequestBeadRecord.array().parse(rows); -} - -/** - * Reset MR beads stuck in 'in_progress' back to 'open' so they can be - * re-processed. Excludes beads that have a pr_url set — those are - * legitimately waiting for external human review (PR strategy) and may - * take hours or days. - */ -export function recoverStuckReviews(sql: SqlStorage): void { - const timeout = new Date(Date.now() - REVIEW_RUNNING_TIMEOUT_MS).toISOString(); - query( - sql, - /* sql */ ` - UPDATE ${beads} - SET ${beads.columns.status} = 'open', - ${beads.columns.updated_at} = ? - WHERE ${beads.type} = 'merge_request' - AND ${beads.status} = 'in_progress' - AND ${beads.updated_at} < ? - AND ${beads.bead_id} NOT IN ( - SELECT ${review_metadata.bead_id} - FROM ${review_metadata} - WHERE ${review_metadata.pr_url} IS NOT NULL - ) - `, - [now(), timeout] - ); -} - -/** - * Close MR beads that are stuck waiting for a PR review but whose assigned - * agent is no longer active. After a container restart, agents lose their - * in-memory state — the PR review will never complete. Close these beads - * so they don't block convoy progress indefinitely. - * - * Only affects beads with a pr_url (excluded by recoverStuckReviews) that - * are stale (>30 min) and whose agent is idle/dead/missing. 
- */ -const ORPHAN_REVIEW_TIMEOUT_MS = 30 * 60 * 1000; - -export function closeOrphanedReviewBeads(sql: SqlStorage): void { - const cutoff = new Date(Date.now() - ORPHAN_REVIEW_TIMEOUT_MS).toISOString(); - - const orphanRows = [ - ...query( - sql, - /* sql */ ` - SELECT ${beads.bead_id}, ${beads.assignee_agent_bead_id} - FROM ${beads} - INNER JOIN ${review_metadata} ON ${beads.bead_id} = ${review_metadata.bead_id} - LEFT JOIN ${agent_metadata} ON ${beads.assignee_agent_bead_id} = ${agent_metadata.bead_id} - WHERE ${beads.type} = 'merge_request' - AND ${beads.status} = 'open' - AND ${review_metadata.pr_url} IS NOT NULL - AND ${beads.updated_at} < ? - AND ( - ${agent_metadata.bead_id} IS NULL - OR ${agent_metadata.status} IN ('idle', 'dead') - ) - `, - [cutoff] - ), - ]; - - for (const row of orphanRows) { - const parsed = z - .object({ bead_id: z.string(), assignee_agent_bead_id: z.string().nullable() }) - .parse(row); - try { - closeBead(sql, parsed.bead_id, parsed.assignee_agent_bead_id ?? 'system'); - console.log( - `[review-queue] closeOrphanedReviewBeads: closed orphaned MR bead=${parsed.bead_id}` - ); - } catch (err) { - console.warn( - `[review-queue] closeOrphanedReviewBeads: failed to close bead=${parsed.bead_id}`, - err - ); - } - } -} - // ── Agent Done ────────────────────────────────────────────────────── export function agentDone(sql: SqlStorage, agentId: string, input: AgentDoneInput): void { const agent = getAgent(sql, agentId); if (!agent) throw new Error(`Agent ${agentId} not found`); - if (!agent.current_hook_bead_id) throw new Error(`Agent ${agentId} has no hooked bead`); + if (!agent.current_hook_bead_id) { + // The agent was unhooked by a recovery path (witnessPatrol, + // rehookOrphanedBeads) between when the agent finished work and + // when it called gt_done. + // + // For refineries, this is critical: the refinery successfully merged + // but the hook was cleared by zombie detection. 
We MUST still complete + // the review — otherwise the source bead stays open forever. Find the + // most recent non-closed MR bead assigned to this agent and complete it. + if (agent.role === 'refinery') { + const recentMrRows = [ + ...query( + sql, + /* sql */ ` + SELECT ${beads.bead_id} + FROM ${beads} + WHERE ${beads.type} = 'merge_request' + AND ${beads.assignee_agent_bead_id} = ? + AND ${beads.status} NOT IN ('closed', 'failed') + ORDER BY ${beads.updated_at} DESC + LIMIT 1 + `, + [agentId] + ), + ]; + if (recentMrRows.length > 0) { + const mrBeadId = z.object({ bead_id: z.string() }).parse(recentMrRows[0]).bead_id; + console.log( + `[review-queue] agentDone: unhooked refinery ${agentId} — recovering MR bead ${mrBeadId}` + ); + if (input.pr_url) { + const stored = setReviewPrUrl(sql, mrBeadId, input.pr_url); + if (stored) { + markReviewInReview(sql, mrBeadId); + } else { + completeReviewWithResult(sql, { + entry_id: mrBeadId, + status: 'failed', + message: `Refinery provided invalid pr_url: ${input.pr_url}`, + }); + } + } else { + completeReviewWithResult(sql, { + entry_id: mrBeadId, + status: 'merged', + message: input.summary ?? 'Merged by refinery agent (recovered from unhook)', + }); + } + return; + } + } + + console.warn( + `[review-queue] agentDone: agent ${agentId} (role=${agent.role}) has no hooked bead — ignoring` + ); + return; + } // Triage batch beads don't produce code — close and unhook without // submitting to the review queue. Only applies to system-created triage @@ -490,6 +564,19 @@ export function agentDone(sql: SqlStorage, agentId: string, input: AgentDoneInpu return; } + // Rework beads skip the review queue entirely. The polecat pushed commits + // to an existing branch (the one the refinery already reviewed). Closing + // the rework bead unblocks the MR bead, and the reconciler re-dispatches + // the refinery to re-review. 
+ if (hookedBead?.labels.includes('gt:rework')) { + console.log( + `[review-queue] agentDone: rework bead ${agent.current_hook_bead_id} — closing directly (skip review)` + ); + closeBead(sql, agent.current_hook_bead_id, agentId); + unhookBead(sql, agentId); + return; + } + if (agent.role === 'refinery') { // The refinery handles merging (direct strategy) or PR creation (pr strategy) // itself. When it calls gt_done: @@ -596,32 +683,41 @@ export function agentCompleted( if (!agent) return result; if (agent.current_hook_bead_id) { - // When a refinery exits with 'completed' but the MR bead is still - // in_progress (not closed/merged), it means the refinery requested - // rework. Route through completeReviewWithResult so the source bead - // is returned to in_progress for re-dispatch. - if (agent.role === 'refinery' && input.status === 'completed') { - const mrBead = getBead(sql, agent.current_hook_bead_id); - if (mrBead && mrBead.status !== 'closed') { - const sourceBeadId = - typeof mrBead.metadata?.source_bead_id === 'string' - ? mrBead.metadata.source_bead_id - : null; - completeReviewWithResult(sql, { - entry_id: agent.current_hook_bead_id, - status: 'failed', - message: input.reason ?? 'Refinery exited without merge — rework needed', - }); - result.reworkSourceBeadId = sourceBeadId; - unhookBead(sql, agentId); - // Mark agent idle (below) + if (agent.role === 'refinery') { + // NEVER fail or unhook a refinery from agentCompleted. + // agentCompleted races with gt_done: the process exits, the + // container sends /completed, but gt_done's HTTP request may + // still be in flight. If we unhook here, recoverStuckReviews + // can fire between agentCompleted and gt_done, resetting the + // MR bead that's about to be closed by gt_done. + // + // Leave the hook intact. gt_done will close + unhook if the + // merge succeeded. recoverStuckReviews (which checks for + // status='working') handles the case where gt_done never arrives. 
+ // + // No-op for the bead — just fall through to mark agent idle. + } else { + // For non-refineries: if the agent exited with 'failed', fail the bead. + // If it exited with 'completed', check whether gt_done already ran: + // - If the bead is in_review/closed/failed → gt_done already handled it, no-op on bead + // - If the bead is still in_progress → agent was killed (idle timer, OOM, etc.) + // before calling gt_done. Don't close the bead — just unhook. The reconciler's + // Rule 3 will reset it to open after the staleness timeout. + const hookedBead = getBead(sql, agent.current_hook_bead_id); + if (input.status === 'failed') { + updateBeadStatus(sql, agent.current_hook_bead_id, 'failed', agentId); + } else if (hookedBead && hookedBead.status === 'in_progress') { + // Agent exited 'completed' but bead is still in_progress — gt_done was never called. + // Don't close the bead. Rule 3 will handle rework. + console.log( + `[review-queue] agentCompleted: polecat ${agentId} exited without gt_done — ` + + `bead ${agent.current_hook_bead_id} stays in_progress (Rule 3 will recover)` + ); + } else if (hookedBead && hookedBead.status === 'open') { + // Bead is open (wasn't dispatched yet or was already reset). No-op. } else { - // MR was already closed (merged) — normal completion - unhookBead(sql, agentId); + // Bead is in_review, closed, or failed — gt_done already ran. No-op on bead. } - } else { - const beadStatus = input.status === 'completed' ? 'closed' : 'failed'; - updateBeadStatus(sql, agent.current_hook_bead_id, beadStatus, agentId); unhookBead(sql, agentId); } } diff --git a/cloudflare-gastown/src/dos/town/scheduling.ts b/cloudflare-gastown/src/dos/town/scheduling.ts new file mode 100644 index 0000000000..477aa9fb17 --- /dev/null +++ b/cloudflare-gastown/src/dos/town/scheduling.ts @@ -0,0 +1,282 @@ +/** + * Agent scheduling and dispatch for the Town DO alarm loop. + * + * Owns the core dispatch/retry logic that was previously inline in + * Town.do.ts. 
The Town DO delegates to these pure(ish) functions, + * passing its SQL handle and env bindings. + */ + +import * as Sentry from '@sentry/cloudflare'; +import { z } from 'zod'; +import { beads, AgentBeadRecord } from '../../db/tables/beads.table'; +import { agent_metadata } from '../../db/tables/agent-metadata.table'; +import { query } from '../../util/query.util'; +import * as beadOps from './beads'; +import * as agents from './agents'; +import * as rigs from './rigs'; +import * as dispatch from './container-dispatch'; +import * as patrol from './patrol'; +import type { Agent, Bead, TownConfig } from '../../types'; +import type { GastownEventData } from '../../util/analytics.util'; + +const LOG = '[scheduling]'; + +// ── Constants ────────────────────────────────────────────────────────── + +export const DISPATCH_COOLDOWN_MS = 2 * 60_000; // 2 min +export const MAX_DISPATCH_ATTEMPTS = 20; + +// ── Context passed by the Town DO ────────────────────────────────────── + +type SchedulingContext = { + sql: SqlStorage; + env: Env; + storage: DurableObjectStorage; + townId: string; + getTownConfig: () => Promise; + getRigConfig: (rigId: string) => Promise; + resolveKilocodeToken: () => Promise; + emitEvent: (data: Omit) => void; +}; + +type RigConfig = { + townId: string; + rigId: string; + gitUrl: string; + defaultBranch: string; + userId: string; + kilocodeToken?: string; + platformIntegrationId?: string; + merge_strategy?: string; +}; + +function now(): string { + return new Date().toISOString(); +} + +// ── dispatchAgent ────────────────────────────────────────────────────── + +/** + * Dispatch a single agent to the container. Transitions the bead to + * in_progress and the agent to working BEFORE the async network call + * (I/O gate safety for fire-and-forget callers). Returns true if the + * container accepted the agent. 
+ */ +export async function dispatchAgent( + ctx: SchedulingContext, + agent: Agent, + bead: Bead, + options?: { systemPromptOverride?: string } +): Promise { + try { + const rigId = agent.rig_id ?? rigs.listRigs(ctx.sql)[0]?.id ?? ''; + const rigConfig = rigId ? await ctx.getRigConfig(rigId) : null; + if (!rigConfig) { + console.warn(`${LOG} dispatchAgent: no rig config for agent=${agent.id} rig=${rigId}`); + return false; + } + + const townConfig = await ctx.getTownConfig(); + const kilocodeToken = await ctx.resolveKilocodeToken(); + + const convoyId = beadOps.getConvoyForBead(ctx.sql, bead.bead_id); + const convoyFeatureBranch = convoyId ? beadOps.getConvoyFeatureBranch(ctx.sql, convoyId) : null; + + // Transition bead to in_progress BEFORE the async container start. + // Must happen synchronously within the I/O gate — fire-and-forget + // callers (slingBead, slingConvoy) close the gate before the + // network call completes. + const currentBead = beadOps.getBead(ctx.sql, bead.bead_id); + if ( + currentBead && + currentBead.status !== 'in_progress' && + currentBead.status !== 'closed' && + currentBead.status !== 'failed' + ) { + beadOps.updateBeadStatus(ctx.sql, bead.bead_id, 'in_progress', agent.id); + } + + // Set agent to 'working' BEFORE the async container start (same + // I/O gate rationale). + const timestamp = now(); + query( + ctx.sql, + /* sql */ ` + UPDATE ${agent_metadata} + SET ${agent_metadata.columns.status} = 'working', + ${agent_metadata.columns.dispatch_attempts} = ${agent_metadata.columns.dispatch_attempts} + 1, + ${agent_metadata.columns.last_activity_at} = ? + WHERE ${agent_metadata.bead_id} = ? + `, + [timestamp, agent.id] + ); + + const started = await dispatch.startAgentInContainer(ctx.env, ctx.storage, { + townId: ctx.townId, + rigId, + userId: rigConfig.userId, + agentId: agent.id, + agentName: agent.name, + role: agent.role, + identity: agent.identity, + beadId: bead.bead_id, + beadTitle: bead.title, + beadBody: bead.body ?? 
'', + checkpoint: agent.checkpoint, + gitUrl: rigConfig.gitUrl, + defaultBranch: rigConfig.defaultBranch, + kilocodeToken, + townConfig, + platformIntegrationId: rigConfig.platformIntegrationId, + convoyFeatureBranch: convoyFeatureBranch ?? undefined, + systemPromptOverride: options?.systemPromptOverride, + }); + + if (started) { + // Best-effort: may be dropped if I/O gate is closed + query( + ctx.sql, + /* sql */ ` + UPDATE ${agent_metadata} + SET ${agent_metadata.columns.dispatch_attempts} = 0 + WHERE ${agent_metadata.bead_id} = ? + `, + [agent.id] + ); + console.log(`${LOG} dispatchAgent: started agent=${agent.name}(${agent.id})`); + ctx.emitEvent({ + event: 'agent.spawned', + townId: ctx.townId, + rigId, + agentId: agent.id, + beadId: bead.bead_id, + role: agent.role, + }); + } else { + // Container start returned false — but the container may have + // actually started the agent (timeout race). DON'T roll back + // the bead to open. Leave it in_progress with the agent idle+hooked. + // If the agent truly failed: rehookOrphanedBeads recovers after 2 min. + // If the agent actually started: it works and calls gt_done normally. + query( + ctx.sql, + /* sql */ ` + UPDATE ${agent_metadata} + SET ${agent_metadata.columns.status} = 'idle', + ${agent_metadata.columns.last_activity_at} = ? + WHERE ${agent_metadata.bead_id} = ? + `, + [now(), agent.id] + ); + ctx.emitEvent({ + event: 'agent.dispatch_failed', + townId: ctx.townId, + rigId, + agentId: agent.id, + beadId: bead.bead_id, + role: agent.role, + }); + } + return started; + } catch (err) { + console.error(`${LOG} dispatchAgent: failed for agent=${agent.id}:`, err); + Sentry.captureException(err, { extra: { agentId: agent.id, beadId: bead.bead_id } }); + try { + query( + ctx.sql, + /* sql */ ` + UPDATE ${agent_metadata} + SET ${agent_metadata.columns.status} = 'idle', + ${agent_metadata.columns.last_activity_at} = ? + WHERE ${agent_metadata.bead_id} = ? 
+ `, + [now(), agent.id] + ); + // Don't roll back bead to open — same timeout race rationale + } catch (rollbackErr) { + console.error(`${LOG} dispatchAgent: rollback also failed:`, rollbackErr); + } + ctx.emitEvent({ + event: 'agent.dispatch_failed', + townId: ctx.townId, + agentId: agent.id, + beadId: bead.bead_id, + role: agent.role, + }); + return false; + } +} + +// ── dispatchUnblockedBeads ───────────────────────────────────────────── + +/** + * When a bead closes, find beads that were blocked by it and are now + * fully unblocked. Dispatch their assigned agents (fire-and-forget). + */ +export function dispatchUnblockedBeads(ctx: SchedulingContext, closedBeadId: string): void { + const unblockedIds = beadOps.getNewlyUnblockedBeads(ctx.sql, closedBeadId); + if (unblockedIds.length === 0) return; + + console.log( + `${LOG} dispatchUnblockedBeads: ${unblockedIds.length} beads unblocked by ${closedBeadId}` + ); + + for (const beadId of unblockedIds) { + const bead = beadOps.getBead(ctx.sql, beadId); + if (!bead || bead.status === 'closed' || bead.status === 'failed') continue; + + if (!bead.assignee_agent_bead_id) continue; + const agent = agents.getAgent(ctx.sql, bead.assignee_agent_bead_id); + if (!agent || agent.status !== 'idle') continue; + + dispatchAgent(ctx, agent, bead).catch(err => + console.error( + `${LOG} dispatchUnblockedBeads: fire-and-forget dispatch failed for bead=${beadId}`, + err + ) + ); + } +} + +// ── hasActiveWork ────────────────────────────────────────────────────── + +/** + * Returns true if the town has work that requires the fast (5s) alarm + * interval. Used to decide between active and idle alarm cadence. 
+ */ +export function hasActiveWork(sql: SqlStorage): boolean { + const activeAgentRows = [ + ...query( + sql, + /* sql */ `SELECT COUNT(*) as cnt FROM ${agent_metadata} WHERE ${agent_metadata.status} IN ('working', 'stalled')`, + [] + ), + ]; + const pendingBeadRows = [ + ...query( + sql, + /* sql */ `SELECT COUNT(*) as cnt FROM ${agent_metadata} WHERE ${agent_metadata.status} = 'idle' AND ${agent_metadata.current_hook_bead_id} IS NOT NULL`, + [] + ), + ]; + const pendingReviewRows = [ + ...query( + sql, + /* sql */ `SELECT COUNT(*) as cnt FROM ${beads} WHERE ${beads.type} = 'merge_request' AND ${beads.status} IN ('open', 'in_progress')`, + [] + ), + ]; + const pendingTriageRows = [ + ...query( + sql, + /* sql */ `SELECT COUNT(*) as cnt FROM ${beads} WHERE ${beads.type} = 'issue' AND ${beads.labels} LIKE ? AND ${beads.status} = 'open'`, + [patrol.TRIAGE_LABEL_LIKE] + ), + ]; + return ( + Number(activeAgentRows[0]?.cnt ?? 0) > 0 || + Number(pendingBeadRows[0]?.cnt ?? 0) > 0 || + Number(pendingReviewRows[0]?.cnt ?? 0) > 0 || + Number(pendingTriageRows[0]?.cnt ?? 
0) > 0 + ); +} diff --git a/cloudflare-gastown/src/gastown.worker.ts b/cloudflare-gastown/src/gastown.worker.ts index 4e36f52fcf..6c8efab713 100644 --- a/cloudflare-gastown/src/gastown.worker.ts +++ b/cloudflare-gastown/src/gastown.worker.ts @@ -35,6 +35,7 @@ import { handleUnhookBead, handlePrime, handleAgentDone, + handleRequestChanges, handleAgentCompleted, handleWriteCheckpoint, handleCheckMail, @@ -54,10 +55,6 @@ import { } from './handlers/rig-review-queue.handler'; import { handleCreateEscalation } from './handlers/rig-escalations.handler'; import { handleResolveTriage } from './handlers/rig-triage.handler'; -import { - handleAddBeadDependency, - handleRemoveBeadDependency, -} from './handlers/bead-dependencies.handler'; import { handleListBeadEvents } from './handlers/rig-bead-events.handler'; import { handleListTownEvents } from './handlers/town-events.handler'; import { @@ -197,6 +194,18 @@ app.get('/', c => c.html(dashboardHtml())); app.get('/health', c => c.json({ status: 'ok' })); +// ── DEBUG: unauthenticated town introspection — REMOVE after debugging ── +app.get('/debug/towns/:townId/status', async c => { + const townId = c.req.param('townId'); + const town = getTownDOStub(c.env, townId); + const alarmStatus = await town.getAlarmStatus(); + // eslint-disable-next-line @typescript-eslint/await-thenable -- DO RPC returns promise at runtime + const agentMeta = await town.debugAgentMetadata(); + // eslint-disable-next-line @typescript-eslint/await-thenable + const beadSummary = await town.debugBeadSummary(); + return c.json({ alarmStatus, agentMeta, beadSummary }); +}); + // ── Town ID + Auth ────────────────────────────────────────────────────── // All rig routes live under /api/towns/:townId/rigs/:rigId so the townId // is always available from the URL path. 
@@ -246,21 +255,6 @@ app.delete('/api/towns/:townId/rigs/:rigId/beads/:beadId', c => ) ); -// ── Bead Dependencies ────────────────────────────────────────────────── - -app.post('/api/towns/:townId/rigs/:rigId/beads/:beadId/dependencies', c => - instrumented(c, 'POST /api/towns/:townId/rigs/:rigId/beads/:beadId/dependencies', () => - handleAddBeadDependency(c, c.req.param()) - ) -); -app.delete('/api/towns/:townId/rigs/:rigId/beads/:beadId/dependencies/:dependsOnBeadId', c => - instrumented( - c, - 'DELETE /api/towns/:townId/rigs/:rigId/beads/:beadId/dependencies/:dependsOnBeadId', - () => handleRemoveBeadDependency(c, c.req.param()) - ) -); - // ── Agents ────────────────────────────────────────────────────────────── app.post('/api/towns/:townId/rigs/:rigId/agents', c => @@ -323,6 +317,11 @@ app.post('/api/towns/:townId/rigs/:rigId/agents/:agentId/done', c => handleAgentDone(c, c.req.param()) ) ); +app.post('/api/towns/:townId/rigs/:rigId/agents/:agentId/request-changes', c => + instrumented(c, 'POST /api/towns/:townId/rigs/:rigId/agents/:agentId/request-changes', () => + handleRequestChanges(c, c.req.param()) + ) +); app.post('/api/towns/:townId/rigs/:rigId/agents/:agentId/completed', c => instrumented(c, 'POST /api/towns/:townId/rigs/:rigId/agents/:agentId/completed', () => handleAgentCompleted(c, c.req.param()) diff --git a/cloudflare-gastown/src/handlers/bead-dependencies.handler.ts b/cloudflare-gastown/src/handlers/bead-dependencies.handler.ts deleted file mode 100644 index f53d0bb9f2..0000000000 --- a/cloudflare-gastown/src/handlers/bead-dependencies.handler.ts +++ /dev/null @@ -1,76 +0,0 @@ -import type { Context } from 'hono'; -import { z } from 'zod'; -import { getTownDOStub } from '../dos/Town.do'; -import { resSuccess, resError } from '../util/res.util'; -import { parseJsonBody } from '../util/parse-json-body.util'; -import type { GastownEnv } from '../gastown.worker'; - -// Only allow user-editable dependency types. 
'tracks' is system-managed -// (created by slingConvoy) and must not be creatable via the public API. -const EditableDependencyType = z.enum(['blocks', 'parent-child']); - -const AddDependencyBody = z.object({ - depends_on_bead_id: z.string().min(1), - dependency_type: EditableDependencyType.optional().default('blocks'), -}); - -/** - * POST /api/towns/:townId/rigs/:rigId/beads/:beadId/dependencies - * Add a dependency edge between two beads. - */ -export async function handleAddBeadDependency( - c: Context, - params: { townId: string; rigId: string; beadId: string } -) { - const parsed = AddDependencyBody.safeParse(await parseJsonBody(c)); - if (!parsed.success) { - return c.json( - { success: false, error: 'Invalid request body', issues: parsed.error.issues }, - 400 - ); - } - - const town = getTownDOStub(c.env, params.townId); - const bead = await town.getBeadAsync(params.beadId); - if (!bead || bead.rig_id !== params.rigId) return c.json(resError('Bead not found'), 404); - - const depBead = await town.getBeadAsync(parsed.data.depends_on_bead_id); - if (!depBead || depBead.rig_id !== params.rigId) { - return c.json(resError('Dependency bead not found in this rig'), 404); - } - - try { - await town.addBeadDependency( - params.beadId, - parsed.data.depends_on_bead_id, - parsed.data.dependency_type - ); - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - return c.json(resError(message), 400); - } - - return c.json(resSuccess({ ok: true })); -} - -/** - * DELETE /api/towns/:townId/rigs/:rigId/beads/:beadId/dependencies/:dependsOnBeadId - * Remove a dependency edge between two beads. 
- */ -export async function handleRemoveBeadDependency( - c: Context, - params: { townId: string; rigId: string; beadId: string; dependsOnBeadId: string } -) { - const town = getTownDOStub(c.env, params.townId); - const bead = await town.getBeadAsync(params.beadId); - if (!bead || bead.rig_id !== params.rigId) return c.json(resError('Bead not found'), 404); - - const depBead = await town.getBeadAsync(params.dependsOnBeadId); - if (!depBead || depBead.rig_id !== params.rigId) { - return c.json(resError('Dependency bead not found in this rig'), 404); - } - - const deleted = await town.removeBeadDependency(params.beadId, params.dependsOnBeadId); - - return c.json(resSuccess({ ok: true, deleted })); -} diff --git a/cloudflare-gastown/src/handlers/mayor-tools.handler.ts b/cloudflare-gastown/src/handlers/mayor-tools.handler.ts index 156dccaeb4..6f778d45d0 100644 --- a/cloudflare-gastown/src/handlers/mayor-tools.handler.ts +++ b/cloudflare-gastown/src/handlers/mayor-tools.handler.ts @@ -24,8 +24,6 @@ const MayorSlingBody = z.object({ title: z.string().min(1), body: z.string().optional(), metadata: z.record(z.string(), z.unknown()).optional(), - depends_on: z.array(z.string().min(1)).optional(), - convoy_id: z.string().min(1).optional(), }); const MayorSlingBatchBody = z @@ -154,11 +152,7 @@ export async function handleMayorSling(c: Context, params: { townId: const town = getTownDOStub(c.env, params.townId); const result = await town.slingBead({ rigId: parsed.data.rig_id, - title: parsed.data.title, - body: parsed.data.body, - metadata: parsed.data.metadata, - dependsOn: parsed.data.depends_on, - convoyId: parsed.data.convoy_id, + ...parsed.data, }); console.log( @@ -397,7 +391,6 @@ const BeadUpdateBody = z metadata: z.record(z.string(), z.unknown()).optional(), rig_id: z.string().min(1).nullable().optional(), parent_bead_id: z.string().min(1).nullable().optional(), - convoy_id: z.string().min(1).nullable().optional(), }) .refine( data => @@ -408,8 +401,7 @@ const 
BeadUpdateBody = z data.status !== undefined || data.metadata !== undefined || data.rig_id !== undefined || - data.parent_bead_id !== undefined || - data.convoy_id !== undefined, + data.parent_bead_id !== undefined, { message: 'At least one field must be provided' } ); @@ -464,24 +456,7 @@ export async function handleMayorBeadUpdate( return c.json(resError('Bead does not belong to this rig'), 403); } - // Handle convoy_id changes separately — convoy membership is managed - // via 'tracks' dependencies and counter updates, not plain field updates. - if (parsed.data.convoy_id !== undefined) { - // null → remove from current convoy; string → add to that convoy - if (parsed.data.convoy_id === null) { - await town.removeBeadFromConvoy(params.beadId); - } else { - await town.addBeadToConvoy(params.beadId, parsed.data.convoy_id); - } - } - - // Forward remaining fields (excluding convoy_id) to the normal update path - const { convoy_id: _convoyId, ...fieldUpdates } = parsed.data; - const hasFieldUpdates = Object.values(fieldUpdates).some(v => v !== undefined); - - const bead = hasFieldUpdates - ? await town.updateBead(params.beadId, fieldUpdates, 'mayor') - : await town.getBeadAsync(params.beadId); + const bead = await town.updateBead(params.beadId, parsed.data, 'mayor'); return c.json(resSuccess(bead)); } diff --git a/cloudflare-gastown/src/handlers/mayor.handler.ts b/cloudflare-gastown/src/handlers/mayor.handler.ts index 0d6740a0f7..cbe0470450 100644 --- a/cloudflare-gastown/src/handlers/mayor.handler.ts +++ b/cloudflare-gastown/src/handlers/mayor.handler.ts @@ -50,9 +50,6 @@ export async function handleSendMayorMessage(c: Context, params: { t ); const town = getTownDOStub(c.env, params.townId); - // Ensure the TownDO knows its real UUID (ctx.id.name is unreliable in local dev) - // TODO: This should only be done on town creation. Why are we doing it here? 
- await town.setTownId(params.townId); const result = await town.sendMayorMessage( parsed.data.message, parsed.data.model, @@ -67,7 +64,6 @@ export async function handleSendMayorMessage(c: Context, params: { t */ export async function handleGetMayorStatus(c: Context, params: { townId: string }) { const town = getTownDOStub(c.env, params.townId); - await town.setTownId(params.townId); const status = await town.getMayorStatus(); return c.json(resSuccess(status), 200); } @@ -80,7 +76,6 @@ export async function handleGetMayorStatus(c: Context, params: { tow export async function handleEnsureMayor(c: Context, params: { townId: string }) { console.log(`${MAYOR_HANDLER_LOG} handleEnsureMayor: townId=${params.townId}`); const town = getTownDOStub(c.env, params.townId); - await town.setTownId(params.townId); const result = await town.ensureMayor(); return c.json(resSuccess(result), 200); } @@ -156,7 +151,6 @@ export async function handleSetDashboardContext( } const town = getTownDOStub(c.env, params.townId); - await town.setTownId(params.townId); await town.setDashboardContext(parsed.data.context); return c.json(resSuccess({ stored: true }), 200); } @@ -184,7 +178,6 @@ export async function handleBroadcastUiAction(c: Context, params: { const action = normalizeUiAction(parsed.data.action, params.townId); const town = getTownDOStub(c.env, params.townId); - await town.setTownId(params.townId); // Validate that the referenced rig belongs to this town const rigId = uiActionRigId(action); diff --git a/cloudflare-gastown/src/handlers/org-towns.handler.ts b/cloudflare-gastown/src/handlers/org-towns.handler.ts index 478959671b..a875e8375d 100644 --- a/cloudflare-gastown/src/handlers/org-towns.handler.ts +++ b/cloudflare-gastown/src/handlers/org-towns.handler.ts @@ -100,7 +100,6 @@ export async function handleCreateOrgRig(c: Context, params: { orgId // If this fails, roll back the rig creation to avoid an orphaned record. 
try { const townDOStub = getTownDOStub(c.env, parsed.data.town_id); - await townDOStub.setTownId(parsed.data.town_id); await townDOStub.configureRig({ rigId: rig.id, townId: parsed.data.town_id, diff --git a/cloudflare-gastown/src/handlers/rig-agents.handler.ts b/cloudflare-gastown/src/handlers/rig-agents.handler.ts index d7f4408ad7..24c429a4c9 100644 --- a/cloudflare-gastown/src/handlers/rig-agents.handler.ts +++ b/cloudflare-gastown/src/handlers/rig-agents.handler.ts @@ -189,9 +189,18 @@ export async function handleCheckMail( return c.json(resSuccess(messages)); } +const HeartbeatWatermark = z + .object({ + lastEventType: z.string().nullable().optional(), + lastEventAt: z.string().nullable().optional(), + activeTools: z.array(z.string()).optional(), + }) + .passthrough(); + /** * Heartbeat endpoint called by the container's heartbeat reporter. - * Updates the agent's last_activity_at timestamp in the Rig DO. + * Updates the agent's last_activity_at timestamp and SDK activity + * watermark in the Town DO's agent_metadata. */ export async function handleHeartbeat( c: Context, @@ -199,7 +208,30 @@ export async function handleHeartbeat( ) { const townId = c.get('townId'); const town = getTownDOStub(c.env, townId); - await town.touchAgentHeartbeat(params.agentId); + + // Parse watermark from body (best-effort — old containers send no body) + let watermark: z.infer | undefined; + try { + const body: unknown = await c.req.json(); + const parsed = HeartbeatWatermark.safeParse(body); + if (parsed.success) { + watermark = parsed.data; + } + } catch { + // No body or invalid JSON — old container format, just touch + } + + await town.touchAgentHeartbeat( + params.agentId, + watermark + ? { + lastEventType: watermark.lastEventType ?? null, + lastEventAt: watermark.lastEventAt ?? 
null, + activeTools: watermark.activeTools, + } + : undefined + ); + return c.json(resSuccess({ heartbeat: true })); } @@ -327,3 +359,32 @@ export async function handleNudgeDelivered( await town.markNudgeDelivered(parsed.data.nudge_id); return c.json(resSuccess({ marked: true })); } + +// ── Request Changes ────────────────────────────────────────────────── + +const RequestChangesBody = z.object({ + feedback: z.string().min(1, 'Feedback is required'), + files: z.array(z.string()).optional(), +}); + +/** + * Refinery requests changes on an in-progress MR. Creates a rework bead + * that blocks the MR bead. The reconciler assigns a polecat to the rework + * bead; when it closes, the MR unblocks for re-review. + */ +export async function handleRequestChanges( + c: Context, + params: { rigId: string; agentId: string } +) { + const parsed = RequestChangesBody.safeParse(await parseJsonBody(c)); + if (!parsed.success) { + return c.json( + { success: false, error: 'Invalid request body', issues: parsed.error.issues }, + 400 + ); + } + const townId = c.get('townId'); + const town = getTownDOStub(c.env, townId); + const result = await town.requestChanges(params.agentId, parsed.data); + return c.json(resSuccess(result), 201); +} diff --git a/cloudflare-gastown/src/prompts/mayor-system.prompt.ts b/cloudflare-gastown/src/prompts/mayor-system.prompt.ts index 98e4de4b9c..cf98c598b6 100644 --- a/cloudflare-gastown/src/prompts/mayor-system.prompt.ts +++ b/cloudflare-gastown/src/prompts/mayor-system.prompt.ts @@ -25,7 +25,7 @@ Your #1 purpose is to turn user requests into actionable work items. Every time You have these tools for cross-rig coordination: -- **gt_sling** — Delegate a single task to a polecat in a specific rig. Use for one-off tasks. Accepts an optional \`depends_on\` array of bead IDs — the new bead will not be dispatched until all listed beads are closed. +- **gt_sling** — Delegate a single task to a polecat in a specific rig. Use for one-off tasks. 
- **gt_sling_batch** — YOUR MOST IMPORTANT TOOL. Sling multiple beads as a tracked convoy. Use this when breaking work into parallel sub-tasks. Creates all beads at once, groups them into a convoy for progress tracking, and dispatches polecats automatically. Accepts an optional \`merge_mode\`: - **"review-then-land"** (default): Each bead is reviewed by the refinery and merged into the convoy's feature branch. Only at the very end does a PR or merge to main occur. Best for tightly coupled tasks that build on each other. - **"review-and-merge"**: Each bead goes through the full review + merge/PR cycle independently. Best for loosely coupled tasks where each can land on its own. @@ -215,8 +215,6 @@ You can directly edit town state when things go wrong: - **gt_convoy_close** to force-close a stuck convoy - **gt_convoy_update** to edit convoy merge_mode or feature_branch - **gt_bead_delete** to remove beads that shouldn't exist -- **gt_bead_add_dependency** to add a dependency between beads (the bead at bead_id will be blocked by depends_on_bead_id) -- **gt_bead_remove_dependency** to remove a dependency between beads (if this unblocks the bead, it will be dispatched automatically) - **gt_escalation_acknowledge** to acknowledge escalations Use these tools when the user reports stuck state, when you detect problems during delegation, or when you need to clean up after failures. You are the town coordinator — you have full authority over the control plane. diff --git a/cloudflare-gastown/src/trpc/router.ts b/cloudflare-gastown/src/trpc/router.ts index 45ee0260d0..6d88f34422 100644 --- a/cloudflare-gastown/src/trpc/router.ts +++ b/cloudflare-gastown/src/trpc/router.ts @@ -341,7 +341,6 @@ export const gastownRouter = router({ const ownerStub = ownership.stub; const townStub = getTownDOStub(ctx.env, input.townId); - await townStub.setTownId(input.townId); // For org towns, use the town owner's identity for credentials; // for personal towns the caller is always the owner. 
@@ -583,7 +582,6 @@ export const gastownRouter = router({ } const townStub = getTownDOStub(ctx.env, rig.town_id); - await townStub.setTownId(rig.town_id); return townStub.slingBead({ rigId: rig.id, title: input.title, @@ -609,7 +607,6 @@ export const gastownRouter = router({ await verifyTownOwnership(ctx.env, ctx.userId, input.townId, ctx.orgMemberships); const townStub = getTownDOStub(ctx.env, input.townId); - await townStub.setTownId(input.townId); return townStub.sendMayorMessage(input.message, input.model, input.uiContext); }), @@ -619,7 +616,6 @@ export const gastownRouter = router({ .query(async ({ ctx, input }) => { await verifyTownOwnership(ctx.env, ctx.userId, input.townId, ctx.orgMemberships); const townStub = getTownDOStub(ctx.env, input.townId); - await townStub.setTownId(input.townId); return townStub.getMayorStatus(); }), @@ -629,7 +625,6 @@ export const gastownRouter = router({ .query(async ({ ctx, input }) => { await verifyTownOwnership(ctx.env, ctx.userId, input.townId, ctx.orgMemberships); const townStub = getTownDOStub(ctx.env, input.townId); - await townStub.setTownId(input.townId); return townStub.getAlarmStatus(); }), @@ -666,7 +661,6 @@ export const gastownRouter = router({ } const townStub = getTownDOStub(ctx.env, input.townId); - await townStub.setTownId(input.townId); return townStub.ensureMayor(); }), @@ -861,7 +855,6 @@ export const gastownRouter = router({ .mutation(async ({ ctx, input }) => { await verifyTownOwnership(ctx.env, ctx.userId, input.townId, ctx.orgMemberships); const townStub = getTownDOStub(ctx.env, input.townId); - await townStub.setTownId(input.townId); await townStub.forceRefreshContainerToken(); }), @@ -1068,8 +1061,6 @@ export const gastownRouter = router({ if (!town) throw new TRPCError({ code: 'NOT_FOUND', message: 'Town not found' }); const townStub = getTownDOStub(ctx.env, input.townId); - await townStub.setTownId(input.townId); - // Use the town owner's identity for credentials. 
Only re-mint the // kilocode token if the caller is the owner (they have their pepper // in ctx). For non-owner members, keep the existing town token. @@ -1201,7 +1192,6 @@ export const gastownRouter = router({ .output(RpcAlarmStatusOutput) .query(async ({ ctx, input }) => { const townStub = getTownDOStub(ctx.env, input.townId); - await townStub.setTownId(input.townId); return townStub.getAlarmStatus(); }), @@ -1231,6 +1221,14 @@ export const gastownRouter = router({ const townStub = getTownDOStub(ctx.env, input.townId); return townStub.getBeadAsync(input.beadId); }), + + // DEBUG: raw agent_metadata dump — remove after debugging + debugAgentMetadata: adminProcedure + .input(z.object({ townId: z.string().uuid() })) + .query(async ({ ctx, input }) => { + const townStub = getTownDOStub(ctx.env, input.townId); + return townStub.debugAgentMetadata(); + }), }); export type GastownRouter = typeof gastownRouter; diff --git a/cloudflare-gastown/src/types.ts b/cloudflare-gastown/src/types.ts index d24b683ba9..c9d0158490 100644 --- a/cloudflare-gastown/src/types.ts +++ b/cloudflare-gastown/src/types.ts @@ -162,6 +162,15 @@ export type PrimeContext = { hooked_bead: Bead | null; undelivered_mail: Mail[]; open_beads: Bead[]; + /** Present when the hooked bead is a rework request (gt:rework label). 
*/ + rework_context: { + feedback: string; + branch: string | null; + target_branch: string | null; + files: string[]; + original_bead_title: string | null; + mr_bead_id: string | null; + } | null; }; // -- Agent done -- diff --git a/cloudflare-gastown/test/integration/convoy-dag.test.ts b/cloudflare-gastown/test/integration/convoy-dag.test.ts index 3586b0f295..e9d22cbc86 100644 --- a/cloudflare-gastown/test/integration/convoy-dag.test.ts +++ b/cloudflare-gastown/test/integration/convoy-dag.test.ts @@ -1,4 +1,4 @@ -import { env } from 'cloudflare:test'; +import { env, runDurableObjectAlarm } from 'cloudflare:test'; import { describe, it, expect, beforeEach } from 'vitest'; function getTownStub(name = 'test-town') { @@ -8,9 +8,13 @@ function getTownStub(name = 'test-town') { describe('Convoy DAG and Feature Branches', () => { let town: ReturnType; + let townName: string; - beforeEach(() => { - town = getTownStub(`convoy-dag-${crypto.randomUUID()}`); + beforeEach(async () => { + townName = `convoy-dag-${crypto.randomUUID()}`; + town = getTownStub(townName); + // Set town ID so the alarm loop doesn't bail out + await town.setTownId(townName); }); // ── Feature Branch ───────────────────────────────────────────────── @@ -190,19 +194,21 @@ describe('Convoy DAG and Feature Branches', () => { ], }); - // The first bead should be open (ready to dispatch) or in_progress - // The second and third should remain open (blocked) + // Run alarm to trigger reconciler assignment (lazy assignment). + // Only unblocked beads get agents assigned. 
+ await runDurableObjectAlarm(town); + const bead0 = await town.getBeadAsync(result.beads[0].bead.bead_id); const bead1 = await town.getBeadAsync(result.beads[1].bead.bead_id); const bead2 = await town.getBeadAsync(result.beads[2].bead.bead_id); - // First bead should have an agent hooked and be ready for dispatch + // First bead is unblocked — reconciler assigned an agent expect(bead0?.assignee_agent_bead_id).toBeTruthy(); - // Second and third are blocked but still have agents hooked - // (they'll be dispatched when unblocked) - expect(bead1?.assignee_agent_bead_id).toBeTruthy(); - expect(bead2?.assignee_agent_bead_id).toBeTruthy(); + // Second and third are blocked — reconciler does NOT assign agents + // (lazy assignment only assigns unblocked beads) + expect(bead1?.assignee_agent_bead_id).toBeNull(); + expect(bead2?.assignee_agent_bead_id).toBeNull(); }); it('should unblock next bead when blocker closes', async () => { @@ -219,20 +225,27 @@ describe('Convoy DAG and Feature Branches', () => { tasks: [{ title: 'Step 1' }, { title: 'Step 2', depends_on: [0] }], }); + // Run alarm to trigger reconciler assignment of unblocked beads + await runDurableObjectAlarm(town); + const beadIds = result.beads.map(b => b.bead.bead_id); - const agentIds = result.beads.map(b => b.agent.id); + const bead0 = await town.getBeadAsync(beadIds[0]); + const agent0Id = bead0!.assignee_agent_bead_id!; + expect(agent0Id).toBeTruthy(); // Close the first bead — this should unblock the second - await town.updateBeadStatus(beadIds[0], 'closed', agentIds[0]); + await town.updateBeadStatus(beadIds[0], 'closed', agent0Id); + + // Run alarm again so reconciler assigns agent to the now-unblocked bead + await runDurableObjectAlarm(town); // After closing, check convoy progress const status = await town.getConvoyStatus(result.convoy.id); expect(status?.closed_beads).toBe(1); - // The second bead should still be open/in_progress (it was unblocked) + // The second bead should be assigned and 
in_progress (unblocked by bead 0) const bead1 = await town.getBeadAsync(beadIds[1]); expect(bead1?.status).not.toBe('closed'); - // Its agent should still be hooked expect(bead1?.assignee_agent_bead_id).toBeTruthy(); }); @@ -251,17 +264,21 @@ describe('Convoy DAG and Feature Branches', () => { tasks: [{ title: 'Task A' }, { title: 'Task B' }, { title: 'Task C', depends_on: [0, 1] }], }); + // Run alarm to trigger reconciler assignment of unblocked beads (A and B) + await runDurableObjectAlarm(town); + const beadIds = result.beads.map(b => b.bead.bead_id); - const agentIds = result.beads.map(b => b.agent.id); + const beadA = await town.getBeadAsync(beadIds[0]); + const beadB = await town.getBeadAsync(beadIds[1]); // Close task A — task C should still be blocked (B is open) - await town.updateBeadStatus(beadIds[0], 'closed', agentIds[0]); + await town.updateBeadStatus(beadIds[0], 'closed', beadA!.assignee_agent_bead_id!); const status1 = await town.getConvoyStatus(result.convoy.id); expect(status1?.closed_beads).toBe(1); // Close task B — task C should now be unblocked - await town.updateBeadStatus(beadIds[1], 'closed', agentIds[1]); + await town.updateBeadStatus(beadIds[1], 'closed', beadB!.assignee_agent_bead_id!); const status2 = await town.getConvoyStatus(result.convoy.id); expect(status2?.closed_beads).toBe(2); @@ -290,16 +307,18 @@ describe('Convoy DAG and Feature Branches', () => { expect(status?.closed_beads).toBe(0); expect(status?.total_beads).toBe(3); + // Run alarm to trigger reconciler assignment + await runDurableObjectAlarm(town); + // Close one bead const beadIds = result.beads.map(b => b.bead.bead_id); - const agentIds = result.beads.map(b => b.agent.id); - await town.updateBeadStatus(beadIds[0], 'closed', agentIds[0]); + await town.updateBeadStatus(beadIds[0], 'closed', 'system'); status = await town.getConvoyStatus(result.convoy.id); expect(status?.closed_beads).toBe(1); // Close second - await town.updateBeadStatus(beadIds[1], 'closed', 
agentIds[1]); + await town.updateBeadStatus(beadIds[1], 'closed', 'system'); status = await town.getConvoyStatus(result.convoy.id); expect(status?.closed_beads).toBe(2); @@ -323,10 +342,9 @@ describe('Convoy DAG and Feature Branches', () => { expect(result.convoy.feature_branch).toBeTruthy(); const beadId = result.beads[0].bead.bead_id; - const agentId = result.beads[0].agent.id; // Close the only bead - await town.updateBeadStatus(beadId, 'closed', agentId); + await town.updateBeadStatus(beadId, 'closed', 'system'); // Convoy should NOT auto-close (it has a feature branch that needs landing) const status = await town.getConvoyStatus(result.convoy.id); @@ -349,11 +367,10 @@ describe('Convoy DAG and Feature Branches', () => { }); const beadIds = result.beads.map(b => b.bead.bead_id); - const agentIds = result.beads.map(b => b.agent.id); // Fail one bead, close the other - await town.updateBeadStatus(beadIds[0], 'failed', agentIds[0]); - await town.updateBeadStatus(beadIds[1], 'closed', agentIds[1]); + await town.updateBeadStatus(beadIds[0], 'failed', 'system'); + await town.updateBeadStatus(beadIds[1], 'closed', 'system'); const status = await town.getConvoyStatus(result.convoy.id); // Both failed and closed count toward progress @@ -531,8 +548,13 @@ describe('Convoy DAG and Feature Branches', () => { merge_mode: 'review-then-land', }); + // Run alarm to trigger reconciler assignment + await runDurableObjectAlarm(town); + const beadId = result.beads[0].bead.bead_id; - const agentId = result.beads[0].agent.id; + const bead0 = await town.getBeadAsync(beadId); + const agentId = bead0!.assignee_agent_bead_id!; + expect(agentId).toBeTruthy(); // Simulate agent completing work await town.agentDone(agentId, { @@ -540,9 +562,12 @@ describe('Convoy DAG and Feature Branches', () => { summary: 'Done with task', }); - // Verify the source bead was closed and a review entry was created + // agentDone is event-only — run alarm to drain events and apply + await 
runDurableObjectAlarm(town); + + // Verify the source bead was transitioned to in_review const bead = await town.getBeadAsync(beadId); - expect(bead?.status).toBe('closed'); + expect(bead?.status).toBe('in_review'); // Check that the MR bead exists with convoy metadata const allBeads = await town.listBeads({ type: 'merge_request' }); diff --git a/cloudflare-gastown/test/integration/http-api.test.ts b/cloudflare-gastown/test/integration/http-api.test.ts index 8a6c731b5d..588ebdbc57 100644 --- a/cloudflare-gastown/test/integration/http-api.test.ts +++ b/cloudflare-gastown/test/integration/http-api.test.ts @@ -40,6 +40,7 @@ function api(path: string): string { } describe('HTTP API', () => { + const townId = 'test-town-http-api'; const rigId = () => `rig-${crypto.randomUUID()}`; // ── Dashboard ────────────────────────────────────────────────────────── @@ -84,7 +85,7 @@ describe('HTTP API', () => { describe('beads', () => { it('should create a bead', async () => { const id = rigId(); - const res = await SELF.fetch(api(`/api/rigs/${id}/beads`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads`), { method: 'POST', headers: headers(), body: JSON.stringify({ @@ -106,7 +107,7 @@ describe('HTTP API', () => { it('should validate required fields', async () => { const id = rigId(); - const res = await SELF.fetch(api(`/api/rigs/${id}/beads`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads`), { method: 'POST', headers: headers(), body: JSON.stringify({ type: 'issue' }), @@ -119,18 +120,18 @@ describe('HTTP API', () => { it('should list beads', async () => { const id = rigId(); // Create two beads - await SELF.fetch(api(`/api/rigs/${id}/beads`), { + await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads`), { method: 'POST', headers: headers(), body: JSON.stringify({ type: 'issue', title: 'Bead 1' }), }); - await SELF.fetch(api(`/api/rigs/${id}/beads`), { + await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads`), { 
method: 'POST', headers: headers(), body: JSON.stringify({ type: 'message', title: 'Bead 2' }), }); - const res = await SELF.fetch(api(`/api/rigs/${id}/beads`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads`), { headers: headers(), }); expect(res.status).toBe(200); @@ -141,18 +142,18 @@ describe('HTTP API', () => { it('should filter beads by type', async () => { const id = rigId(); - await SELF.fetch(api(`/api/rigs/${id}/beads`), { + await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads`), { method: 'POST', headers: headers(), body: JSON.stringify({ type: 'issue', title: 'Issue' }), }); - await SELF.fetch(api(`/api/rigs/${id}/beads`), { + await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads`), { method: 'POST', headers: headers(), body: JSON.stringify({ type: 'message', title: 'Message' }), }); - const res = await SELF.fetch(api(`/api/rigs/${id}/beads?type=issue`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads?type=issue`), { headers: headers(), }); const body = await res.json(); @@ -162,26 +163,26 @@ describe('HTTP API', () => { it('should get a single bead', async () => { const id = rigId(); - const createRes = await SELF.fetch(api(`/api/rigs/${id}/beads`), { + const createRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads`), { method: 'POST', headers: headers(), body: JSON.stringify({ type: 'issue', title: 'Get me' }), }); const created = await createRes.json(); - const beadId = created.data.id; + const beadId = created.data.bead_id; - const res = await SELF.fetch(api(`/api/rigs/${id}/beads/${beadId}`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads/${beadId}`), { headers: headers(), }); expect(res.status).toBe(200); const body = await res.json(); - expect(body.data.id).toBe(beadId); + expect(body.data.bead_id).toBe(beadId); expect(body.data.title).toBe('Get me'); }); it('should return 404 for non-existent bead', async () => { const id = rigId(); 
- const res = await SELF.fetch(api(`/api/rigs/${id}/beads/nonexistent`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads/nonexistent`), { headers: headers(), }); expect(res.status).toBe(404); @@ -190,25 +191,28 @@ describe('HTTP API', () => { it('should update bead status', async () => { const id = rigId(); // Create bead and agent - const beadRes = await SELF.fetch(api(`/api/rigs/${id}/beads`), { + const beadRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads`), { method: 'POST', headers: headers(), body: JSON.stringify({ type: 'issue', title: 'Status test' }), }); const bead = (await beadRes.json()).data; - const agentRes = await SELF.fetch(api(`/api/rigs/${id}/agents`), { + const agentRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { method: 'POST', headers: headers(), body: JSON.stringify({ role: 'polecat', name: 'P1', identity: `p1-${id}` }), }); const agent = (await agentRes.json()).data; - const res = await SELF.fetch(api(`/api/rigs/${id}/beads/${bead.id}/status`), { - method: 'PATCH', - headers: headers(), - body: JSON.stringify({ status: 'in_progress', agent_id: agent.id }), - }); + const res = await SELF.fetch( + api(`/api/towns/${townId}/rigs/${id}/beads/${bead.bead_id}/status`), + { + method: 'PATCH', + headers: headers(), + body: JSON.stringify({ status: 'in_progress', agent_id: agent.id }), + } + ); expect(res.status).toBe(200); const body = await res.json(); expect(body.data.status).toBe('in_progress'); @@ -216,25 +220,28 @@ describe('HTTP API', () => { it('should close a bead', async () => { const id = rigId(); - const beadRes = await SELF.fetch(api(`/api/rigs/${id}/beads`), { + const beadRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads`), { method: 'POST', headers: headers(), body: JSON.stringify({ type: 'issue', title: 'Close me' }), }); const bead = (await beadRes.json()).data; - const agentRes = await SELF.fetch(api(`/api/rigs/${id}/agents`), { + const agentRes = 
await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { method: 'POST', headers: headers(), body: JSON.stringify({ role: 'polecat', name: 'P1', identity: `close-${id}` }), }); const agent = (await agentRes.json()).data; - const res = await SELF.fetch(api(`/api/rigs/${id}/beads/${bead.id}/close`), { - method: 'POST', - headers: headers(), - body: JSON.stringify({ agent_id: agent.id }), - }); + const res = await SELF.fetch( + api(`/api/towns/${townId}/rigs/${id}/beads/${bead.bead_id}/close`), + { + method: 'POST', + headers: headers(), + body: JSON.stringify({ agent_id: agent.id }), + } + ); expect(res.status).toBe(200); const body = await res.json(); expect(body.data.status).toBe('closed'); @@ -247,7 +254,7 @@ describe('HTTP API', () => { describe('agents', () => { it('should register an agent', async () => { const id = rigId(); - const res = await SELF.fetch(api(`/api/rigs/${id}/agents`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { method: 'POST', headers: headers(), body: JSON.stringify({ role: 'polecat', name: 'Polecat-1', identity: `p-${id}` }), @@ -261,18 +268,18 @@ describe('HTTP API', () => { it('should list agents', async () => { const id = rigId(); - await SELF.fetch(api(`/api/rigs/${id}/agents`), { + await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { method: 'POST', headers: headers(), body: JSON.stringify({ role: 'polecat', name: 'P1', identity: `p1-${id}` }), }); - await SELF.fetch(api(`/api/rigs/${id}/agents`), { + await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { method: 'POST', headers: headers(), body: JSON.stringify({ role: 'refinery', name: 'R1', identity: `r1-${id}` }), }); - const res = await SELF.fetch(api(`/api/rigs/${id}/agents`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { headers: headers(), }); const body = await res.json(); @@ -281,14 +288,14 @@ describe('HTTP API', () => { it('should get agent by id', async () => { const id 
= rigId(); - const createRes = await SELF.fetch(api(`/api/rigs/${id}/agents`), { + const createRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { method: 'POST', headers: headers(), body: JSON.stringify({ role: 'polecat', name: 'P1', identity: `get-${id}` }), }); const agent = (await createRes.json()).data; - const res = await SELF.fetch(api(`/api/rigs/${id}/agents/${agent.id}`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents/${agent.id}`), { headers: headers(), }); expect(res.status).toBe(200); @@ -298,7 +305,7 @@ describe('HTTP API', () => { it('should return 404 for non-existent agent', async () => { const id = rigId(); - const res = await SELF.fetch(api(`/api/rigs/${id}/agents/nonexistent`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents/nonexistent`), { headers: headers(), }); expect(res.status).toBe(404); @@ -311,14 +318,14 @@ describe('HTTP API', () => { it('should hook and unhook a bead', async () => { const id = rigId(); // Create agent and bead - const agentRes = await SELF.fetch(api(`/api/rigs/${id}/agents`), { + const agentRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { method: 'POST', headers: headers(), body: JSON.stringify({ role: 'polecat', name: 'P1', identity: `hook-${id}` }), }); const agent = (await agentRes.json()).data; - const beadRes = await SELF.fetch(api(`/api/rigs/${id}/beads`), { + const beadRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads`), { method: 'POST', headers: headers(), body: JSON.stringify({ type: 'issue', title: 'Hook target' }), @@ -326,42 +333,51 @@ describe('HTTP API', () => { const bead = (await beadRes.json()).data; // Hook - const hookRes = await SELF.fetch(api(`/api/rigs/${id}/agents/${agent.id}/hook`), { - method: 'POST', - headers: headers(), - body: JSON.stringify({ bead_id: bead.id }), - }); + const hookRes = await SELF.fetch( + api(`/api/towns/${townId}/rigs/${id}/agents/${agent.id}/hook`), + { 
+ method: 'POST', + headers: headers(), + body: JSON.stringify({ bead_id: bead.bead_id }), + } + ); expect(hookRes.status).toBe(200); const hookBody = await hookRes.json(); expect(hookBody.data.hooked).toBe(true); // Verify agent has hooked bead (stays idle until alarm dispatches to container) - const agentCheck = await SELF.fetch(api(`/api/rigs/${id}/agents/${agent.id}`), { - headers: headers(), - }); + const agentCheck = await SELF.fetch( + api(`/api/towns/${townId}/rigs/${id}/agents/${agent.id}`), + { + headers: headers(), + } + ); const agentState = (await agentCheck.json()).data; expect(agentState.status).toBe('idle'); - expect(agentState.current_hook_bead_id).toBe(bead.id); + expect(agentState.current_hook_bead_id).toBe(bead.bead_id); // Unhook - const unhookRes = await SELF.fetch(api(`/api/rigs/${id}/agents/${agent.id}/hook`), { - method: 'DELETE', - headers: headers(), - }); + const unhookRes = await SELF.fetch( + api(`/api/towns/${townId}/rigs/${id}/agents/${agent.id}/hook`), + { + method: 'DELETE', + headers: headers(), + } + ); expect(unhookRes.status).toBe(200); }); it('should hook via agent JWT auth', async () => { const id = rigId(); // Create agent and bead - const agentRes = await SELF.fetch(api(`/api/rigs/${id}/agents`), { + const agentRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { method: 'POST', headers: headers(), body: JSON.stringify({ role: 'polecat', name: 'P1', identity: `jwt-hook-${id}` }), }); const agent = (await agentRes.json()).data; - const beadRes = await SELF.fetch(api(`/api/rigs/${id}/beads`), { + const beadRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads`), { method: 'POST', headers: headers(), body: JSON.stringify({ type: 'issue', title: 'JWT hook target' }), @@ -370,11 +386,14 @@ describe('HTTP API', () => { // Hook via agent JWT const jwtHeaders = agentHeaders({ agentId: agent.id, rigId: id }); - const hookRes = await SELF.fetch(api(`/api/rigs/${id}/agents/${agent.id}/hook`), { - method: 
'POST', - headers: jwtHeaders, - body: JSON.stringify({ bead_id: bead.id }), - }); + const hookRes = await SELF.fetch( + api(`/api/towns/${townId}/rigs/${id}/agents/${agent.id}/hook`), + { + method: 'POST', + headers: jwtHeaders, + body: JSON.stringify({ bead_id: bead.bead_id }), + } + ); expect(hookRes.status).toBe(200); }); }); @@ -384,16 +403,19 @@ describe('HTTP API', () => { describe('prime', () => { it('should return prime context', async () => { const id = rigId(); - const agentRes = await SELF.fetch(api(`/api/rigs/${id}/agents`), { + const agentRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { method: 'POST', headers: headers(), body: JSON.stringify({ role: 'polecat', name: 'P1', identity: `prime-${id}` }), }); const agent = (await agentRes.json()).data; - const res = await SELF.fetch(api(`/api/rigs/${id}/agents/${agent.id}/prime`), { - headers: headers(), - }); + const res = await SELF.fetch( + api(`/api/towns/${townId}/rigs/${id}/agents/${agent.id}/prime`), + { + headers: headers(), + } + ); expect(res.status).toBe(200); const body = await res.json(); expect(body.data.agent.id).toBe(agent.id); @@ -408,14 +430,14 @@ describe('HTTP API', () => { describe('agent done', () => { it('should mark agent done and submit to review queue', async () => { const id = rigId(); - const agentRes = await SELF.fetch(api(`/api/rigs/${id}/agents`), { + const agentRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { method: 'POST', headers: headers(), body: JSON.stringify({ role: 'polecat', name: 'P1', identity: `done-${id}` }), }); const agent = (await agentRes.json()).data; - const beadRes = await SELF.fetch(api(`/api/rigs/${id}/beads`), { + const beadRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads`), { method: 'POST', headers: headers(), body: JSON.stringify({ type: 'issue', title: 'Done test' }), @@ -423,14 +445,14 @@ describe('HTTP API', () => { const bead = (await beadRes.json()).data; // Hook the bead - await 
SELF.fetch(api(`/api/rigs/${id}/agents/${agent.id}/hook`), { + await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents/${agent.id}/hook`), { method: 'POST', headers: headers(), - body: JSON.stringify({ bead_id: bead.id }), + body: JSON.stringify({ bead_id: bead.bead_id }), }); // Mark done - const res = await SELF.fetch(api(`/api/rigs/${id}/agents/${agent.id}/done`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents/${agent.id}/done`), { method: 'POST', headers: headers(), body: JSON.stringify({ @@ -444,9 +466,12 @@ describe('HTTP API', () => { expect(body.data.done).toBe(true); // Verify agent is idle - const agentCheck = await SELF.fetch(api(`/api/rigs/${id}/agents/${agent.id}`), { - headers: headers(), - }); + const agentCheck = await SELF.fetch( + api(`/api/towns/${townId}/rigs/${id}/agents/${agent.id}`), + { + headers: headers(), + } + ); const agentState = (await agentCheck.json()).data; expect(agentState.status).toBe('idle'); expect(agentState.current_hook_bead_id).toBeNull(); @@ -458,24 +483,30 @@ describe('HTTP API', () => { describe('checkpoint', () => { it('should write and read checkpoint', async () => { const id = rigId(); - const agentRes = await SELF.fetch(api(`/api/rigs/${id}/agents`), { + const agentRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { method: 'POST', headers: headers(), body: JSON.stringify({ role: 'polecat', name: 'P1', identity: `cp-${id}` }), }); const agent = (await agentRes.json()).data; - const writeRes = await SELF.fetch(api(`/api/rigs/${id}/agents/${agent.id}/checkpoint`), { - method: 'POST', - headers: headers(), - body: JSON.stringify({ data: { step: 5, notes: 'halfway' } }), - }); + const writeRes = await SELF.fetch( + api(`/api/towns/${townId}/rigs/${id}/agents/${agent.id}/checkpoint`), + { + method: 'POST', + headers: headers(), + body: JSON.stringify({ data: { step: 5, notes: 'halfway' } }), + } + ); expect(writeRes.status).toBe(200); // Read checkpoint via agent get 
(checkpoint is on the agent record) - const agentCheck = await SELF.fetch(api(`/api/rigs/${id}/agents/${agent.id}`), { - headers: headers(), - }); + const agentCheck = await SELF.fetch( + api(`/api/towns/${townId}/rigs/${id}/agents/${agent.id}`), + { + headers: headers(), + } + ); const agentState = (await agentCheck.json()).data; expect(agentState.checkpoint).toEqual({ step: 5, notes: 'halfway' }); }); @@ -487,14 +518,14 @@ describe('HTTP API', () => { it('should send and check mail', async () => { const id = rigId(); // Create sender and receiver - const senderRes = await SELF.fetch(api(`/api/rigs/${id}/agents`), { + const senderRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { method: 'POST', headers: headers(), body: JSON.stringify({ role: 'polecat', name: 'Sender', identity: `sender-${id}` }), }); const sender = (await senderRes.json()).data; - const receiverRes = await SELF.fetch(api(`/api/rigs/${id}/agents`), { + const receiverRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { method: 'POST', headers: headers(), body: JSON.stringify({ role: 'polecat', name: 'Receiver', identity: `receiver-${id}` }), @@ -502,7 +533,7 @@ describe('HTTP API', () => { const receiver = (await receiverRes.json()).data; // Send mail - const sendRes = await SELF.fetch(api(`/api/rigs/${id}/mail`), { + const sendRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/mail`), { method: 'POST', headers: headers(), body: JSON.stringify({ @@ -515,18 +546,24 @@ describe('HTTP API', () => { expect(sendRes.status).toBe(201); // Check mail - const mailRes = await SELF.fetch(api(`/api/rigs/${id}/agents/${receiver.id}/mail`), { - headers: headers(), - }); + const mailRes = await SELF.fetch( + api(`/api/towns/${townId}/rigs/${id}/agents/${receiver.id}/mail`), + { + headers: headers(), + } + ); expect(mailRes.status).toBe(200); const mailBody = await mailRes.json(); expect(mailBody.data).toHaveLength(1); 
expect(mailBody.data[0].subject).toBe('Hello'); // Check mail again — should be empty (delivered) - const mailRes2 = await SELF.fetch(api(`/api/rigs/${id}/agents/${receiver.id}/mail`), { - headers: headers(), - }); + const mailRes2 = await SELF.fetch( + api(`/api/towns/${townId}/rigs/${id}/agents/${receiver.id}/mail`), + { + headers: headers(), + } + ); const mailBody2 = await mailRes2.json(); expect(mailBody2.data).toHaveLength(0); }); @@ -537,26 +574,26 @@ describe('HTTP API', () => { describe('review queue', () => { it('should submit to review queue', async () => { const id = rigId(); - const agentRes = await SELF.fetch(api(`/api/rigs/${id}/agents`), { + const agentRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/agents`), { method: 'POST', headers: headers(), body: JSON.stringify({ role: 'polecat', name: 'P1', identity: `rq-${id}` }), }); const agent = (await agentRes.json()).data; - const beadRes = await SELF.fetch(api(`/api/rigs/${id}/beads`), { + const beadRes = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads`), { method: 'POST', headers: headers(), body: JSON.stringify({ type: 'issue', title: 'Review me' }), }); const bead = (await beadRes.json()).data; - const res = await SELF.fetch(api(`/api/rigs/${id}/review-queue`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/review-queue`), { method: 'POST', headers: headers(), body: JSON.stringify({ agent_id: agent.id, - bead_id: bead.id, + bead_id: bead.bead_id, branch: 'feature/review', pr_url: 'https://github.com/org/repo/pull/3', }), @@ -572,7 +609,7 @@ describe('HTTP API', () => { describe('escalations', () => { it('should create an escalation bead', async () => { const id = rigId(); - const res = await SELF.fetch(api(`/api/rigs/${id}/escalations`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/escalations`), { method: 'POST', headers: headers(), body: JSON.stringify({ @@ -599,7 +636,7 @@ describe('HTTP API', () => { describe('query param 
validation', () => { it('should reject non-numeric limit', async () => { const id = rigId(); - const res = await SELF.fetch(api(`/api/rigs/${id}/beads?limit=abc`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads?limit=abc`), { headers: headers(), }); expect(res.status).toBe(400); @@ -609,7 +646,7 @@ describe('HTTP API', () => { it('should reject negative offset', async () => { const id = rigId(); - const res = await SELF.fetch(api(`/api/rigs/${id}/beads?offset=-1`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads?offset=-1`), { headers: headers(), }); expect(res.status).toBe(400); @@ -617,7 +654,7 @@ describe('HTTP API', () => { it('should accept valid limit and offset', async () => { const id = rigId(); - const res = await SELF.fetch(api(`/api/rigs/${id}/beads?limit=10&offset=0`), { + const res = await SELF.fetch(api(`/api/towns/${townId}/rigs/${id}/beads?limit=10&offset=0`), { headers: headers(), }); expect(res.status).toBe(200); diff --git a/cloudflare-gastown/test/integration/reconciler.test.ts b/cloudflare-gastown/test/integration/reconciler.test.ts new file mode 100644 index 0000000000..cf08f620b9 --- /dev/null +++ b/cloudflare-gastown/test/integration/reconciler.test.ts @@ -0,0 +1,357 @@ +/** + * Integration tests for the reconciler. + * + * These tests verify the reconciler's behavior end-to-end by: + * 1. Setting up state via DO RPC methods + * 2. Running the alarm (which triggers the reconciler) + * 3. 
Asserting that the reconciler produced the correct state transitions + */ + +import { env, runDurableObjectAlarm } from 'cloudflare:test'; +import { describe, it, expect, beforeEach } from 'vitest'; + +function getTownStub(name = 'test-town') { + const id = env.TOWN.idFromName(name); + return env.TOWN.get(id); +} + +describe('Reconciler', () => { + let town: ReturnType; + let townName: string; + + beforeEach(async () => { + townName = `reconciler-${crypto.randomUUID()}`; + town = getTownStub(townName); + await town.setTownId(townName); + await town.addRig({ + rigId: 'rig-1', + name: 'main-rig', + gitUrl: 'https://github.com/test/repo.git', + defaultBranch: 'main', + }); + }); + + // ── reconcileBeads Rule 1: Unassigned beads get agents ────────────── + + describe('reconcileBeads Rule 1: lazy assignment', () => { + it('should assign an agent to an unassigned open issue bead', async () => { + // Use slingConvoy (single task, no deps) for a bead with rig_id set + const result = await town.slingConvoy({ + rigId: 'rig-1', + convoyTitle: 'Single bead', + tasks: [{ title: 'Unassigned bead' }], + }); + + const beadId = result.beads[0].bead.bead_id; + + // Before alarm: no agent assigned (lazy assignment) + const before = await town.getBeadAsync(beadId); + expect(before?.assignee_agent_bead_id).toBeNull(); + + // Run alarm — reconciler should assign an agent + await runDurableObjectAlarm(town); + + const after = await town.getBeadAsync(beadId); + expect(after?.assignee_agent_bead_id).toBeTruthy(); + }); + + it('should not assign agents to blocked beads', async () => { + const result = await town.slingConvoy({ + rigId: 'rig-1', + convoyTitle: 'Blocked test', + tasks: [{ title: 'First' }, { title: 'Second', depends_on: [0] }], + }); + + await runDurableObjectAlarm(town); + + // First bead (unblocked) should get an agent + const bead0 = await town.getBeadAsync(result.beads[0].bead.bead_id); + expect(bead0?.assignee_agent_bead_id).toBeTruthy(); + + // Second bead (blocked by 
first) should NOT get an agent + const bead1 = await town.getBeadAsync(result.beads[1].bead.bead_id); + expect(bead1?.assignee_agent_bead_id).toBeNull(); + }); + + it('should assign agents to newly unblocked beads after blocker closes', async () => { + const result = await town.slingConvoy({ + rigId: 'rig-1', + convoyTitle: 'Unblock test', + tasks: [{ title: 'First' }, { title: 'Second', depends_on: [0] }], + }); + + // First alarm: assign agent to first bead + await runDurableObjectAlarm(town); + + const bead0 = await town.getBeadAsync(result.beads[0].bead.bead_id); + expect(bead0?.assignee_agent_bead_id).toBeTruthy(); + + // Close the first bead (removes blocker) + await town.updateBeadStatus(result.beads[0].bead.bead_id, 'closed', 'system'); + + // Second alarm: reconciler assigns agent to now-unblocked second bead + await runDurableObjectAlarm(town); + + const bead1 = await town.getBeadAsync(result.beads[1].bead.bead_id); + expect(bead1?.assignee_agent_bead_id).toBeTruthy(); + }); + }); + + // ── reconcileBeads Rule 3: Stale in_progress beads ────────────────── + + describe('reconcileBeads Rule 3: stale in_progress recovery', () => { + it('should return in_progress bead to open when no agent is working', async () => { + const bead = await town.createBead({ + type: 'issue', + title: 'Orphaned bead', + rig_id: 'rig-1', + }); + + // Manually set bead to in_progress (simulating an agent that was dispatched) + await town.updateBeadStatus(bead.bead_id, 'in_progress', 'system'); + + // Wait for staleness threshold to pass (mock: set updated_at in the past) + // Since we can't easily manipulate time in integration tests, we verify + // the invariant: bead is in_progress with no working agent + + // The reconciler checks for staleness > 2 min. In tests, the bead was + // JUST set to in_progress, so it won't be stale yet. This test verifies + // the alarm runs without error — the actual staleness recovery is tested + // by the reconciler's behavior in production. 
+ await runDurableObjectAlarm(town); + + // Bead should still be in_progress (not yet stale) + const after = await town.getBeadAsync(bead.bead_id); + expect(after?.status).toBe('in_progress'); + }); + }); + + // ── Event-driven agentDone ────────────────────────────────────────── + + describe('event-driven agentDone', () => { + it('should transition source bead to in_review after alarm drains agent_done event', async () => { + const result = await town.slingConvoy({ + rigId: 'rig-1', + convoyTitle: 'AgentDone test', + tasks: [{ title: 'Agent done test' }], + }); + + const beadId = result.beads[0].bead.bead_id; + + // Assign agent and set bead to in_progress + await runDurableObjectAlarm(town); + + const assigned = await town.getBeadAsync(beadId); + const agentId = assigned?.assignee_agent_bead_id; + expect(agentId).toBeTruthy(); + + // Agent calls gt_done (event-only) + await town.agentDone(agentId!, { + branch: 'gt/polecat/test-branch', + summary: 'Test done', + }); + + // Run alarm to drain agent_done event + await runDurableObjectAlarm(town); + + // After alarm: bead should be in_review + const after = await town.getBeadAsync(beadId); + expect(after?.status).toBe('in_review'); + + // An MR bead should have been created + const mrBeads = await town.listBeads({ type: 'merge_request' }); + expect(mrBeads.length).toBeGreaterThan(0); + const mrForSource = mrBeads.find(b => b.metadata?.source_bead_id === beadId); + expect(mrForSource).toBeTruthy(); + }); + }); + + // ── reconcileReviewQueue Rule 5: Refinery dispatch ────────────────── + + describe('reconcileReviewQueue Rule 5: refinery dispatch', () => { + it('should create a refinery and dispatch it for an open MR bead', async () => { + const result = await town.slingConvoy({ + rigId: 'rig-1', + convoyTitle: 'Refinery test', + tasks: [{ title: 'Review dispatch test' }], + }); + + const beadId = result.beads[0].bead.bead_id; + + // Assign agent, dispatch + await runDurableObjectAlarm(town); + + const assigned = await 
town.getBeadAsync(beadId); + const agentId = assigned?.assignee_agent_bead_id!; + expect(agentId).toBeTruthy(); + + // Agent finishes work (event-only) + await town.agentDone(agentId, { + branch: 'gt/polecat/test-review', + summary: 'Ready for review', + }); + + // Drain agent_done event → creates MR bead in 'open' status + await runDurableObjectAlarm(town); + + // Verify MR bead exists + const mrBeads = await town.listBeads({ type: 'merge_request' }); + const mrBead = mrBeads.find(b => b.metadata?.source_bead_id === beadId); + expect(mrBead).toBeTruthy(); + + // Run alarm again — reconciler should pop the MR bead and dispatch a refinery. + // (Container dispatch will fail in tests, but the MR should transition + // to in_progress and a refinery agent should be created.) + await runDurableObjectAlarm(town); + + const updatedMr = await town.getBeadAsync(mrBead!.bead_id); + // MR should be in_progress (popped by reconciler) + expect(updatedMr?.status).toBe('in_progress'); + + // A refinery agent should exist + const agentsList = await town.listAgents({ role: 'refinery' }); + expect(agentsList.length).toBeGreaterThan(0); + }); + }); + + // ── reconcileConvoys: Auto-close ──────────────────────────────────── + + describe('reconcileConvoys: progress and auto-close', () => { + it('should close a review-and-merge convoy when all beads are closed', async () => { + const result = await town.slingConvoy({ + rigId: 'rig-1', + convoyTitle: 'Auto-close test', + tasks: [{ title: 'Task 1' }, { title: 'Task 2' }], + merge_mode: 'review-and-merge', + }); + + // Close both beads + await town.updateBeadStatus(result.beads[0].bead.bead_id, 'closed', 'system'); + await town.updateBeadStatus(result.beads[1].bead.bead_id, 'closed', 'system'); + + // Run alarm — reconciler should auto-close the convoy + await runDurableObjectAlarm(town); + + const status = await town.getConvoyStatus(result.convoy.id); + // getConvoyStatus returns 'landed' when the convoy bead is closed + 
expect(status?.status).toBe('landed'); + expect(status?.closed_beads).toBe(2); + }); + + it('should NOT auto-close a review-then-land convoy (needs landing MR)', async () => { + const result = await town.slingConvoy({ + rigId: 'rig-1', + convoyTitle: 'Landing needed test', + tasks: [{ title: 'Task 1' }], + merge_mode: 'review-then-land', + }); + + expect(result.convoy.feature_branch).toBeTruthy(); + + // Close the bead + await town.updateBeadStatus(result.beads[0].bead.bead_id, 'closed', 'system'); + + // Run alarm — convoy should be ready_to_land but NOT closed + await runDurableObjectAlarm(town); + + const status = await town.getConvoyStatus(result.convoy.id); + expect(status?.status).toBe('active'); // Not closed — waiting for landing MR + expect(status?.closed_beads).toBe(1); + }); + }); + + // ── reconcileAgents: idle hooks ───────────────────────────────────── + + describe('reconcileAgents: stale hook cleanup', () => { + it('should unhook an idle agent from a terminal bead', async () => { + const agent = await town.registerAgent({ + role: 'polecat', + name: 'P1', + identity: `stale-hook-${townName}`, + rig_id: 'rig-1', + }); + const bead = await town.createBead({ + type: 'issue', + title: 'Terminal bead', + rig_id: 'rig-1', + }); + + // Hook agent, then close the bead (making it terminal) + await town.hookBead(agent.id, bead.bead_id); + await town.updateBeadStatus(bead.bead_id, 'closed', agent.id); + + // Agent is now idle + hooked to a closed bead + const agentBefore = await town.getAgentAsync(agent.id); + expect(agentBefore?.current_hook_bead_id).toBe(bead.bead_id); + + // Run alarm — reconciler should unhook the stale hook + await runDurableObjectAlarm(town); + + const agentAfter = await town.getAgentAsync(agent.id); + expect(agentAfter?.current_hook_bead_id).toBeNull(); + }); + }); + + // ── reconcileGC: agent garbage collection ─────────────────────────── + + describe('reconcileGC: agent garbage collection', () => { + it('should not GC agents with recent 
activity', async () => { + const agent = await town.registerAgent({ + role: 'polecat', + name: 'P1', + identity: `gc-recent-${townName}`, + rig_id: 'rig-1', + }); + + // Touch the agent (recent heartbeat) + await town.touchAgentHeartbeat(agent.id); + + // Run alarm — agent has recent activity, should NOT be GC'd + await runDurableObjectAlarm(town); + + const after = await town.getAgentAsync(agent.id); + expect(after).toBeTruthy(); + }); + }); + + // ── Event system: insert, drain, apply ────────────────────────────── + + describe('event system', () => { + it('should drain and apply bead_created events', async () => { + // slingBead inserts a bead_created event + const result = await town.slingBead({ + type: 'issue', + title: 'Event test', + rigId: 'rig-1', + }); + + // The bead should exist (created synchronously) + const bead = await town.getBeadAsync(result.bead.bead_id); + expect(bead).toBeTruthy(); + expect(bead?.status).toBe('open'); + + // Agent should be assigned (fast path in slingBead) + expect(result.agent).toBeTruthy(); + }); + + it('should drain and apply convoy_started events', async () => { + const result = await town.slingConvoy({ + rigId: 'rig-1', + convoyTitle: 'Staged convoy', + tasks: [{ title: 'Task 1' }], + staged: true, + }); + + // Convoy should be staged + const statusBefore = await town.getConvoyStatus(result.convoy.id); + expect(statusBefore?.staged).toBe(true); + + // Start the convoy + await town.startConvoy(result.convoy.id); + + // Convoy should no longer be staged + const statusAfter = await town.getConvoyStatus(result.convoy.id); + expect(statusAfter?.staged).toBe(false); + }); + }); +}); diff --git a/cloudflare-gastown/test/integration/review-failure.test.ts b/cloudflare-gastown/test/integration/review-failure.test.ts new file mode 100644 index 0000000000..d5b7773c00 --- /dev/null +++ b/cloudflare-gastown/test/integration/review-failure.test.ts @@ -0,0 +1,249 @@ +import { env, runDurableObjectAlarm } from 'cloudflare:test'; +import 
{ describe, it, expect, beforeEach } from 'vitest'; + +function getTownStub(name = 'test-town') { + const id = env.TOWN.idFromName(name); + return env.TOWN.get(id); +} + +describe('Review failure paths — convoy progress and source bead recovery', () => { + let town: ReturnType; + let townName: string; + + beforeEach(async () => { + townName = `review-failure-${crypto.randomUUID()}`; + town = getTownStub(townName); + await town.setTownId(townName); + }); + + async function setupConvoyWithMR() { + await town.addRig({ + rigId: 'rig-1', + name: 'main-rig', + gitUrl: 'https://github.com/test/repo.git', + defaultBranch: 'main', + }); + + const result = await town.slingConvoy({ + rigId: 'rig-1', + convoyTitle: 'Review Failure Test', + tasks: [{ title: 'Task 1' }], + }); + + // Run alarm to trigger reconciler assignment (lazy assignment) + await runDurableObjectAlarm(town); + + const beadId = result.beads[0].bead.bead_id; + const bead = await town.getBeadAsync(beadId); + const agentId = bead!.assignee_agent_bead_id!; + expect(agentId).toBeTruthy(); + + // Simulate agent completing work — inserts agent_done event + await town.agentDone(agentId, { + branch: 'gt/polecat/test-branch', + summary: 'Completed task', + }); + + // agentDone is event-only — run alarm to drain events and create MR bead + await runDurableObjectAlarm(town); + + // Source bead should now be in_review (waiting for refinery) + const sourceBead = await town.getBeadAsync(beadId); + expect(sourceBead?.status).toBe('in_review'); + + // Find the MR bead + const allBeads = await town.listBeads({ type: 'merge_request' }); + const mrBead = allBeads.find(b => b.metadata?.source_bead_id === beadId); + expect(mrBead).toBeTruthy(); + + return { result, beadId, agentId, mrBeadId: mrBead!.bead_id, convoyId: result.convoy.id }; + } + + // ── completeReviewWithResult properly updates convoy progress ─────── + + describe('completeReviewWithResult on MR failure', () => { + it('should return source bead to in_progress when 
MR bead fails', async () => { + const { beadId, mrBeadId } = await setupConvoyWithMR(); + + // Fail the review via completeReviewWithResult (the fixed path) + await town.completeReviewWithResult({ + entry_id: mrBeadId, + status: 'failed', + message: 'Refinery container failed to start', + }); + + // MR bead should be failed + const mrBead = await town.getBeadAsync(mrBeadId); + expect(mrBead?.status).toBe('failed'); + + // Source bead should be returned to open for rework (not stuck in in_review). + // The reconciler will assign a new agent on the next alarm tick. + const sourceBead = await town.getBeadAsync(beadId); + expect(sourceBead?.status).toBe('open'); + }); + + it('should update convoy progress when MR bead is merged', async () => { + const { beadId, mrBeadId, convoyId } = await setupConvoyWithMR(); + + // Complete the review successfully + await town.completeReviewWithResult({ + entry_id: mrBeadId, + status: 'merged', + message: 'Merged by refinery', + }); + + // Source bead should be closed + const sourceBead = await town.getBeadAsync(beadId); + expect(sourceBead?.status).toBe('closed'); + + // MR bead should be closed + const mrBead = await town.getBeadAsync(mrBeadId); + expect(mrBead?.status).toBe('closed'); + + // Convoy progress should reflect the closed bead + const convoyStatus = await town.getConvoyStatus(convoyId); + expect(convoyStatus?.closed_beads).toBe(1); + }); + }); + + // ── Multi-bead convoy: failed MR doesn't stall the convoy ────────── + + describe('convoy progress with mixed outcomes', () => { + it('should not stall convoy when one MR fails and another merges', async () => { + await town.addRig({ + rigId: 'rig-1', + name: 'main-rig', + gitUrl: 'https://github.com/test/repo.git', + defaultBranch: 'main', + }); + + const result = await town.slingConvoy({ + rigId: 'rig-1', + convoyTitle: 'Two-Task Convoy', + tasks: [{ title: 'Task 1' }, { title: 'Task 2' }], + }); + + // Run alarm to trigger reconciler assignment (lazy assignment) + await 
runDurableObjectAlarm(town); + + const bead0Id = result.beads[0].bead.bead_id; + const bead0 = await town.getBeadAsync(bead0Id); + const agent0Id = bead0!.assignee_agent_bead_id!; + const bead1Id = result.beads[1].bead.bead_id; + const bead1 = await town.getBeadAsync(bead1Id); + const agent1Id = bead1!.assignee_agent_bead_id!; + + // Both agents complete work (event-only) + await town.agentDone(agent0Id, { + branch: 'gt/polecat/task-1', + summary: 'Task 1 done', + }); + await town.agentDone(agent1Id, { + branch: 'gt/polecat/task-2', + summary: 'Task 2 done', + }); + + // Drain events to create MR beads + await runDurableObjectAlarm(town); + + // Find MR beads + const mrBeads = await town.listBeads({ type: 'merge_request' }); + const mr0 = mrBeads.find(b => b.metadata?.source_bead_id === bead0Id); + const mr1 = mrBeads.find(b => b.metadata?.source_bead_id === bead1Id); + expect(mr0).toBeTruthy(); + expect(mr1).toBeTruthy(); + + // Fail MR for task 1 via completeReviewWithResult + await town.completeReviewWithResult({ + entry_id: mr0!.bead_id, + status: 'failed', + message: 'Review failed', + }); + + // Source bead 0 should be back to open (ready for rework by reconciler) + const source0 = await town.getBeadAsync(bead0Id); + expect(source0?.status).toBe('open'); + + // Merge MR for task 2 + await town.completeReviewWithResult({ + entry_id: mr1!.bead_id, + status: 'merged', + message: 'Merged', + }); + + // Source bead 1 should be closed + const source1 = await town.getBeadAsync(bead1Id); + expect(source1?.status).toBe('closed'); + + // Convoy should show 1 closed bead (task 2 merged; task 1 is in_progress + // awaiting rework, its MR is failed but the source isn't terminal yet) + const convoyStatus = await town.getConvoyStatus(result.convoy.id); + expect(convoyStatus?.closed_beads).toBe(1); + }); + }); + + // ── Direct completeReview leaves source bead orphaned (regression) ─ + + describe('completeReview bypass (regression guard)', () => { + it('should leave source 
bead stuck in in_review when completeReview is called directly', async () => { + const { beadId, mrBeadId } = await setupConvoyWithMR(); + + // Call completeReview directly (the OLD broken path) — + // this is the raw SQL update that bypasses lifecycle events. + // We use this to verify the regression scenario. + await town.completeReview(mrBeadId, 'failed'); + + // MR bead should be failed + const mrBead = await town.getBeadAsync(mrBeadId); + expect(mrBead?.status).toBe('failed'); + + // Source bead is STILL in_review — this is the bug this PR fixes + // in processReviewQueue. The direct completeReview call doesn't + // return the source bead to in_progress. + const sourceBead = await town.getBeadAsync(beadId); + expect(sourceBead?.status).toBe('in_review'); + }); + }); + + // ── Source bead in_review after agentDone ────────────────────────── + + describe('agentDone transitions source bead to in_review', () => { + it('should set source bead to in_review after polecat calls agentDone', async () => { + await town.addRig({ + rigId: 'rig-1', + name: 'main-rig', + gitUrl: 'https://github.com/test/repo.git', + defaultBranch: 'main', + }); + + const result = await town.slingConvoy({ + rigId: 'rig-1', + convoyTitle: 'Agent Done Test', + tasks: [{ title: 'Single Task' }], + }); + + // Run alarm to trigger reconciler assignment (lazy assignment) + await runDurableObjectAlarm(town); + + const beadId = result.beads[0].bead.bead_id; + const assignedBead = await town.getBeadAsync(beadId); + const agentId = assignedBead!.assignee_agent_bead_id!; + + await town.agentDone(agentId, { + branch: 'gt/polecat/test', + summary: 'Done', + }); + + // agentDone is event-only — run alarm to drain events + await runDurableObjectAlarm(town); + + const updatedBead = await town.getBeadAsync(beadId); + expect(updatedBead?.status).toBe('in_review'); + + // An MR bead should have been created + const mrBeads = await town.listBeads({ type: 'merge_request' }); + 
expect(mrBeads.length).toBeGreaterThan(0); + expect(mrBeads.some(b => b.metadata?.source_bead_id === beadId)).toBe(true); + }); + }); +}); diff --git a/cloudflare-gastown/test/integration/rig-do.test.ts b/cloudflare-gastown/test/integration/rig-do.test.ts index 38ba3642a2..eb22196fd0 100644 --- a/cloudflare-gastown/test/integration/rig-do.test.ts +++ b/cloudflare-gastown/test/integration/rig-do.test.ts @@ -1,4 +1,4 @@ -import { env } from 'cloudflare:test'; +import { env, runDurableObjectAlarm } from 'cloudflare:test'; import { describe, it, expect, beforeEach } from 'vitest'; function getTownStub(name = 'test-town') { @@ -29,7 +29,7 @@ describe('TownDO', () => { metadata: { source: 'test' }, }); - expect(bead.id).toBeDefined(); + expect(bead.bead_id).toBeDefined(); expect(bead.type).toBe('issue'); expect(bead.status).toBe('open'); expect(bead.title).toBe('Fix the widget'); @@ -37,11 +37,11 @@ describe('TownDO', () => { expect(bead.priority).toBe('high'); expect(bead.labels).toEqual(['bug']); expect(bead.metadata).toEqual({ source: 'test' }); - expect(bead.assignee_agent_id).toBeNull(); + expect(bead.assignee_agent_bead_id).toBeNull(); expect(bead.closed_at).toBeNull(); - const retrieved = await town.getBeadAsync(bead.id); - expect(retrieved).toMatchObject({ id: bead.id, title: 'Fix the widget' }); + const retrieved = await town.getBeadAsync(bead.bead_id); + expect(retrieved).toMatchObject({ bead_id: bead.bead_id, title: 'Fix the widget' }); }); it('should return null for non-existent bead', async () => { @@ -189,18 +189,18 @@ describe('TownDO', () => { }); const bead = await town.createBead({ type: 'issue', title: 'Hook target' }); - await town.hookBead(agent.id, bead.id); + await town.hookBead(agent.id, bead.bead_id); const hookedAgent = await town.getAgentAsync(agent.id); - expect(hookedAgent?.current_hook_bead_id).toBe(bead.id); + expect(hookedAgent?.current_hook_bead_id).toBe(bead.bead_id); expect(hookedAgent?.status).toBe('idle'); - const hookedBead = await 
town.getBeadAsync(bead.id); + const hookedBead = await town.getBeadAsync(bead.bead_id); expect(hookedBead?.status).toBe('in_progress'); - expect(hookedBead?.assignee_agent_id).toBe(agent.id); + expect(hookedBead?.assignee_agent_bead_id).toBe(agent.id); const retrieved = await town.getHookedBead(agent.id); - expect(retrieved?.id).toBe(bead.id); + expect(retrieved?.bead_id).toBe(bead.bead_id); await town.unhookBead(agent.id); @@ -217,12 +217,12 @@ describe('TownDO', () => { }); const bead = await town.createBead({ type: 'issue', title: 'Bead 1' }); - await town.hookBead(agent.id, bead.id); + await town.hookBead(agent.id, bead.bead_id); // Re-hooking the same bead should succeed (idempotent) - await town.hookBead(agent.id, bead.id); + await town.hookBead(agent.id, bead.bead_id); const hookedBead = await town.getHookedBead(agent.id); - expect(hookedBead?.id).toBe(bead.id); + expect(hookedBead?.bead_id).toBe(bead.bead_id); }); it('should return null for unhooked agent', async () => { @@ -248,7 +248,7 @@ describe('TownDO', () => { }); const bead = await town.createBead({ type: 'issue', title: 'Status test' }); - const updated = await town.updateBeadStatus(bead.id, 'in_progress', agent.id); + const updated = await town.updateBeadStatus(bead.bead_id, 'in_progress', agent.id); expect(updated.status).toBe('in_progress'); expect(updated.closed_at).toBeNull(); }); @@ -261,7 +261,7 @@ describe('TownDO', () => { }); const bead = await town.createBead({ type: 'issue', title: 'Close test' }); - const closed = await town.closeBead(bead.id, agent.id); + const closed = await town.closeBead(bead.bead_id, agent.id); expect(closed.status).toBe('closed'); expect(closed.closed_at).toBeDefined(); }); @@ -274,7 +274,7 @@ describe('TownDO', () => { }); await town.createBead({ type: 'issue', title: 'Open bead' }); const beadToClose = await town.createBead({ type: 'issue', title: 'Closed bead' }); - await town.closeBead(beadToClose.id, agent.id); + await town.closeBead(beadToClose.bead_id, 
agent.id); const openBeads = await town.listBeads({ status: 'open' }); expect(openBeads).toHaveLength(1); @@ -366,7 +366,7 @@ describe('TownDO', () => { await town.submitToReviewQueue({ agent_id: agent.id, - bead_id: bead.id, + bead_id: bead.bead_id, rig_id: 'test-rig', branch: 'feature/fix-widget', pr_url: 'https://github.com/org/repo/pull/1', @@ -394,7 +394,7 @@ describe('TownDO', () => { await town.submitToReviewQueue({ agent_id: agent.id, - bead_id: bead.id, + bead_id: bead.bead_id, rig_id: 'test-rig', branch: 'feature/fix', }); @@ -419,7 +419,7 @@ describe('TownDO', () => { await town.submitToReviewQueue({ agent_id: agent.id, - bead_id: bead.id, + bead_id: bead.bead_id, rig_id: 'test-rig', branch: 'feature/merge-test', }); @@ -435,7 +435,7 @@ describe('TownDO', () => { }); // Bead should be closed - const updatedBead = await town.getBeadAsync(bead.id); + const updatedBead = await town.getBeadAsync(bead.bead_id); expect(updatedBead?.status).toBe('closed'); expect(updatedBead?.closed_at).toBeDefined(); @@ -454,7 +454,7 @@ describe('TownDO', () => { await town.submitToReviewQueue({ agent_id: agent.id, - bead_id: bead.id, + bead_id: bead.bead_id, rig_id: 'test-rig', branch: 'feature/conflict-test', }); @@ -469,7 +469,7 @@ describe('TownDO', () => { }); // Original bead should NOT be closed (conflict means it stays as-is) - const updatedBead = await town.getBeadAsync(bead.id); + const updatedBead = await town.getBeadAsync(bead.bead_id); expect(updatedBead?.status).not.toBe('closed'); // An escalation bead should have been created @@ -479,7 +479,7 @@ describe('TownDO', () => { expect(escalations[0].priority).toBe('high'); expect(escalations[0].body).toContain('CONFLICT (content)'); expect(escalations[0].metadata).toMatchObject({ - source_bead_id: bead.id, + source_bead_id: bead.bead_id, source_branch: 'feature/conflict-test', agent_id: agent.id, }); @@ -510,7 +510,7 @@ describe('TownDO', () => { title: 'Work on this', assignee_agent_id: agent.id, }); - await 
town.hookBead(agent.id, bead.id); + await town.hookBead(agent.id, bead.bead_id); await town.sendMail({ from_agent_id: sender.id, @@ -522,7 +522,7 @@ describe('TownDO', () => { const context = await town.prime(agent.id); expect(context.agent.id).toBe(agent.id); - expect(context.hooked_bead?.id).toBe(bead.id); + expect(context.hooked_bead?.bead_id).toBe(bead.bead_id); expect(context.undelivered_mail).toHaveLength(1); expect(context.undelivered_mail[0].subject).toBe('Priority update'); expect(context.open_beads).toHaveLength(1); @@ -591,7 +591,10 @@ describe('TownDO', () => { identity: `done-${townName}`, }); const bead = await town.createBead({ type: 'issue', title: 'Done test' }); - await town.hookBead(agent.id, bead.id); + await town.hookBead(agent.id, bead.bead_id); + + // agentDone is event-only — need to set townId and run alarm to drain + await town.setTownId(townName); await town.agentDone(agent.id, { branch: 'feature/done', @@ -599,16 +602,18 @@ describe('TownDO', () => { summary: 'Completed the work', }); + // Drain the agent_done event + await runDurableObjectAlarm(town); + // Agent should be unhooked const updatedAgent = await town.getAgentAsync(agent.id); expect(updatedAgent?.current_hook_bead_id).toBeNull(); expect(updatedAgent?.status).toBe('idle'); - // Review queue should have an entry - const entry = await town.popReviewQueue(); - expect(entry).toBeDefined(); - expect(entry?.branch).toBe('feature/done'); - expect(entry?.bead_id).toBe(bead.id); + // Review queue should have an entry (MR bead created by applyEvent) + const mrBeads = await town.listBeads({ type: 'merge_request' }); + expect(mrBeads.length).toBeGreaterThan(0); + expect(mrBeads[0].metadata?.source_bead_id).toBe(bead.bead_id); }); }); @@ -651,10 +656,10 @@ describe('TownDO', () => { describe('bead events', () => { it('should write events on createBead', async () => { const bead = await town.createBead({ type: 'issue', title: 'Event test' }); - const events = await town.listBeadEvents({ 
beadId: bead.id }); + const events = await town.listBeadEvents({ beadId: bead.bead_id }); expect(events).toHaveLength(1); expect(events[0].event_type).toBe('created'); - expect(events[0].bead_id).toBe(bead.id); + expect(events[0].bead_id).toBe(bead.bead_id); expect(events[0].metadata).toMatchObject({ title: 'Event test' }); }); @@ -665,15 +670,16 @@ describe('TownDO', () => { identity: `evt-hook-${townName}`, }); const bead = await town.createBead({ type: 'issue', title: 'Hook event test' }); - await town.hookBead(agent.id, bead.id); + await town.hookBead(agent.id, bead.bead_id); - const events = await town.listBeadEvents({ beadId: bead.id }); - // created + hooked - expect(events).toHaveLength(2); + const events = await town.listBeadEvents({ beadId: bead.bead_id }); + // created + status_changed(open→in_progress) + hooked + expect(events).toHaveLength(3); expect(events[0].event_type).toBe('created'); - expect(events[1].event_type).toBe('hooked'); - expect(events[1].agent_id).toBe(agent.id); - expect(events[1].new_value).toBe(agent.id); + expect(events[1].event_type).toBe('status_changed'); + expect(events[2].event_type).toBe('hooked'); + expect(events[2].agent_id).toBe(agent.id); + expect(events[2].new_value).toBe(agent.id); }); it('should write events on unhookBead', async () => { @@ -683,13 +689,13 @@ describe('TownDO', () => { identity: `evt-unhook-${townName}`, }); const bead = await town.createBead({ type: 'issue', title: 'Unhook event test' }); - await town.hookBead(agent.id, bead.id); + await town.hookBead(agent.id, bead.bead_id); await town.unhookBead(agent.id); - const events = await town.listBeadEvents({ beadId: bead.id }); - // created + hooked + unhooked - expect(events).toHaveLength(3); - expect(events[2].event_type).toBe('unhooked'); + const events = await town.listBeadEvents({ beadId: bead.bead_id }); + // created + status_changed + hooked + unhooked + expect(events).toHaveLength(4); + expect(events[3].event_type).toBe('unhooked'); }); it('should 
write events on updateBeadStatus', async () => { @@ -699,9 +705,9 @@ describe('TownDO', () => { identity: `evt-status-${townName}`, }); const bead = await town.createBead({ type: 'issue', title: 'Status event test' }); - await town.updateBeadStatus(bead.id, 'in_progress', agent.id); + await town.updateBeadStatus(bead.bead_id, 'in_progress', agent.id); - const events = await town.listBeadEvents({ beadId: bead.id }); + const events = await town.listBeadEvents({ beadId: bead.bead_id }); // created + status_changed expect(events).toHaveLength(2); expect(events[1].event_type).toBe('status_changed'); @@ -716,9 +722,9 @@ describe('TownDO', () => { identity: `evt-close-${townName}`, }); const bead = await town.createBead({ type: 'issue', title: 'Close event test' }); - await town.closeBead(bead.id, agent.id); + await town.closeBead(bead.bead_id, agent.id); - const events = await town.listBeadEvents({ beadId: bead.id }); + const events = await town.listBeadEvents({ beadId: bead.bead_id }); // created + closed expect(events).toHaveLength(2); expect(events[1].event_type).toBe('closed'); @@ -726,12 +732,12 @@ describe('TownDO', () => { it('should filter events by since timestamp', async () => { const bead = await town.createBead({ type: 'issue', title: 'Since filter test' }); - const events = await town.listBeadEvents({ beadId: bead.id }); + const events = await town.listBeadEvents({ beadId: bead.bead_id }); expect(events).toHaveLength(1); // Query with a future timestamp should return nothing const futureEvents = await town.listBeadEvents({ - beadId: bead.id, + beadId: bead.bead_id, since: '2099-01-01T00:00:00.000Z', }); expect(futureEvents).toHaveLength(0); @@ -754,12 +760,12 @@ describe('TownDO', () => { const bead = await town.createBead({ type: 'issue', title: 'Review event test' }); await town.submitToReviewQueue({ agent_id: agent.id, - bead_id: bead.id, + bead_id: bead.bead_id, rig_id: 'test-rig', branch: 'feature/test', }); - const events = await town.listBeadEvents({ 
beadId: bead.id }); + const events = await town.listBeadEvents({ beadId: bead.bead_id }); const reviewEvents = events.filter(e => e.event_type === 'review_submitted'); expect(reviewEvents).toHaveLength(1); expect(reviewEvents[0].new_value).toBe('feature/test'); diff --git a/cloudflare-gastown/wrangler.jsonc b/cloudflare-gastown/wrangler.jsonc index 5644cd45cd..683e4e58d6 100644 --- a/cloudflare-gastown/wrangler.jsonc +++ b/cloudflare-gastown/wrangler.jsonc @@ -5,7 +5,21 @@ "compatibility_date": "2026-02-24", "compatibility_flags": ["nodejs_compat"], "placement": { "mode": "smart" }, - "observability": { "enabled": true }, + "observability": { + "enabled": true, + "head_sampling_rate": 1, + "logs": { + "enabled": true, + "head_sampling_rate": 1, + "persist": true, + "invocation_logs": true, + }, + "traces": { + "enabled": true, + "persist": true, + "head_sampling_rate": 1, + }, + }, "upload_source_maps": true, "version_metadata": { "binding": "CF_VERSION_METADATA" }, "routes": [ diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 6177d3e5d3..c933f87330 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1068,7 +1068,7 @@ importers: cloudflare-gastown: dependencies: '@cloudflare/containers': - specifier: ^0.1.1 + specifier: ^0.1.0 version: 0.1.1 '@hono/trpc-server': specifier: ^0.4.2 @@ -1080,10 +1080,10 @@ importers: specifier: workspace:* version: link:../packages/worker-utils '@sentry/cloudflare': - specifier: ^9.47.1 + specifier: ^9 version: 9.47.1(@cloudflare/workers-types@4.20260313.1) '@trpc/server': - specifier: ^11.13.0 + specifier: ^11.0.0 version: 11.13.0(typescript@5.9.3) drizzle-orm: specifier: 'catalog:' @@ -1101,14 +1101,14 @@ importers: specifier: 'catalog:' version: 9.0.3 pg: - specifier: ^8.20.0 + specifier: ^8.16.3 version: 8.20.0 zod: specifier: 'catalog:' version: 4.3.6 devDependencies: '@cloudflare/vitest-pool-workers': - specifier: ^0.12.21 + specifier: ^0.12.8 version: 
0.12.21(@cloudflare/workers-types@4.20260313.1)(@vitest/runner@4.1.0)(@vitest/snapshot@4.1.0)(vitest@3.2.4) '@cloudflare/workers-types': specifier: 'catalog:' @@ -1117,17 +1117,17 @@ importers: specifier: workspace:* version: link:../packages/eslint-config '@sentry/cli': - specifier: ^2.58.5 + specifier: ^2.58.2 version: 2.58.5 '@types/jsonwebtoken': specifier: 'catalog:' version: 9.0.10 '@types/node': - specifier: ^22.19.15 + specifier: ^22 version: 22.19.15 '@typescript/native-preview': - specifier: 'catalog:' - version: 7.0.0-dev.20260319.1 + specifier: 7.0.0-dev.20251019.1 + version: 7.0.0-dev.20251019.1 eslint: specifier: 'catalog:' version: 9.39.4(jiti@2.6.1) @@ -1160,11 +1160,8 @@ importers: specifier: workspace:* version: link:../../packages/eslint-config '@types/bun': - specifier: ^1.3.10 + specifier: ^1.2.17 version: 1.3.10 - '@typescript/native-preview': - specifier: 'catalog:' - version: 7.0.0-dev.20260319.1 eslint: specifier: 'catalog:' version: 9.39.4(jiti@2.6.1) @@ -6599,41 +6596,80 @@ packages: resolution: {integrity: sha512-zm6xx8UT/Xy2oSr2ZXD0pZo7Jx2XsCoID2IUh9YSTFRu7z+WdwYTRk6LhUftm1crwqbuoF6I8zAFeCMw0YjwDg==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} + '@typescript/native-preview-darwin-arm64@7.0.0-dev.20251019.1': + resolution: {integrity: sha512-GecLPUXgaptUiBrpuLhKwsxsckJ/rBA1e9pY2HdFx+mIWze1FTUiXu0It6EcFbQ2IZCMke1WuZZz18Bo4lftwA==} + cpu: [arm64] + os: [darwin] + '@typescript/native-preview-darwin-arm64@7.0.0-dev.20260319.1': resolution: {integrity: sha512-CMpCsOVUJ/BLC9x9VaT/DQviTmfHqw/NVwnyAR6ZHVVR1YJynJ6EJBPPHI4oStgTwVSdhna5uTPRiKhUpzvgCA==} cpu: [arm64] os: [darwin] + '@typescript/native-preview-darwin-x64@7.0.0-dev.20251019.1': + resolution: {integrity: sha512-/XTRfbZW+BKvxC0XwoRp21UXdQOAEUwTf/T1OMs797HLfl1EbBiCp2UK+boYFwDw/5WP18i5bYEHkzx34wUTaA==} + cpu: [x64] + os: [darwin] + '@typescript/native-preview-darwin-x64@7.0.0-dev.20260319.1': resolution: {integrity: 
sha512-gUcJwLYodGDC7Xf8aRDSMyL1impp4I+sE4ySTnnibTCzI5Mi2g8Sckkr81TEgMx5L4B10gkMmREuqr//1E+Y3A==} cpu: [x64] os: [darwin] + '@typescript/native-preview-linux-arm64@7.0.0-dev.20251019.1': + resolution: {integrity: sha512-wJR4FDSvOBqtNIZ3SxXk72LfMMPdx69VXpavSgoyZY9Xkf7Wr6uNpKTmwzX/fOjQBhpHo/6ctiSjB/t3uVKKSQ==} + cpu: [arm64] + os: [linux] + '@typescript/native-preview-linux-arm64@7.0.0-dev.20260319.1': resolution: {integrity: sha512-7JmTBE0URYCmLiHXWZ/wlFoNWMu6YJkHGXXqkR3EGTck8+qi0X+dutRsasKYwGHS4I8Ubn/yWTcJHX5qHf68sQ==} cpu: [arm64] os: [linux] + '@typescript/native-preview-linux-arm@7.0.0-dev.20251019.1': + resolution: {integrity: sha512-m0dBydey0T9ToLVbB1e4keK2hSLqJPOT5RMQH9plxibU89Ry4ueON6yGvJgO4La0LqVyk5RqLSkuHyLGFHmevA==} + cpu: [arm] + os: [linux] + '@typescript/native-preview-linux-arm@7.0.0-dev.20260319.1': resolution: {integrity: sha512-boLhrOyT6nSWaVARBD7AF99rLPMy7jjQB50KywZMFzJ9Gg9N+MBBHJCzgwqlDK2sfahfPWBSV5JjYfET8AOZMg==} cpu: [arm] os: [linux] + '@typescript/native-preview-linux-x64@7.0.0-dev.20251019.1': + resolution: {integrity: sha512-xS6qSkZEKw/kw95+K/1xe/3ivx+M8bu5rrySSi+lZmvk2Og19pTmvyW4ec9ojleoYGrU3E6oAcCI+mbcO+KVKg==} + cpu: [x64] + os: [linux] + '@typescript/native-preview-linux-x64@7.0.0-dev.20260319.1': resolution: {integrity: sha512-9LdXkBqR3r8MHmHHwoA7mwv/fkATe2TO+6fLuan/sfmGjob/9y7swyYVvauYzX+aiTbzXjjCkOtEw0klQLxT/A==} cpu: [x64] os: [linux] + '@typescript/native-preview-win32-arm64@7.0.0-dev.20251019.1': + resolution: {integrity: sha512-vJvkjZEN6GRH+Y3atQO5t8WGGjqgnHzwU0ZP+4oqYJl7G6sNiqRw23lHedZxZF6Iav+lE6SPMAhVbz97LlvkbQ==} + cpu: [arm64] + os: [win32] + '@typescript/native-preview-win32-arm64@7.0.0-dev.20260319.1': resolution: {integrity: sha512-jKJ+lha8EZtxP5chtsuSNAZOvPInJbFExu0ctoiGVgw01sdMUZeyFYz9TSLoa5oNUs0dboydtiHTK/uyrKThNw==} cpu: [arm64] os: [win32] + '@typescript/native-preview-win32-x64@7.0.0-dev.20251019.1': + resolution: {integrity: 
sha512-GMYGYxRHIX/+hFn7SGj9LMh4CLm90ZByGH4BvgKvwJGEctkYtOB6wXJUvQMo5koel7pDY1Yy3uChiexuG11biQ==} + cpu: [x64] + os: [win32] + '@typescript/native-preview-win32-x64@7.0.0-dev.20260319.1': resolution: {integrity: sha512-GonXb3TBp23brWlPMA+uL42qUvBTNkCHFoE653tQK5SQkCvCSO3GYyY9K5GhEdG6kV2vDpG+Y98p1BYTM5z0Qg==} cpu: [x64] os: [win32] + '@typescript/native-preview@7.0.0-dev.20251019.1': + resolution: {integrity: sha512-ytCPJouuNmJyGjZwSFg/v0Ugkn/52drU5HymW1p0l6dU+iHuTIaZSKfHFWETJxQVwyyYqNxxvC0QMxTDfwPlGQ==} + hasBin: true + '@typescript/native-preview@7.0.0-dev.20260319.1': resolution: {integrity: sha512-K9evb5u4QmH3Xv2XUg9OWUETYMrIX1C7Hls1ce8DW+Nlbb26NnQ5SPQCt8fGq4FGqZ9BodMwfane1pTd+BWYwQ==} hasBin: true @@ -14594,7 +14630,7 @@ snapshots: '@jest/console@29.7.0': dependencies: '@jest/types': 29.6.3 - '@types/node': 25.5.0 + '@types/node': 22.19.15 chalk: 4.1.2 jest-message-util: 29.7.0 jest-util: 29.7.0 @@ -14603,7 +14639,7 @@ snapshots: '@jest/console@30.3.0': dependencies: '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 22.19.15 chalk: 4.1.2 jest-message-util: 30.3.0 jest-util: 30.3.0 @@ -14616,14 +14652,14 @@ snapshots: '@jest/test-result': 29.7.0 '@jest/transform': 29.7.0 '@jest/types': 29.6.3 - '@types/node': 25.5.0 + '@types/node': 22.19.15 ansi-escapes: 4.3.2 chalk: 4.1.2 ci-info: 3.9.0 exit: 0.1.2 graceful-fs: 4.2.11 jest-changed-files: 29.7.0 - jest-config: 29.7.0(@types/node@25.5.0) + jest-config: 29.7.0(@types/node@22.19.15) jest-haste-map: 29.7.0 jest-message-util: 29.7.0 jest-regex-util: 29.6.3 @@ -14652,14 +14688,14 @@ snapshots: '@jest/test-result': 30.3.0 '@jest/transform': 30.3.0 '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 22.19.15 ansi-escapes: 4.3.2 chalk: 4.1.2 ci-info: 4.4.0 exit-x: 0.2.2 graceful-fs: 4.2.11 jest-changed-files: 30.3.0 - jest-config: 30.3.0(@types/node@25.5.0)(esbuild-register@3.6.0(esbuild@0.27.4)) + jest-config: 30.3.0(@types/node@22.19.15)(esbuild-register@3.6.0(esbuild@0.27.4)) jest-haste-map: 
30.3.0 jest-message-util: 30.3.0 jest-regex-util: 30.0.1 @@ -14689,14 +14725,14 @@ snapshots: dependencies: '@jest/fake-timers': 29.7.0 '@jest/types': 29.6.3 - '@types/node': 25.5.0 + '@types/node': 22.19.15 jest-mock: 29.7.0 '@jest/environment@30.3.0': dependencies: '@jest/fake-timers': 30.3.0 '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 22.19.15 jest-mock: 30.3.0 '@jest/expect-utils@29.7.0': @@ -14725,7 +14761,7 @@ snapshots: dependencies: '@jest/types': 29.6.3 '@sinonjs/fake-timers': 10.3.0 - '@types/node': 25.5.0 + '@types/node': 22.19.15 jest-message-util: 29.7.0 jest-mock: 29.7.0 jest-util: 29.7.0 @@ -14734,7 +14770,7 @@ snapshots: dependencies: '@jest/types': 30.3.0 '@sinonjs/fake-timers': 15.1.1 - '@types/node': 25.5.0 + '@types/node': 22.19.15 jest-message-util: 30.3.0 jest-mock: 30.3.0 jest-util: 30.3.0 @@ -14761,7 +14797,7 @@ snapshots: '@jest/pattern@30.0.1': dependencies: - '@types/node': 25.5.0 + '@types/node': 22.19.15 jest-regex-util: 30.0.1 '@jest/reporters@29.7.0': @@ -14772,7 +14808,7 @@ snapshots: '@jest/transform': 29.7.0 '@jest/types': 29.6.3 '@jridgewell/trace-mapping': 0.3.31 - '@types/node': 25.5.0 + '@types/node': 22.19.15 chalk: 4.1.2 collect-v8-coverage: 1.0.3 exit: 0.1.2 @@ -14801,7 +14837,7 @@ snapshots: '@jest/transform': 30.3.0 '@jest/types': 30.3.0 '@jridgewell/trace-mapping': 0.3.31 - '@types/node': 25.5.0 + '@types/node': 22.19.15 chalk: 4.1.2 collect-v8-coverage: 1.0.3 exit-x: 0.2.2 @@ -14920,7 +14956,7 @@ snapshots: '@jest/schemas': 29.6.3 '@types/istanbul-lib-coverage': 2.0.6 '@types/istanbul-reports': 3.0.4 - '@types/node': 25.5.0 + '@types/node': 22.19.15 '@types/yargs': 17.0.35 chalk: 4.1.2 @@ -14930,7 +14966,7 @@ snapshots: '@jest/schemas': 30.0.5 '@types/istanbul-lib-coverage': 2.0.6 '@types/istanbul-reports': 3.0.4 - '@types/node': 25.5.0 + '@types/node': 22.19.15 '@types/yargs': 17.0.35 chalk: 4.1.2 @@ -16807,7 +16843,7 @@ snapshots: '@slack/logger@4.0.1': dependencies: - '@types/node': 25.5.0 + 
'@types/node': 22.19.15 '@slack/oauth@3.0.5': dependencies: @@ -18047,7 +18083,7 @@ snapshots: '@types/connect@3.4.38': dependencies: - '@types/node': 25.5.0 + '@types/node': 22.19.15 '@types/d3-array@3.2.2': {} @@ -18099,7 +18135,7 @@ snapshots: '@types/graceful-fs@4.1.9': dependencies: - '@types/node': 25.5.0 + '@types/node': 22.19.15 '@types/hast@3.0.4': dependencies: @@ -18148,7 +18184,7 @@ snapshots: '@types/mysql@2.15.27': dependencies: - '@types/node': 25.5.0 + '@types/node': 22.19.15 '@types/node@20.19.37': dependencies: @@ -18170,13 +18206,13 @@ snapshots: '@types/pg@8.15.6': dependencies: - '@types/node': 25.5.0 + '@types/node': 22.19.15 pg-protocol: 1.13.0 pg-types: 2.2.0 '@types/pg@8.18.0': dependencies: - '@types/node': 25.5.0 + '@types/node': 22.19.15 pg-protocol: 1.13.0 pg-types: 2.2.0 @@ -18202,7 +18238,7 @@ snapshots: '@types/tedious@4.0.14': dependencies: - '@types/node': 25.5.0 + '@types/node': 22.19.15 '@types/trusted-types@2.0.7': optional: true @@ -18217,11 +18253,11 @@ snapshots: '@types/wait-on@5.3.4': dependencies: - '@types/node': 25.5.0 + '@types/node': 22.19.15 '@types/ws@8.18.1': dependencies: - '@types/node': 25.5.0 + '@types/node': 22.19.15 '@types/yargs-parser@21.0.3': {} @@ -18320,27 +18356,58 @@ snapshots: '@typescript-eslint/types': 8.57.0 eslint-visitor-keys: 5.0.1 + '@typescript/native-preview-darwin-arm64@7.0.0-dev.20251019.1': + optional: true + '@typescript/native-preview-darwin-arm64@7.0.0-dev.20260319.1': optional: true + '@typescript/native-preview-darwin-x64@7.0.0-dev.20251019.1': + optional: true + '@typescript/native-preview-darwin-x64@7.0.0-dev.20260319.1': optional: true + '@typescript/native-preview-linux-arm64@7.0.0-dev.20251019.1': + optional: true + '@typescript/native-preview-linux-arm64@7.0.0-dev.20260319.1': optional: true + '@typescript/native-preview-linux-arm@7.0.0-dev.20251019.1': + optional: true + '@typescript/native-preview-linux-arm@7.0.0-dev.20260319.1': optional: true + 
'@typescript/native-preview-linux-x64@7.0.0-dev.20251019.1': + optional: true + '@typescript/native-preview-linux-x64@7.0.0-dev.20260319.1': optional: true + '@typescript/native-preview-win32-arm64@7.0.0-dev.20251019.1': + optional: true + '@typescript/native-preview-win32-arm64@7.0.0-dev.20260319.1': optional: true + '@typescript/native-preview-win32-x64@7.0.0-dev.20251019.1': + optional: true + '@typescript/native-preview-win32-x64@7.0.0-dev.20260319.1': optional: true + '@typescript/native-preview@7.0.0-dev.20251019.1': + optionalDependencies: + '@typescript/native-preview-darwin-arm64': 7.0.0-dev.20251019.1 + '@typescript/native-preview-darwin-x64': 7.0.0-dev.20251019.1 + '@typescript/native-preview-linux-arm': 7.0.0-dev.20251019.1 + '@typescript/native-preview-linux-arm64': 7.0.0-dev.20251019.1 + '@typescript/native-preview-linux-x64': 7.0.0-dev.20251019.1 + '@typescript/native-preview-win32-arm64': 7.0.0-dev.20251019.1 + '@typescript/native-preview-win32-x64': 7.0.0-dev.20251019.1 + '@typescript/native-preview@7.0.0-dev.20260319.1': optionalDependencies: '@typescript/native-preview-darwin-arm64': 7.0.0-dev.20260319.1 @@ -19186,7 +19253,7 @@ snapshots: bun-types@1.3.10: dependencies: - '@types/node': 25.5.0 + '@types/node': 22.19.15 bytes@3.1.2: {} @@ -21137,7 +21204,7 @@ snapshots: '@jest/expect': 29.7.0 '@jest/test-result': 29.7.0 '@jest/types': 29.6.3 - '@types/node': 25.5.0 + '@types/node': 22.19.15 chalk: 4.1.2 co: 4.6.0 dedent: 1.7.2 @@ -21163,7 +21230,7 @@ snapshots: '@jest/expect': 30.3.0 '@jest/test-result': 30.3.0 '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 22.19.15 chalk: 4.1.2 co: 4.6.0 dedent: 1.7.2 @@ -21332,38 +21399,6 @@ snapshots: - babel-plugin-macros - supports-color - jest-config@30.3.0(@types/node@25.5.0)(esbuild-register@3.6.0(esbuild@0.27.4)): - dependencies: - '@babel/core': 7.29.0 - '@jest/get-type': 30.1.0 - '@jest/pattern': 30.0.1 - '@jest/test-sequencer': 30.3.0 - '@jest/types': 30.3.0 - babel-jest: 
30.3.0(@babel/core@7.29.0) - chalk: 4.1.2 - ci-info: 4.4.0 - deepmerge: 4.3.1 - glob: 13.0.6 - graceful-fs: 4.2.11 - jest-circus: 30.3.0 - jest-docblock: 30.2.0 - jest-environment-node: 30.3.0 - jest-regex-util: 30.0.1 - jest-resolve: 30.3.0 - jest-runner: 30.3.0 - jest-util: 30.3.0 - jest-validate: 30.3.0 - parse-json: 5.2.0 - pretty-format: 30.3.0 - slash: 3.0.0 - strip-json-comments: 3.1.1 - optionalDependencies: - '@types/node': 25.5.0 - esbuild-register: 3.6.0(esbuild@0.27.4) - transitivePeerDependencies: - - babel-plugin-macros - - supports-color - jest-diff@29.7.0: dependencies: chalk: 4.1.2 @@ -21407,7 +21442,7 @@ snapshots: '@jest/environment': 29.7.0 '@jest/fake-timers': 29.7.0 '@jest/types': 29.6.3 - '@types/node': 25.5.0 + '@types/node': 22.19.15 jest-mock: 29.7.0 jest-util: 29.7.0 @@ -21416,7 +21451,7 @@ snapshots: '@jest/environment': 30.3.0 '@jest/fake-timers': 30.3.0 '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 22.19.15 jest-mock: 30.3.0 jest-util: 30.3.0 jest-validate: 30.3.0 @@ -21427,7 +21462,7 @@ snapshots: dependencies: '@jest/types': 29.6.3 '@types/graceful-fs': 4.1.9 - '@types/node': 25.5.0 + '@types/node': 22.19.15 anymatch: 3.1.3 fb-watchman: 2.0.2 graceful-fs: 4.2.11 @@ -21442,7 +21477,7 @@ snapshots: jest-haste-map@30.3.0: dependencies: '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 22.19.15 anymatch: 3.1.3 fb-watchman: 2.0.2 graceful-fs: 4.2.11 @@ -21512,13 +21547,13 @@ snapshots: jest-mock@29.7.0: dependencies: '@jest/types': 29.6.3 - '@types/node': 25.5.0 + '@types/node': 22.19.15 jest-util: 29.7.0 jest-mock@30.3.0: dependencies: '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 22.19.15 jest-util: 30.3.0 jest-playwright-preset@4.0.0(jest-circus@29.7.0)(jest-environment-node@29.7.0)(jest-runner@29.7.0)(jest@29.7.0(@types/node@25.5.0)): @@ -21609,7 +21644,7 @@ snapshots: '@jest/test-result': 29.7.0 '@jest/transform': 29.7.0 '@jest/types': 29.6.3 - '@types/node': 25.5.0 + '@types/node': 
22.19.15 chalk: 4.1.2 emittery: 0.13.1 graceful-fs: 4.2.11 @@ -21635,7 +21670,7 @@ snapshots: '@jest/test-result': 30.3.0 '@jest/transform': 30.3.0 '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 22.19.15 chalk: 4.1.2 emittery: 0.13.1 exit-x: 0.2.2 @@ -21664,7 +21699,7 @@ snapshots: '@jest/test-result': 29.7.0 '@jest/transform': 29.7.0 '@jest/types': 29.6.3 - '@types/node': 25.5.0 + '@types/node': 22.19.15 chalk: 4.1.2 cjs-module-lexer: 1.4.3 collect-v8-coverage: 1.0.3 @@ -21691,7 +21726,7 @@ snapshots: '@jest/test-result': 30.3.0 '@jest/transform': 30.3.0 '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 22.19.15 chalk: 4.1.2 cjs-module-lexer: 2.2.0 collect-v8-coverage: 1.0.3 @@ -21767,7 +21802,7 @@ snapshots: jest-util@29.7.0: dependencies: '@jest/types': 29.6.3 - '@types/node': 25.5.0 + '@types/node': 22.19.15 chalk: 4.1.2 ci-info: 3.9.0 graceful-fs: 4.2.11 @@ -21776,7 +21811,7 @@ snapshots: jest-util@30.3.0: dependencies: '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 22.19.15 chalk: 4.1.2 ci-info: 4.4.0 graceful-fs: 4.2.11 @@ -21815,7 +21850,7 @@ snapshots: dependencies: '@jest/test-result': 29.7.0 '@jest/types': 29.6.3 - '@types/node': 25.5.0 + '@types/node': 22.19.15 ansi-escapes: 4.3.2 chalk: 4.1.2 emittery: 0.13.1 @@ -21826,7 +21861,7 @@ snapshots: dependencies: '@jest/test-result': 30.3.0 '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 22.19.15 ansi-escapes: 4.3.2 chalk: 4.1.2 emittery: 0.13.1 @@ -21835,20 +21870,20 @@ snapshots: jest-worker@27.5.1: dependencies: - '@types/node': 25.5.0 + '@types/node': 22.19.15 merge-stream: 2.0.0 supports-color: 8.1.1 jest-worker@29.7.0: dependencies: - '@types/node': 25.5.0 + '@types/node': 22.19.15 jest-util: 29.7.0 merge-stream: 2.0.0 supports-color: 8.1.1 jest-worker@30.3.0: dependencies: - '@types/node': 25.5.0 + '@types/node': 22.19.15 '@ungap/structured-clone': 1.3.0 jest-util: 30.3.0 merge-stream: 2.0.0 @@ -23565,7 +23600,7 @@ snapshots: 
'@protobufjs/path': 1.1.2 '@protobufjs/pool': 1.1.0 '@protobufjs/utf8': 1.1.0 - '@types/node': 25.5.0 + '@types/node': 22.19.15 long: 5.3.2 proxy-addr@2.0.7: