From f8df4044b9595847338b2ba7a27b565b5f1cf591 Mon Sep 17 00:00:00 2001 From: "kilo-code-bot[bot]" <240665456+kilo-code-bot[bot]@users.noreply.github.com> Date: Tue, 10 Mar 2026 20:06:41 +0000 Subject: [PATCH 01/13] fix(convoy): call updateConvoyProgress after MR bead closes to trigger landing (#962) (#13) Primary fix: In completeReviewWithResult(), after closeBead() on the source bead, explicitly call updateConvoyProgress() with the source bead ID. The polecat already closed the source bead before gt_done, so closeBead is a no-op (guard in updateBeadStatus short-circuits on same status). Calling updateConvoyProgress directly ensures the convoy recounts after the MR bead transitions to 'closed', allowing the source bead to pass the NOT EXISTS guard and count toward closedCount. Secondary fix: Fix getConvoyForBead() to handle the case where the bead IS the convoy itself. When processConvoyLandings() creates the final landing MR, it passes convoyId as the source bead. The old lookup (find 'tracks' edge from bead) returns null for convoy beads. Now also checks for convoy_metadata presence so the landing MR receives the correct convoy context (merge mode, isIntermediateStep=false) and the refinery sees the 'Final Landing' section in its system prompt. Co-authored-by: Maple (gastown) --- cloudflare-gastown/src/dos/town/beads.ts | 35 +++++++++++++++---- .../src/dos/town/review-queue.ts | 11 ++++++ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/cloudflare-gastown/src/dos/town/beads.ts b/cloudflare-gastown/src/dos/town/beads.ts index 846ff632f7..9e6581b451 100644 --- a/cloudflare-gastown/src/dos/town/beads.ts +++ b/cloudflare-gastown/src/dos/town/beads.ts @@ -226,7 +226,7 @@ export function updateBeadStatus( * recount closed beads and update convoy_metadata. Auto-lands the * convoy when all tracked beads are closed. 
*/ -function updateConvoyProgress(sql: SqlStorage, beadId: string, timestamp: string): void { +export function updateConvoyProgress(sql: SqlStorage, beadId: string, timestamp: string): void { const convoyRows = [ ...query( sql, @@ -667,11 +667,18 @@ export function getConvoyDependencyEdges( } /** - * Find the convoy a bead belongs to (if any) via 'tracks' dependencies. - * Returns the convoy bead_id or null. + * Find the convoy a bead belongs to (if any). + * + * Two cases: + * 1. Normal source bead: tracked by a convoy via bead_dependencies + * (bead_id = sourceBeadId, depends_on_bead_id = convoyId, type = 'tracks'). + * Returns the convoy bead_id. + * 2. The bead IS the convoy (e.g. for the final landing MR where processConvoyLandings + * passes the convoy bead_id as the source). Returns beadId itself. */ export function getConvoyForBead(sql: SqlStorage, beadId: string): string | null { - const rows = [ + // Case 1: bead is tracked by a convoy + const trackRows = [ ...query( sql, /* sql */ ` @@ -683,8 +690,24 @@ export function getConvoyForBead(sql: SqlStorage, beadId: string): string | null [beadId] ), ]; - if (rows.length === 0) return null; - return z.object({ depends_on_bead_id: z.string() }).parse(rows[0]).depends_on_bead_id; + if (trackRows.length > 0) { + return z.object({ depends_on_bead_id: z.string() }).parse(trackRows[0]).depends_on_bead_id; + } + + // Case 2: bead is itself a convoy (has convoy_metadata) + const metaRows = [ + ...query( + sql, + /* sql */ ` + SELECT 1 FROM ${convoy_metadata} + WHERE ${convoy_metadata.bead_id} = ? 
+ `, + [beadId] + ), + ]; + if (metaRows.length > 0) return beadId; + + return null; } /** diff --git a/cloudflare-gastown/src/dos/town/review-queue.ts b/cloudflare-gastown/src/dos/town/review-queue.ts index f24c72a693..aca88240c2 100644 --- a/cloudflare-gastown/src/dos/town/review-queue.ts +++ b/cloudflare-gastown/src/dos/town/review-queue.ts @@ -18,6 +18,7 @@ import { getBead, closeBead, updateBeadStatus, + updateConvoyProgress, createBead, getConvoyForBead, getConvoyFeatureBranch, @@ -273,8 +274,18 @@ export function completeReviewWithResult( }); if (input.status === 'merged') { + const mergeTimestamp = now(); closeBead(sql, entry.bead_id, entry.agent_id); + // Explicitly trigger convoy progress for the source bead after the MR closes. + // closeBead → updateBeadStatus → updateConvoyProgress, but only if the source + // bead's status actually changes. If the polecat already closed the source bead + // before submitting to the review queue, the guard in updateBeadStatus short- + // circuits and updateConvoyProgress is never called. Calling it here directly + // ensures the convoy recounts after the MR bead is now closed (not in-flight), + // so the source bead passes the NOT EXISTS guard and counts toward closedCount. 
+ updateConvoyProgress(sql, entry.bead_id, mergeTimestamp); + // If this was a convoy landing MR, also set landed_at on the convoy metadata const sourceBead = getBead(sql, entry.bead_id); if (sourceBead?.type === 'convoy') { From 8ab489992800968829cecddce309e3f16cd20e86 Mon Sep 17 00:00:00 2001 From: "kilo-code-bot[bot]" <240665456+kilo-code-bot[bot]@users.noreply.github.com> Date: Tue, 10 Mar 2026 20:06:45 +0000 Subject: [PATCH 02/13] fix(patrol): break triage agent feedback loop (#965) (#12) - detectCrashLoops: exclude agents hooked to triage beads via NOT EXISTS subquery so triage failures don't create new crash-loop triage requests - createTriageRequest: add global cap (MAX_OPEN_TRIAGE_REQUESTS=5) to prevent unbounded accumulation during feedback loops - maybeDispatchTriageAgent: pass role='triage' to skip git clone in container; apply DISPATCH_COOLDOWN_MS on failure via last_activity_at - agent-runner: handle role='triage' with createLightweightWorkspace (no git clone); refactor createMayorWorkspace to share the same helper - Add 'triage' to AgentRole enum in both worker and container type files Co-authored-by: Birch (gastown) --- .../container/src/agent-runner.ts | 37 ++++++++++++------ cloudflare-gastown/container/src/types.ts | 2 +- cloudflare-gastown/src/dos/Town.do.ts | 16 +++++++- cloudflare-gastown/src/dos/town/patrol.ts | 39 ++++++++++++++++++- cloudflare-gastown/src/types.ts | 2 +- 5 files changed, 80 insertions(+), 16 deletions(-) diff --git a/cloudflare-gastown/container/src/agent-runner.ts b/cloudflare-gastown/container/src/agent-runner.ts index 803b46ba0a..acb9f3ad09 100644 --- a/cloudflare-gastown/container/src/agent-runner.ts +++ b/cloudflare-gastown/container/src/agent-runner.ts @@ -328,24 +328,23 @@ async function verifyGitCredentials( } /** - * Create a minimal git-initialized workspace for the mayor agent. - * The mayor doesn't need a real repo clone — it's a conversational - * orchestrator that delegates work via tools. 
But kilo serve requires - * a git repo in the working directory. + * Create a minimal git-initialized workspace for a reasoning-only agent + * (e.g. triage) that doesn't need a real repo clone. + * kilo serve requires a git repo in the working directory, so we init + * a local repo with an empty initial commit. */ -async function createMayorWorkspace(rigId: string): Promise { +async function createLightweightWorkspace(label: string, rigId: string): Promise { const { mkdir: mkdirAsync } = await import('node:fs/promises'); const { existsSync } = await import('node:fs'); const path = await import('node:path'); - // Validate rigId to prevent path traversal (rigId is synthetic: "mayor-") + // Validate to prevent path traversal // eslint-disable-next-line no-control-regex if (!rigId || /\.\.[/\\]|[/\\]\.\.|^\.\.$/.test(rigId) || /[\x00-\x1f]/.test(rigId)) { - throw new Error(`Invalid rigId for mayor workspace: ${rigId}`); + throw new Error(`Invalid rigId for lightweight workspace: ${rigId}`); } - const dir = path.resolve('/workspace/rigs', rigId, 'mayor-workspace'); + const dir = path.resolve('/workspace/rigs', rigId, `${label}-workspace`); await mkdirAsync(dir, { recursive: true }); - // Initialize a bare git repo if not already present if (!existsSync(`${dir}/.git`)) { const init = Bun.spawn(['git', 'init'], { cwd: dir, stdout: 'pipe', stderr: 'pipe' }); await init.exited; @@ -355,12 +354,22 @@ async function createMayorWorkspace(rigId: string): Promise { stderr: 'pipe', }); await commit.exited; - console.log(`Created mayor workspace at ${dir}`); + console.log(`Created ${label} workspace at ${dir}`); } return dir; } +/** + * Create a minimal git-initialized workspace for the mayor agent. + * The mayor doesn't need a real repo clone — it's a conversational + * orchestrator that delegates work via tools. But kilo serve requires + * a git repo in the working directory. 
+ */ +async function createMayorWorkspace(rigId: string): Promise { + return createLightweightWorkspace('mayor', rigId); +} + /** * Write the mayor's system prompt to AGENTS.md in the workspace. * @@ -415,7 +424,7 @@ async function writeMayorSystemPromptToAgentsMd( /** * Run the full agent startup sequence: - * 1. Clone/fetch the rig's git repo (or create minimal workspace for mayor) + * 1. Clone/fetch the rig's git repo (or create minimal workspace for mayor/triage) * 2. Create an isolated worktree for the agent's branch * 3. Configure git credentials for push/fetch * 4. Start a kilo serve instance for the worktree (or reuse existing) @@ -425,7 +434,11 @@ export async function runAgent(originalRequest: StartAgentRequest): Promise; // ── Control server request/response schemas ───────────────────────────── diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 6a7e719cd3..6cd1ffba30 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -2500,7 +2500,9 @@ export class TownDO extends DurableObject { userId: rigConfig.userId, agentId: triageAgent.id, agentName: triageAgent.name, - role: 'polecat', + // Use 'triage' role so the container skips the git clone entirely. + // Triage work is purely reasoning — no code changes needed. + role: 'triage', identity: triageAgent.identity, beadId: triageBead.bead_id, beadTitle: triageBead.title, @@ -2521,6 +2523,18 @@ export class TownDO extends DurableObject { } else { agents.unhookBead(this.sql, triageAgent.id); beadOps.updateBeadStatus(this.sql, triageBead.bead_id, 'failed', triageAgent.id); + // Apply dispatch cooldown so the next alarm tick doesn't immediately + // retry. Setting last_activity_at = now() makes the agent invisible + // to schedulePendingWork for DISPATCH_COOLDOWN_MS (2 min). + query( + this.sql, + /* sql */ ` + UPDATE ${agent_metadata} + SET ${agent_metadata.columns.last_activity_at} = ? + WHERE ${agent_metadata.bead_id} = ? 
+ `, + [now(), triageAgent.id] + ); console.error(`${TOWN_LOG} maybeDispatchTriageAgent: triage agent failed to start`); } } diff --git a/cloudflare-gastown/src/dos/town/patrol.ts b/cloudflare-gastown/src/dos/town/patrol.ts index 9be22cb8a6..e8cc2de374 100644 --- a/cloudflare-gastown/src/dos/town/patrol.ts +++ b/cloudflare-gastown/src/dos/town/patrol.ts @@ -37,6 +37,8 @@ export const STALE_HOOK_MS = 30 * 60_000; // 30 min export const CRASH_LOOP_WINDOW_MS = 30 * 60_000; // 30 min /** Minimum failures within the window to flag a crash loop */ export const CRASH_LOOP_THRESHOLD = 3; +/** Maximum number of open triage request beads allowed at once */ +export const MAX_OPEN_TRIAGE_REQUESTS = 5; // ── Triage request types ──────────────────────────────────────────── @@ -105,6 +107,28 @@ export function createTriageRequest( if (existing.length > 0) return; } + // Global cap: skip if there are already too many open triage requests. + // Prevents unbounded accumulation during feedback loops. + const openCountRows = [ + ...query( + sql, + /* sql */ ` + SELECT COUNT(*) AS cnt FROM ${beads} + WHERE ${beads.type} = 'issue' + AND ${beads.labels} LIKE ? + AND ${beads.status} = 'open' + `, + [TRIAGE_LABEL_LIKE] + ), + ]; + const openCount = Number(z.object({ cnt: z.number() }).parse(openCountRows[0]).cnt); + if (openCount >= MAX_OPEN_TRIAGE_REQUESTS) { + console.warn( + `${LOG} createTriageRequest: global cap reached (${openCount} open), skipping type=${params.triageType}` + ); + return; + } + const metadata: TriageRequestMetadata = { triage_type: params.triageType, agent_bead_id: params.agentBeadId, @@ -559,6 +583,12 @@ export function detectCrashLoops(sql: SqlStorage): void { fail_count: z.number(), }); + // Exclude triage agents from crash loop detection — their failures must + // not create new triage requests, which would feed the feedback loop. 
+ // An agent is considered a triage agent if its current hooked bead has + // the gt:triage or gt:triage-request label (both start with "gt:triage"). + const TRIAGE_LABEL_ANY = `%"gt:triage%`; + const rows = CrashRow.array().parse([ ...query( sql, @@ -569,10 +599,17 @@ export function detectCrashLoops(sql: SqlStorage): void { AND be.new_value = 'failed' AND be.agent_id IS NOT NULL AND be.created_at > ? + AND NOT EXISTS ( + SELECT 1 FROM ${agent_metadata} + INNER JOIN ${beads} AS hooked + ON ${agent_metadata.current_hook_bead_id} = hooked.${beads.columns.bead_id} + WHERE ${agent_metadata.bead_id} = be.agent_id + AND hooked.${beads.columns.labels} LIKE ? + ) GROUP BY be.agent_id HAVING fail_count >= ? `, - [windowCutoff, CRASH_LOOP_THRESHOLD] + [windowCutoff, TRIAGE_LABEL_ANY, CRASH_LOOP_THRESHOLD] ), ]); diff --git a/cloudflare-gastown/src/types.ts b/cloudflare-gastown/src/types.ts index a775283462..5cd6c6d6fb 100644 --- a/cloudflare-gastown/src/types.ts +++ b/cloudflare-gastown/src/types.ts @@ -48,7 +48,7 @@ export type BeadFilter = { // -- Agents (now beads + agent_metadata) -- -export const AgentRole = z.enum(['polecat', 'refinery', 'mayor']); +export const AgentRole = z.enum(['polecat', 'refinery', 'mayor', 'triage']); export type AgentRole = z.infer; export const AgentStatus = z.enum(['idle', 'working', 'stalled', 'dead']); From 8f196a7829d2b0c2dcefdba4725ee3e18b1e3fc1 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 10 Mar 2026 10:56:56 -0500 Subject: [PATCH 03/13] fix(gastown): replace per-agent JWTs with per-container HMAC secrets (#923) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent JWTs (8h expiry) caused 401s for persistent agents (Mayor) running longer than 8 hours. Instead of adding token refresh complexity, replace the auth model with HMAC-based container secrets that never expire — they live as long as the container does. When the container sleeps and wakes, a new secret is minted automatically. 
Container secret design: - Format: townId:nonce:hmac (HMAC-SHA256 signed with GASTOWN_JWT_SECRET) - No expiry — lives as long as the container process - Stateless verification — no DO lookup needed, just HMAC check - Town-scoped — cross-town access prevented by HMAC input binding - Agent identity via X-Gastown-Agent-Id/Rig-Id headers, trusted because the container secret proves the request origin Backwards compatible: - Auth middleware accepts both container secrets AND legacy JWTs - Container code prefers GASTOWN_CONTAINER_SECRET, falls back to GASTOWN_SESSION_TOKEN - Legacy JWT minting retained (marked deprecated) for rollout safety Closes #923 --- .../container/plugin/client.test.ts | 30 +++++-- cloudflare-gastown/container/plugin/client.ts | 50 +++++++++-- cloudflare-gastown/container/plugin/types.ts | 6 ++ .../container/src/completion-reporter.ts | 17 ++-- .../container/src/control-server.ts | 16 ++-- cloudflare-gastown/container/src/heartbeat.ts | 14 ++- .../container/src/process-manager.ts | 19 ++-- cloudflare-gastown/container/src/types.ts | 4 +- .../src/dos/town/container-dispatch.ts | 54 +++++++++++- .../src/middleware/auth.middleware.ts | 75 +++++++++++++--- .../src/middleware/mayor-auth.middleware.ts | 30 ++++++- .../src/util/container-secret.util.ts | 86 +++++++++++++++++++ 12 files changed, 341 insertions(+), 60 deletions(-) create mode 100644 cloudflare-gastown/src/util/container-secret.util.ts diff --git a/cloudflare-gastown/container/plugin/client.test.ts b/cloudflare-gastown/container/plugin/client.test.ts index 35f05bb6b5..3c3c86ceb1 100644 --- a/cloudflare-gastown/container/plugin/client.test.ts +++ b/cloudflare-gastown/container/plugin/client.test.ts @@ -7,6 +7,7 @@ const TEST_ENV: GastownEnv = { sessionToken: 'test-jwt-token', agentId: 'agent-111', rigId: 'rig-222', + townId: 'town-333', }; function mockFetch(data: unknown, status = 200) { @@ -48,7 +49,9 @@ describe('GastownClient', () => { expect(fetchMock).toHaveBeenCalledTimes(1); const 
[url, init] = fetchMock.mock.calls[0] as [string, RequestInit]; - expect(url).toBe('https://gastown.example.com/api/rigs/rig-222/agents/agent-111/prime'); + expect(url).toBe( + 'https://gastown.example.com/api/towns/town-333/rigs/rig-222/agents/agent-111/prime' + ); const headers = new Headers(init.headers); expect(headers.get('Authorization')).toBe('Bearer test-jwt-token'); expect(headers.get('Content-Type')).toBe('application/json'); @@ -81,7 +84,7 @@ describe('GastownClient', () => { expect(result).toEqual(bead); const [url] = (globalThis.fetch as ReturnType).mock.calls[0] as [string]; - expect(url).toBe('https://gastown.example.com/api/rigs/rig-222/beads/bead-1'); + expect(url).toBe('https://gastown.example.com/api/towns/town-333/rigs/rig-222/beads/bead-1'); }); it('closeBead() sends agent_id in body', async () => { @@ -94,7 +97,9 @@ describe('GastownClient', () => { string, RequestInit, ]; - expect(url).toBe('https://gastown.example.com/api/rigs/rig-222/beads/bead-1/close'); + expect(url).toBe( + 'https://gastown.example.com/api/towns/town-333/rigs/rig-222/beads/bead-1/close' + ); expect(init.method).toBe('POST'); expect(JSON.parse(init.body as string)).toEqual({ agent_id: 'agent-111' }); }); @@ -112,7 +117,9 @@ describe('GastownClient', () => { string, RequestInit, ]; - expect(url).toBe('https://gastown.example.com/api/rigs/rig-222/agents/agent-111/done'); + expect(url).toBe( + 'https://gastown.example.com/api/towns/town-333/rigs/rig-222/agents/agent-111/done' + ); expect(JSON.parse(init.body as string)).toEqual({ branch: 'feat/test', pr_url: 'https://github.com/pr/1', @@ -145,7 +152,9 @@ describe('GastownClient', () => { expect(result).toEqual(mail); const [url] = (globalThis.fetch as ReturnType).mock.calls[0] as [string]; - expect(url).toBe('https://gastown.example.com/api/rigs/rig-222/agents/agent-111/mail'); + expect(url).toBe( + 'https://gastown.example.com/api/towns/town-333/rigs/rig-222/agents/agent-111/mail' + ); }); it('writeCheckpoint() posts data 
to checkpoint endpoint', async () => { @@ -157,7 +166,9 @@ describe('GastownClient', () => { string, RequestInit, ]; - expect(url).toBe('https://gastown.example.com/api/rigs/rig-222/agents/agent-111/checkpoint'); + expect(url).toBe( + 'https://gastown.example.com/api/towns/town-333/rigs/rig-222/agents/agent-111/checkpoint' + ); expect(JSON.parse(init.body as string)).toEqual({ data: { step: 3, files: ['a.ts'] } }); }); @@ -172,7 +183,7 @@ describe('GastownClient', () => { string, RequestInit, ]; - expect(url).toBe('https://gastown.example.com/api/rigs/rig-222/escalations'); + expect(url).toBe('https://gastown.example.com/api/towns/town-333/rigs/rig-222/escalations'); expect(JSON.parse(init.body as string)).toEqual({ title: 'blocked', priority: 'high' }); }); @@ -246,7 +257,9 @@ describe('GastownClient', () => { // Verify no double slashes in the URL by calling prime void c.prime(); const [url] = (globalThis.fetch as ReturnType).mock.calls[0] as [string]; - expect(url).toBe('https://gastown.example.com/api/rigs/rig-222/agents/agent-111/prime'); + expect(url).toBe( + 'https://gastown.example.com/api/towns/town-333/rigs/rig-222/agents/agent-111/prime' + ); }); }); @@ -262,6 +275,7 @@ describe('createClientFromEnv', () => { process.env.GASTOWN_SESSION_TOKEN = 'tok'; process.env.GASTOWN_AGENT_ID = 'agent-1'; process.env.GASTOWN_RIG_ID = 'rig-1'; + process.env.GASTOWN_TOWN_ID = 'town-1'; const client = createClientFromEnv(); expect(client).toBeInstanceOf(GastownClient); diff --git a/cloudflare-gastown/container/plugin/client.ts b/cloudflare-gastown/container/plugin/client.ts index 1627c2a16f..06efedbf69 100644 --- a/cloudflare-gastown/container/plugin/client.ts +++ b/cloudflare-gastown/container/plugin/client.ts @@ -26,12 +26,14 @@ function isApiResponse( export class GastownClient { private baseUrl: string; + private containerSecret: string | undefined; private token: string; private agentId: string; private rigId: string; private townId: string; constructor(env: 
GastownEnv) { this.baseUrl = env.apiUrl.replace(/\/+$/, ''); + this.containerSecret = env.containerSecret; this.token = env.sessionToken; this.agentId = env.agentId; this.rigId = env.rigId; @@ -50,7 +52,13 @@ export class GastownClient { // Normalize headers so callers can pass plain objects, Headers instances, or tuples const headers = new Headers(init?.headers); headers.set('Content-Type', 'application/json'); - headers.set('Authorization', `Bearer ${this.token}`); + // Prefer container secret (no expiry) over legacy JWT (8h expiry) + headers.set('Authorization', `Bearer ${this.containerSecret ?? this.token}`); + // When using container secret, agent identity must be sent via headers + if (this.containerSecret) { + headers.set('X-Gastown-Agent-Id', this.agentId); + headers.set('X-Gastown-Rig-Id', this.rigId); + } let response: Response; try { @@ -193,16 +201,18 @@ export class GastownClient { /** * Mayor-scoped client for town-level cross-rig operations. - * Uses `/api/mayor/:townId/tools/*` routes authenticated via townId-scoped JWT. + * Uses `/api/mayor/:townId/tools/*` routes authenticated via container secret or JWT. */ export class MayorGastownClient { private baseUrl: string; + private containerSecret: string | undefined; private token: string; private agentId: string; private townId: string; constructor(env: MayorGastownEnv) { this.baseUrl = env.apiUrl.replace(/\/+$/, ''); + this.containerSecret = env.containerSecret; this.token = env.sessionToken; this.agentId = env.agentId; this.townId = env.townId; @@ -215,7 +225,11 @@ export class MayorGastownClient { private async request(url: string, init?: RequestInit): Promise { const headers = new Headers(init?.headers); headers.set('Content-Type', 'application/json'); - headers.set('Authorization', `Bearer ${this.token}`); + // Prefer container secret (no expiry) over legacy JWT (8h expiry) + headers.set('Authorization', `Bearer ${this.containerSecret ?? 
this.token}`); + if (this.containerSecret) { + headers.set('X-Gastown-Agent-Id', this.agentId); + } let response: Response; try { @@ -334,15 +348,18 @@ export class GastownApiError extends Error { export function createClientFromEnv(): GastownClient { const apiUrl = process.env.GASTOWN_API_URL; + const containerSecret = process.env.GASTOWN_CONTAINER_SECRET; const sessionToken = process.env.GASTOWN_SESSION_TOKEN; const agentId = process.env.GASTOWN_AGENT_ID; const rigId = process.env.GASTOWN_RIG_ID; const townId = process.env.GASTOWN_TOWN_ID; - if (!apiUrl || !sessionToken || !agentId || !rigId || !townId) { + // Require either containerSecret or sessionToken (prefer containerSecret) + const hasAuth = containerSecret || sessionToken; + if (!apiUrl || !hasAuth || !agentId || !rigId || !townId) { const missing = [ !apiUrl && 'GASTOWN_API_URL', - !sessionToken && 'GASTOWN_SESSION_TOKEN', + !hasAuth && 'GASTOWN_CONTAINER_SECRET or GASTOWN_SESSION_TOKEN', !agentId && 'GASTOWN_AGENT_ID', !rigId && 'GASTOWN_RIG_ID', !townId && 'GASTOWN_TOWN_ID', @@ -350,24 +367,39 @@ export function createClientFromEnv(): GastownClient { throw new Error(`Missing required Gastown environment variables: ${missing.join(', ')}`); } - return new GastownClient({ apiUrl, sessionToken, agentId, rigId, townId }); + return new GastownClient({ + apiUrl, + containerSecret: containerSecret ?? undefined, + sessionToken: sessionToken ?? 
'', + agentId, + rigId, + townId, + }); } export function createMayorClientFromEnv(): MayorGastownClient { const apiUrl = process.env.GASTOWN_API_URL; + const containerSecret = process.env.GASTOWN_CONTAINER_SECRET; const sessionToken = process.env.GASTOWN_SESSION_TOKEN; const agentId = process.env.GASTOWN_AGENT_ID; const townId = process.env.GASTOWN_TOWN_ID; - if (!apiUrl || !sessionToken || !agentId || !townId) { + const hasAuth = containerSecret || sessionToken; + if (!apiUrl || !hasAuth || !agentId || !townId) { const missing = [ !apiUrl && 'GASTOWN_API_URL', - !sessionToken && 'GASTOWN_SESSION_TOKEN', + !hasAuth && 'GASTOWN_CONTAINER_SECRET or GASTOWN_SESSION_TOKEN', !agentId && 'GASTOWN_AGENT_ID', !townId && 'GASTOWN_TOWN_ID', ].filter(Boolean); throw new Error(`Missing required mayor environment variables: ${missing.join(', ')}`); } - return new MayorGastownClient({ apiUrl, sessionToken, agentId, townId }); + return new MayorGastownClient({ + apiUrl, + containerSecret: containerSecret ?? undefined, + sessionToken: sessionToken ?? '', + agentId, + townId, + }); } diff --git a/cloudflare-gastown/container/plugin/types.ts b/cloudflare-gastown/container/plugin/types.ts index 2a2ad85a93..d0990b17a5 100644 --- a/cloudflare-gastown/container/plugin/types.ts +++ b/cloudflare-gastown/container/plugin/types.ts @@ -119,6 +119,9 @@ export type ConvoyDetail = Convoy & { // Environment variable config for the plugin (rig-scoped agents) export type GastownEnv = { apiUrl: string; + /** Container secret (HMAC-based, no expiry) — preferred auth mechanism. */ + containerSecret?: string; + /** Legacy per-agent JWT (8h expiry) — fallback during rollout. */ sessionToken: string; agentId: string; rigId: string; @@ -128,6 +131,9 @@ export type GastownEnv = { // Environment variable config for the mayor (town-scoped) export type MayorGastownEnv = { apiUrl: string; + /** Container secret (HMAC-based, no expiry) — preferred auth mechanism. 
*/ + containerSecret?: string; + /** Legacy per-agent JWT (8h expiry) — fallback during rollout. */ sessionToken: string; agentId: string; townId: string; diff --git a/cloudflare-gastown/container/src/completion-reporter.ts b/cloudflare-gastown/container/src/completion-reporter.ts index a12eb88d2b..840c0081cb 100644 --- a/cloudflare-gastown/container/src/completion-reporter.ts +++ b/cloudflare-gastown/container/src/completion-reporter.ts @@ -17,8 +17,8 @@ export async function reportAgentCompleted( reason?: string ): Promise { const apiUrl = agent.gastownApiUrl; - const token = agent.gastownSessionToken; - if (!apiUrl || !token) { + const authToken = agent.gastownContainerSecret ?? agent.gastownSessionToken; + if (!apiUrl || !authToken) { console.warn( `Cannot report agent ${agent.agentId} completion: no API credentials on agent record` ); @@ -29,12 +29,17 @@ export async function reportAgentCompleted( agent.completionCallbackUrl ?? `${apiUrl}/api/towns/${agent.townId}/rigs/${agent.rigId}/agents/${agent.agentId}/completed`; try { + const headers: Record = { + 'Content-Type': 'application/json', + Authorization: `Bearer ${authToken}`, + }; + if (agent.gastownContainerSecret) { + headers['X-Gastown-Agent-Id'] = agent.agentId; + headers['X-Gastown-Rig-Id'] = agent.rigId; + } const response = await fetch(url, { method: 'POST', - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${token}`, - }, + headers, body: JSON.stringify({ status, reason, agentId: agent.agentId }), }); diff --git a/cloudflare-gastown/container/src/control-server.ts b/cloudflare-gastown/container/src/control-server.ts index d86461156b..3fa3d8f464 100644 --- a/cloudflare-gastown/container/src/control-server.ts +++ b/cloudflare-gastown/container/src/control-server.ts @@ -292,7 +292,12 @@ app.post('/git/merge', async c => { // Run the merge in the background so we can return 202 immediately. // The Rig DO will be notified via callback when the merge completes. 
const apiUrl = req.envVars?.GASTOWN_API_URL ?? process.env.GASTOWN_API_URL; - const token = req.envVars?.GASTOWN_SESSION_TOKEN ?? process.env.GASTOWN_SESSION_TOKEN; + // Prefer container secret (no expiry) over session token (8h JWT) + const token = + req.envVars?.GASTOWN_CONTAINER_SECRET ?? + process.env.GASTOWN_CONTAINER_SECRET ?? + req.envVars?.GASTOWN_SESSION_TOKEN ?? + process.env.GASTOWN_SESSION_TOKEN; const doMerge = async () => { const outcome = await mergeBranch({ @@ -515,11 +520,12 @@ app.onError((err, c) => { export function startControlServer(): void { const PORT = 8080; - // Start heartbeat if env vars are configured + // Start heartbeat if env vars are configured. + // Prefer container secret (no expiry) over session token (8h JWT). const apiUrl = process.env.GASTOWN_API_URL; - const sessionToken = process.env.GASTOWN_SESSION_TOKEN; - if (apiUrl && sessionToken) { - startHeartbeat(apiUrl, sessionToken); + const authToken = process.env.GASTOWN_CONTAINER_SECRET ?? process.env.GASTOWN_SESSION_TOKEN; + if (apiUrl && authToken) { + startHeartbeat(apiUrl, authToken); } // Handle graceful shutdown diff --git a/cloudflare-gastown/container/src/heartbeat.ts b/cloudflare-gastown/container/src/heartbeat.ts index b09531207b..b1b5d00cf5 100644 --- a/cloudflare-gastown/container/src/heartbeat.ts +++ b/cloudflare-gastown/container/src/heartbeat.ts @@ -53,14 +53,20 @@ async function sendHeartbeats(): Promise { }; try { + const headers: Record = { + 'Content-Type': 'application/json', + Authorization: `Bearer ${sessionToken}`, + }; + // When using a container secret (contains ':'), send agent identity headers + if (sessionToken.includes(':')) { + headers['X-Gastown-Agent-Id'] = agent.agentId; + headers['X-Gastown-Rig-Id'] = agent.rigId; + } const response = await fetch( `${gastownApiUrl}/api/towns/${agent.townId}/rigs/${agent.rigId}/agents/${agent.agentId}/heartbeat`, { method: 'POST', - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer 
${sessionToken}`, - }, + headers, body: JSON.stringify(payload), } ); diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index ea4afcfda9..b750056542 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -97,16 +97,23 @@ function broadcastEvent(agentId: string, event: string, data: unknown): void { // Persist to AgentDO via the worker (fire-and-forget) const agent = agents.get(agentId); - if (agent?.gastownApiUrl && agent.gastownSessionToken) { + const authToken = agent?.gastownContainerSecret ?? agent?.gastownSessionToken; + if (agent?.gastownApiUrl && authToken) { + const headers: Record = { + 'Content-Type': 'application/json', + Authorization: `Bearer ${authToken}`, + }; + // When using container secret, send agent identity headers + if (agent.gastownContainerSecret) { + headers['X-Gastown-Agent-Id'] = agentId; + headers['X-Gastown-Rig-Id'] = agent.rigId ?? '_'; + } // POST to the worker's agent-events endpoint for persistent storage fetch( `${agent.gastownApiUrl}/api/towns/${agent.townId ?? '_'}/rigs/${agent.rigId ?? '_'}/agent-events`, { method: 'POST', - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${agent.gastownSessionToken}`, - }, + headers, body: JSON.stringify({ agent_id: agentId, event_type: event, @@ -315,6 +322,8 @@ export async function startAgent( messageCount: 0, exitReason: null, gastownApiUrl: request.envVars?.GASTOWN_API_URL ?? process.env.GASTOWN_API_URL ?? null, + gastownContainerSecret: + request.envVars?.GASTOWN_CONTAINER_SECRET ?? process.env.GASTOWN_CONTAINER_SECRET ?? null, gastownSessionToken: request.envVars?.GASTOWN_SESSION_TOKEN ?? null, completionCallbackUrl: request.envVars?.GASTOWN_COMPLETION_CALLBACK_URL ?? null, model: request.model ?? 
null, diff --git a/cloudflare-gastown/container/src/types.ts b/cloudflare-gastown/container/src/types.ts index 4544c94dc9..b8d988cb63 100644 --- a/cloudflare-gastown/container/src/types.ts +++ b/cloudflare-gastown/container/src/types.ts @@ -107,7 +107,9 @@ export type ManagedAgent = { exitReason: string | null; /** Gastown worker API URL for completion callbacks */ gastownApiUrl: string | null; - /** Agent-scoped JWT for authenticating callbacks to the Gastown worker */ + /** Container secret (HMAC-based, no expiry) — preferred auth for worker callbacks. */ + gastownContainerSecret: string | null; + /** Agent-scoped JWT for authenticating callbacks to the Gastown worker (legacy, 8h expiry). */ gastownSessionToken: string | null; /** Override the default completion callback URL (for agents not backed by a Rig DO) */ completionCallbackUrl: string | null; diff --git a/cloudflare-gastown/src/dos/town/container-dispatch.ts b/cloudflare-gastown/src/dos/town/container-dispatch.ts index 841ae4e713..6260aa04b1 100644 --- a/cloudflare-gastown/src/dos/town/container-dispatch.ts +++ b/cloudflare-gastown/src/dos/town/container-dispatch.ts @@ -5,6 +5,7 @@ import { getTownContainerStub } from '../TownContainer.do'; import { signAgentJWT } from '../../util/jwt.util'; +import { mintContainerSecret } from '../../util/container-secret.util'; import { buildPolecatSystemPrompt } from '../../prompts/polecat-system.prompt'; import { buildMayorSystemPrompt } from '../../prompts/mayor-system.prompt'; import type { TownConfig } from '../../types'; @@ -41,6 +42,9 @@ export async function resolveJWTSecret(env: Env): Promise { /** * Mint a short-lived agent JWT for the given agent to authenticate * API calls back to the gastown worker. + * + * @deprecated Prefer container secrets (ensureContainerSecret) for new code. + * Agent JWTs are retained for backwards compatibility during rollout. 
*/ export async function mintAgentToken( env: Env, @@ -57,6 +61,36 @@ export async function mintAgentToken( ); } +/** + * Ensure the container has a valid GASTOWN_CONTAINER_SECRET env var. + * Mints an HMAC-based token scoped to the townId and stores it on + * the TownContainerDO via setEnvVar(). The token has no expiry — it + * lives as long as the container process does. On container sleep/wake, + * a new token is minted automatically because setEnvVar re-runs. + * + * Returns the container secret so callers can also pass it as a + * per-request env var (for agents started before the env var was set). + */ +export async function ensureContainerSecret(env: Env, townId: string): Promise { + const jwtSecret = await resolveJWTSecret(env); + if (!jwtSecret) { + console.error(`${TOWN_LOG} ensureContainerSecret: no JWT secret available`); + return null; + } + + const secret = await mintContainerSecret(jwtSecret, townId); + try { + const container = getTownContainerStub(env, townId); + await container.setEnvVar('GASTOWN_CONTAINER_SECRET', secret); + } catch (err) { + console.warn( + `${TOWN_LOG} ensureContainerSecret: setEnvVar failed (container may not be running):`, + err instanceof Error ? err.message : err + ); + } + return secret; +} + /** Build the initial prompt for an agent from its bead. */ export function buildPrompt(params: { beadTitle: string; @@ -183,6 +217,13 @@ export async function startAgentInContainer( `${TOWN_LOG} startAgentInContainer: agentId=${params.agentId} role=${params.role} name=${params.agentName}` ); try { + // Ensure the container has a valid GASTOWN_CONTAINER_SECRET. + // This is the primary auth mechanism — an HMAC token that never expires, + // scoped to this town, and lives as long as the container process. + const containerSecret = await ensureContainerSecret(env, params.townId); + + // Also mint a per-agent JWT as fallback during rollout. Once all + // container code reads GASTOWN_CONTAINER_SECRET, this can be removed. 
const token = await mintAgentToken(env, { agentId: params.agentId, rigId: params.rigId, @@ -190,10 +231,10 @@ export async function startAgentInContainer( userId: params.userId, }); - if (!token) { + if (!containerSecret && !token) { console.error( - `${TOWN_LOG} startAgentInContainer: ABORTING — failed to mint JWT for agent ${params.agentId}. ` + - 'The agent would start without GASTOWN_SESSION_TOKEN and be unable to call back to the worker.' + `${TOWN_LOG} startAgentInContainer: ABORTING — failed to mint any auth token for agent ${params.agentId}. ` + + 'The agent would start without credentials and be unable to call back to the worker.' ); return false; } @@ -212,13 +253,16 @@ export async function startAgentInContainer( envVars.GITLAB_INSTANCE_URL = params.townConfig.git_auth.gitlab_instance_url; } + // Container secret is the primary auth mechanism (no expiry). + // The JWT is kept as a fallback during rollout. + if (containerSecret) envVars.GASTOWN_CONTAINER_SECRET = containerSecret; if (token) envVars.GASTOWN_SESSION_TOKEN = token; // kilocodeToken: prefer rig-level, fall back to town config const kilocodeToken = params.kilocodeToken ?? params.townConfig.kilocode_token; if (kilocodeToken) envVars.KILOCODE_TOKEN = kilocodeToken; console.log( - `${TOWN_LOG} startAgentInContainer: envVars built: keys=[${Object.keys(envVars).join(',')}] hasGitToken=${!!envVars.GIT_TOKEN} hasGitlabToken=${!!envVars.GITLAB_TOKEN} hasJwt=${!!token} hasKilocodeToken=${!!kilocodeToken} git_auth_keys=[${Object.keys(params.townConfig.git_auth ?? {}).join(',')}]` + `${TOWN_LOG} startAgentInContainer: envVars built: keys=[${Object.keys(envVars).join(',')}] hasGitToken=${!!envVars.GIT_TOKEN} hasGitlabToken=${!!envVars.GITLAB_TOKEN} hasContainerSecret=${!!containerSecret} hasJwt=${!!token} hasKilocodeToken=${!!kilocodeToken} git_auth_keys=[${Object.keys(params.townConfig.git_auth ?? 
{}).join(',')}]` ); const containerConfig = await buildContainerConfig(storage, env); @@ -302,6 +346,7 @@ export async function startMergeInContainer( } ): Promise { try { + const containerSecret = await ensureContainerSecret(env, params.townId); const token = await mintAgentToken(env, { agentId: params.agentId, rigId: params.rigId, @@ -319,6 +364,7 @@ export async function startMergeInContainer( if (params.townConfig.git_auth?.gitlab_instance_url) { envVars.GITLAB_INSTANCE_URL = params.townConfig.git_auth.gitlab_instance_url; } + if (containerSecret) envVars.GASTOWN_CONTAINER_SECRET = containerSecret; if (token) envVars.GASTOWN_SESSION_TOKEN = token; if (env.GASTOWN_API_URL) envVars.GASTOWN_API_URL = env.GASTOWN_API_URL; const mergeKilocodeToken = params.kilocodeToken ?? params.townConfig.kilocode_token; diff --git a/cloudflare-gastown/src/middleware/auth.middleware.ts b/cloudflare-gastown/src/middleware/auth.middleware.ts index d328d26812..cfed805cf5 100644 --- a/cloudflare-gastown/src/middleware/auth.middleware.ts +++ b/cloudflare-gastown/src/middleware/auth.middleware.ts @@ -2,6 +2,7 @@ import type { Context } from 'hono'; import { createMiddleware } from 'hono/factory'; import { extractBearerToken } from '@kilocode/worker-utils'; import { verifyAgentJWT, type AgentJWTPayload } from '../util/jwt.util'; +import { verifyContainerSecret } from '../util/container-secret.util'; import { resError } from '../util/res.util'; import type { GastownEnv } from '../gastown.worker'; @@ -34,10 +35,44 @@ export const townIdMiddleware = createMiddleware(async (c, next) => }); /** - * Auth middleware that requires a valid Gastown agent JWT via - * `Authorization: Bearer `. + * Try to authenticate with a container secret (HMAC-based, no expiry). + * Returns the AgentJWTPayload-shaped object if successful, null otherwise. + * Agent identity comes from X-Gastown-* headers which are trusted because + * the container secret proves the request came from the right town's container. 
+ */ +async function tryContainerSecretAuth( + c: Context, + token: string, + jwtSecret: string +): Promise { + // Container secrets contain colons (format: townId:nonce:hmac). + // JWTs contain dots (format: header.payload.signature). + // Quick format check to avoid unnecessary HMAC computation on JWTs. + if (!token.includes(':') || token.includes('.')) return null; + + const result = await verifyContainerSecret(token, jwtSecret); + if (!result.success) return null; + + // Build an AgentJWTPayload from the container secret + headers. + // The container secret proves town membership; headers provide agent identity. + const agentId = c.req.header('X-Gastown-Agent-Id') ?? ''; + const rigId = c.req.header('X-Gastown-Rig-Id') ?? ''; + const userId = c.req.header('X-Gastown-User-Id') ?? ''; + + return { + agentId, + rigId, + townId: result.payload.townId, + userId, + }; +} + +/** + * Auth middleware that accepts either: + * 1. A container secret (HMAC-based, no expiry) — preferred for container→worker calls + * 2. A legacy agent JWT (HS256, 8h expiry) — retained for backwards compatibility * - * Sets `agentJWT` on the Hono context. Also validates the JWT's townId + * Sets `agentJWT` on the Hono context. Also validates the token's townId * and rigId match the route params to prevent cross-town/cross-rig access. 
*/ export const authMiddleware = createMiddleware(async (c, next) => { @@ -52,31 +87,43 @@ export const authMiddleware = createMiddleware(async (c, next) => { return c.json(resError('Internal server error'), 500); } - const result = verifyAgentJWT(token, secret); - if (!result.success) { - return c.json(resError(result.error), 401); + // Try container secret first (fast HMAC check, no expiry) + let payload = await tryContainerSecretAuth(c, token, secret); + + // Fall back to legacy JWT verification + if (!payload) { + const result = verifyAgentJWT(token, secret); + if (!result.success) { + return c.json(resError(result.error), 401); + } + payload = result.payload; } - // Verify the rigId in the JWT matches the route param + // Verify the rigId matches the route param const rigId = c.req.param('rigId'); - if (rigId && result.payload.rigId !== rigId) { + if (rigId && payload.rigId && payload.rigId !== rigId) { return c.json(resError('Token rigId does not match route'), 403); } - // Verify the townId in the JWT matches the route param (cross-town guard) + // Verify the townId matches the route param (cross-town guard) const townId = c.req.param('townId'); - if (townId && townId !== result.payload.townId) { + if (townId && townId !== payload.townId) { return c.json(resError('Cross-town access denied'), 403); } - c.set('agentJWT', result.payload); + c.set('agentJWT', payload); return next(); }); /** - * Restricts a route to the specific agent identified by the JWT. - * Validates the agentId route param matches the JWT agentId. + * Restricts a route to the specific agent identified by the auth token. + * Validates the agentId route param matches the token's agentId. * Must be applied after `authMiddleware`. + * + * When using container secrets, agent identity is provided via headers + * and is not cryptographically bound to the token. 
The container secret + * proves the request came from the right town's container, and the + * container itself is trusted to correctly identify its agents. */ export const agentOnlyMiddleware = createMiddleware(async (c, next) => { const jwt = c.get('agentJWT'); @@ -85,7 +132,7 @@ export const agentOnlyMiddleware = createMiddleware(async (c, next) } const agentId = c.req.param('agentId'); - if (agentId && jwt.agentId !== agentId) { + if (agentId && jwt.agentId && jwt.agentId !== agentId) { return c.json(resError('Token agentId does not match route'), 403); } diff --git a/cloudflare-gastown/src/middleware/mayor-auth.middleware.ts b/cloudflare-gastown/src/middleware/mayor-auth.middleware.ts index ed127c09b2..3c8f32f096 100644 --- a/cloudflare-gastown/src/middleware/mayor-auth.middleware.ts +++ b/cloudflare-gastown/src/middleware/mayor-auth.middleware.ts @@ -1,16 +1,19 @@ import { createMiddleware } from 'hono/factory'; import { verifyAgentJWT } from '../util/jwt.util'; +import { verifyContainerSecret } from '../util/container-secret.util'; import { resError } from '../util/res.util'; import type { GastownEnv } from '../gastown.worker'; import { extractBearerToken } from '@kilocode/worker-utils'; import { resolveSecret } from '../util/secret.util'; /** - * Auth middleware for mayor tool routes. Validates a Gastown agent JWT - * and checks that the JWT's `townId` matches the `:townId` route param. + * Auth middleware for mayor tool routes. Accepts either: + * 1. A container secret (HMAC-based, no expiry) — preferred + * 2. A legacy agent JWT (HS256, 8h expiry) — backwards compatibility * - * Unlike the rig-scoped `authMiddleware` (which checks `rigId` match), - * this validates `townId` — the mayor operates cross-rig. + * Validates the token's `townId` matches the `:townId` route param. + * Unlike the rig-scoped `authMiddleware`, this does NOT check `rigId` + * because the mayor operates cross-rig. * * Sets `agentJWT` on the Hono context. 
*/ @@ -26,6 +29,25 @@ export const mayorAuthMiddleware = createMiddleware(async (c, next) return c.json(resError('Internal server error'), 500); } + // Try container secret first (HMAC-based, no expiry) + if (token.includes(':') && !token.includes('.')) { + const csResult = await verifyContainerSecret(token, secret); + if (csResult.success) { + const townId = c.req.param('townId'); + if (townId && csResult.payload.townId !== townId) { + return c.json(resError('Token townId does not match route'), 403); + } + c.set('agentJWT', { + agentId: c.req.header('X-Gastown-Agent-Id') ?? '', + rigId: c.req.header('X-Gastown-Rig-Id') ?? '', + townId: csResult.payload.townId, + userId: c.req.header('X-Gastown-User-Id') ?? '', + }); + return next(); + } + } + + // Fall back to legacy JWT verification const result = verifyAgentJWT(token, secret); if (!result.success) { return c.json(resError(result.error), 401); diff --git a/cloudflare-gastown/src/util/container-secret.util.ts b/cloudflare-gastown/src/util/container-secret.util.ts new file mode 100644 index 0000000000..b5e75754a2 --- /dev/null +++ b/cloudflare-gastown/src/util/container-secret.util.ts @@ -0,0 +1,86 @@ +/** + * Container secret — HMAC-based authentication for container→worker API calls. + * + * Replaces the per-agent JWT system with a per-container shared secret that + * never expires (lives as long as the container). When the container sleeps + * and restarts, a new secret is minted automatically. + * + * Token format: `<townId>:<nonce>:<hmac>` + * - townId: scopes the token to a specific town + * - nonce: random UUID, unique per container boot + * - hmac: HMAC-SHA256(secret, townId + ":" + nonce) — proves the worker minted it + * + * Verification is stateless — no DO lookup needed. The worker checks the HMAC + * using the shared GASTOWN_JWT_SECRET (same key used for agent JWTs). 
+ */ + +import { z } from 'zod'; + +const encoder = new TextEncoder(); + +async function hmacSign(secret: string, data: string): Promise { + const key = await crypto.subtle.importKey( + 'raw', + encoder.encode(secret), + { name: 'HMAC', hash: 'SHA-256' }, + false, + ['sign'] + ); + const sig = await crypto.subtle.sign('HMAC', key, encoder.encode(data)); + return [...new Uint8Array(sig)].map(b => b.toString(16).padStart(2, '0')).join(''); +} + +async function hmacVerify(secret: string, data: string, signature: string): Promise { + const expected = await hmacSign(secret, data); + // Constant-time comparison to prevent timing attacks + if (expected.length !== signature.length) return false; + let mismatch = 0; + for (let i = 0; i < expected.length; i++) { + mismatch |= expected.charCodeAt(i) ^ signature.charCodeAt(i); + } + return mismatch === 0; +} + +/** + * Mint a container secret token. Called once per container boot. + * The token has no expiry — it lives as long as the container does. + */ +export async function mintContainerSecret(jwtSecret: string, townId: string): Promise { + const nonce = crypto.randomUUID(); + const data = `${townId}:${nonce}`; + const hmac = await hmacSign(jwtSecret, data); + return `${townId}:${nonce}:${hmac}`; +} + +/** Parsed and verified container secret payload. */ +export const ContainerSecretPayload = z.object({ + townId: z.string(), + nonce: z.string(), +}); +export type ContainerSecretPayload = z.infer; + +/** + * Verify a container secret token. Stateless — only needs the shared secret. + * Returns the parsed payload on success, or an error string on failure. 
+ */ +export async function verifyContainerSecret( + token: string, + jwtSecret: string +): Promise<{ success: true; payload: ContainerSecretPayload } | { success: false; error: string }> { + const parts = token.split(':'); + if (parts.length !== 3) { + return { success: false, error: 'Invalid token format' }; + } + const [townId, nonce, hmac] = parts; + if (!townId || !nonce || !hmac) { + return { success: false, error: 'Invalid token format' }; + } + + const data = `${townId}:${nonce}`; + const valid = await hmacVerify(jwtSecret, data, hmac); + if (!valid) { + return { success: false, error: 'Invalid token signature' }; + } + + return { success: true, payload: { townId, nonce } }; +} From 18bc0417412c5f8b4e82b7dab4096a88d0e9be03 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 10 Mar 2026 12:16:58 -0500 Subject: [PATCH 04/13] fix(gastown): send userId header with container secret auth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mayor's gt_list_rigs tool requires a userId (to look up rigs via GastownUserDO). With JWT auth, userId was embedded in the token payload. With container secrets, it was missing — causing 401 'Missing userId in token'. Fix: inject GASTOWN_USER_ID as an env var in startAgentInContainer, propagate it through buildAgentEnv, and send it as X-Gastown-User-Id header alongside the container secret. 
--- cloudflare-gastown/container/plugin/client.ts | 10 +++++++++- cloudflare-gastown/container/plugin/types.ts | 4 ++++ cloudflare-gastown/container/src/agent-runner.ts | 8 +++++++- cloudflare-gastown/src/dos/town/container-dispatch.ts | 3 +++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/cloudflare-gastown/container/plugin/client.ts b/cloudflare-gastown/container/plugin/client.ts index 06efedbf69..4be1ec0899 100644 --- a/cloudflare-gastown/container/plugin/client.ts +++ b/cloudflare-gastown/container/plugin/client.ts @@ -31,6 +31,7 @@ export class GastownClient { private agentId: string; private rigId: string; private townId: string; + private userId: string | undefined; constructor(env: GastownEnv) { this.baseUrl = env.apiUrl.replace(/\/+$/, ''); this.containerSecret = env.containerSecret; @@ -38,6 +39,7 @@ export class GastownClient { this.agentId = env.agentId; this.rigId = env.rigId; this.townId = env.townId; + this.userId = env.userId; } private rigPath(path: string): string { @@ -54,10 +56,11 @@ export class GastownClient { headers.set('Content-Type', 'application/json'); // Prefer container secret (no expiry) over legacy JWT (8h expiry) headers.set('Authorization', `Bearer ${this.containerSecret ?? 
this.token}`); - // When using container secret, agent identity must be sent via headers + // When using container secret, identity must be sent via headers if (this.containerSecret) { headers.set('X-Gastown-Agent-Id', this.agentId); headers.set('X-Gastown-Rig-Id', this.rigId); + if (this.userId) headers.set('X-Gastown-User-Id', this.userId); } let response: Response; @@ -209,6 +212,7 @@ export class MayorGastownClient { private token: string; private agentId: string; private townId: string; + private userId: string | undefined; constructor(env: MayorGastownEnv) { this.baseUrl = env.apiUrl.replace(/\/+$/, ''); @@ -216,6 +220,7 @@ export class MayorGastownClient { this.token = env.sessionToken; this.agentId = env.agentId; this.townId = env.townId; + this.userId = env.userId; } private mayorPath(path: string): string { @@ -229,6 +234,7 @@ export class MayorGastownClient { headers.set('Authorization', `Bearer ${this.containerSecret ?? this.token}`); if (this.containerSecret) { headers.set('X-Gastown-Agent-Id', this.agentId); + if (this.userId) headers.set('X-Gastown-User-Id', this.userId); } let response: Response; @@ -374,6 +380,7 @@ export function createClientFromEnv(): GastownClient { agentId, rigId, townId, + userId: process.env.GASTOWN_USER_ID ?? undefined, }); } @@ -401,5 +408,6 @@ export function createMayorClientFromEnv(): MayorGastownClient { sessionToken: sessionToken ?? '', agentId, townId, + userId: process.env.GASTOWN_USER_ID ?? undefined, }); } diff --git a/cloudflare-gastown/container/plugin/types.ts b/cloudflare-gastown/container/plugin/types.ts index d0990b17a5..f8f0d1fe0e 100644 --- a/cloudflare-gastown/container/plugin/types.ts +++ b/cloudflare-gastown/container/plugin/types.ts @@ -126,6 +126,8 @@ export type GastownEnv = { agentId: string; rigId: string; townId: string; + /** Owner userId — needed by mayor tool routes (e.g. listRigs). 
*/ + userId?: string; }; // Environment variable config for the mayor (town-scoped) @@ -137,4 +139,6 @@ export type MayorGastownEnv = { sessionToken: string; agentId: string; townId: string; + /** Owner userId — needed by mayor tool routes (e.g. listRigs). */ + userId?: string; }; diff --git a/cloudflare-gastown/container/src/agent-runner.ts b/cloudflare-gastown/container/src/agent-runner.ts index acb9f3ad09..d5dc25f160 100644 --- a/cloudflare-gastown/container/src/agent-runner.ts +++ b/cloudflare-gastown/container/src/agent-runner.ts @@ -91,7 +91,13 @@ function buildAgentEnv(request: StartAgentRequest): Record { // the request or the container's own environment. // (KILO_API_URL and KILO_OPENROUTER_BASE are set at the container level // via TownContainerDO.envVars and inherited through process.env.) - const conditionalKeys = ['GASTOWN_API_URL', 'GASTOWN_SESSION_TOKEN', 'KILOCODE_TOKEN']; + const conditionalKeys = [ + 'GASTOWN_API_URL', + 'GASTOWN_CONTAINER_SECRET', + 'GASTOWN_SESSION_TOKEN', + 'GASTOWN_USER_ID', + 'KILOCODE_TOKEN', + ]; for (const key of conditionalKeys) { const value = resolveEnv(request, key); if (value) { diff --git a/cloudflare-gastown/src/dos/town/container-dispatch.ts b/cloudflare-gastown/src/dos/town/container-dispatch.ts index 6260aa04b1..23e2c80e70 100644 --- a/cloudflare-gastown/src/dos/town/container-dispatch.ts +++ b/cloudflare-gastown/src/dos/town/container-dispatch.ts @@ -257,6 +257,9 @@ export async function startAgentInContainer( // The JWT is kept as a fallback during rollout. if (containerSecret) envVars.GASTOWN_CONTAINER_SECRET = containerSecret; if (token) envVars.GASTOWN_SESSION_TOKEN = token; + // userId is needed by mayor tool routes (e.g. listRigs) and was + // previously carried only inside the JWT payload. + envVars.GASTOWN_USER_ID = params.userId; // kilocodeToken: prefer rig-level, fall back to town config const kilocodeToken = params.kilocodeToken ?? 
params.townConfig.kilocode_token; if (kilocodeToken) envVars.KILOCODE_TOKEN = kilocodeToken; From 655554dee05949cb238f31c5b49a2ee06d1cb4ea Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 10 Mar 2026 12:28:45 -0500 Subject: [PATCH 05/13] refactor(gastown): replace HMAC secrets with per-container JWT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the HMAC-based container secret approach with a simpler per-container JWT. The container JWT: - Carries { townId, userId, scope: 'container' } — same JWT format the auth middleware already understands - Has 8h expiry (same as legacy agent JWTs) but is proactively refreshed hourly by the TownDO alarm - Is shared by all agents in the container (one token per town) - Eliminates the X-Gastown-* identity headers — userId lives in the JWT, agentId/rigId come from route params Removes container-secret.util.ts entirely. The auth middleware tries container JWT verification first (scope: 'container'), falls back to legacy agent JWT verification. The TownDO alarm calls refreshContainerToken() once per hour, which mints a fresh JWT and pushes it to the TownContainerDO via setEnvVar('GASTOWN_CONTAINER_TOKEN', ...). 
--- cloudflare-gastown/container/plugin/client.ts | 51 ++++------- cloudflare-gastown/container/plugin/types.ts | 12 +-- .../container/src/agent-runner.ts | 3 +- .../container/src/completion-reporter.ts | 7 +- .../container/src/control-server.ts | 6 +- cloudflare-gastown/container/src/heartbeat.ts | 5 -- .../container/src/process-manager.ts | 11 +-- cloudflare-gastown/container/src/types.ts | 6 +- cloudflare-gastown/src/dos/Town.do.ts | 31 +++++++ .../src/dos/town/container-dispatch.ts | 70 ++++++++------- .../src/middleware/auth.middleware.ts | 41 ++++----- .../src/middleware/mayor-auth.middleware.ts | 31 +++---- .../src/util/container-secret.util.ts | 86 ------------------- cloudflare-gastown/src/util/jwt.util.ts | 57 ++++++++++++ 14 files changed, 186 insertions(+), 231 deletions(-) delete mode 100644 cloudflare-gastown/src/util/container-secret.util.ts diff --git a/cloudflare-gastown/container/plugin/client.ts b/cloudflare-gastown/container/plugin/client.ts index 4be1ec0899..6d9d5f5723 100644 --- a/cloudflare-gastown/container/plugin/client.ts +++ b/cloudflare-gastown/container/plugin/client.ts @@ -26,20 +26,18 @@ function isApiResponse( export class GastownClient { private baseUrl: string; - private containerSecret: string | undefined; + private containerToken: string | undefined; private token: string; private agentId: string; private rigId: string; private townId: string; - private userId: string | undefined; constructor(env: GastownEnv) { this.baseUrl = env.apiUrl.replace(/\/+$/, ''); - this.containerSecret = env.containerSecret; + this.containerToken = env.containerToken; this.token = env.sessionToken; this.agentId = env.agentId; this.rigId = env.rigId; this.townId = env.townId; - this.userId = env.userId; } private rigPath(path: string): string { @@ -54,14 +52,9 @@ export class GastownClient { // Normalize headers so callers can pass plain objects, Headers instances, or tuples const headers = new Headers(init?.headers); headers.set('Content-Type', 
'application/json'); - // Prefer container secret (no expiry) over legacy JWT (8h expiry) - headers.set('Authorization', `Bearer ${this.containerSecret ?? this.token}`); - // When using container secret, identity must be sent via headers - if (this.containerSecret) { - headers.set('X-Gastown-Agent-Id', this.agentId); - headers.set('X-Gastown-Rig-Id', this.rigId); - if (this.userId) headers.set('X-Gastown-User-Id', this.userId); - } + // Prefer container-scoped JWT (shared, refreshed by alarm) over + // legacy per-agent JWT (8h expiry, no refresh) + headers.set('Authorization', `Bearer ${this.containerToken ?? this.token}`); let response: Response; try { @@ -208,19 +201,17 @@ export class GastownClient { */ export class MayorGastownClient { private baseUrl: string; - private containerSecret: string | undefined; + private containerToken: string | undefined; private token: string; private agentId: string; private townId: string; - private userId: string | undefined; constructor(env: MayorGastownEnv) { this.baseUrl = env.apiUrl.replace(/\/+$/, ''); - this.containerSecret = env.containerSecret; + this.containerToken = env.containerToken; this.token = env.sessionToken; this.agentId = env.agentId; this.townId = env.townId; - this.userId = env.userId; } private mayorPath(path: string): string { @@ -230,12 +221,8 @@ export class MayorGastownClient { private async request(url: string, init?: RequestInit): Promise { const headers = new Headers(init?.headers); headers.set('Content-Type', 'application/json'); - // Prefer container secret (no expiry) over legacy JWT (8h expiry) - headers.set('Authorization', `Bearer ${this.containerSecret ?? this.token}`); - if (this.containerSecret) { - headers.set('X-Gastown-Agent-Id', this.agentId); - if (this.userId) headers.set('X-Gastown-User-Id', this.userId); - } + // Prefer container-scoped JWT over legacy per-agent JWT + headers.set('Authorization', `Bearer ${this.containerToken ?? 
this.token}`); let response: Response; try { @@ -354,18 +341,18 @@ export class GastownApiError extends Error { export function createClientFromEnv(): GastownClient { const apiUrl = process.env.GASTOWN_API_URL; - const containerSecret = process.env.GASTOWN_CONTAINER_SECRET; + const containerToken = process.env.GASTOWN_CONTAINER_TOKEN; const sessionToken = process.env.GASTOWN_SESSION_TOKEN; const agentId = process.env.GASTOWN_AGENT_ID; const rigId = process.env.GASTOWN_RIG_ID; const townId = process.env.GASTOWN_TOWN_ID; - // Require either containerSecret or sessionToken (prefer containerSecret) - const hasAuth = containerSecret || sessionToken; + // Require either containerToken or sessionToken (prefer containerToken) + const hasAuth = containerToken || sessionToken; if (!apiUrl || !hasAuth || !agentId || !rigId || !townId) { const missing = [ !apiUrl && 'GASTOWN_API_URL', - !hasAuth && 'GASTOWN_CONTAINER_SECRET or GASTOWN_SESSION_TOKEN', + !hasAuth && 'GASTOWN_CONTAINER_TOKEN or GASTOWN_SESSION_TOKEN', !agentId && 'GASTOWN_AGENT_ID', !rigId && 'GASTOWN_RIG_ID', !townId && 'GASTOWN_TOWN_ID', @@ -375,27 +362,26 @@ export function createClientFromEnv(): GastownClient { return new GastownClient({ apiUrl, - containerSecret: containerSecret ?? undefined, + containerToken: containerToken ?? undefined, sessionToken: sessionToken ?? '', agentId, rigId, townId, - userId: process.env.GASTOWN_USER_ID ?? 
undefined, }); } export function createMayorClientFromEnv(): MayorGastownClient { const apiUrl = process.env.GASTOWN_API_URL; - const containerSecret = process.env.GASTOWN_CONTAINER_SECRET; + const containerToken = process.env.GASTOWN_CONTAINER_TOKEN; const sessionToken = process.env.GASTOWN_SESSION_TOKEN; const agentId = process.env.GASTOWN_AGENT_ID; const townId = process.env.GASTOWN_TOWN_ID; - const hasAuth = containerSecret || sessionToken; + const hasAuth = containerToken || sessionToken; if (!apiUrl || !hasAuth || !agentId || !townId) { const missing = [ !apiUrl && 'GASTOWN_API_URL', - !hasAuth && 'GASTOWN_CONTAINER_SECRET or GASTOWN_SESSION_TOKEN', + !hasAuth && 'GASTOWN_CONTAINER_TOKEN or GASTOWN_SESSION_TOKEN', !agentId && 'GASTOWN_AGENT_ID', !townId && 'GASTOWN_TOWN_ID', ].filter(Boolean); @@ -404,10 +390,9 @@ export function createMayorClientFromEnv(): MayorGastownClient { return new MayorGastownClient({ apiUrl, - containerSecret: containerSecret ?? undefined, + containerToken: containerToken ?? undefined, sessionToken: sessionToken ?? '', agentId, townId, - userId: process.env.GASTOWN_USER_ID ?? undefined, }); } diff --git a/cloudflare-gastown/container/plugin/types.ts b/cloudflare-gastown/container/plugin/types.ts index f8f0d1fe0e..7f83b5e132 100644 --- a/cloudflare-gastown/container/plugin/types.ts +++ b/cloudflare-gastown/container/plugin/types.ts @@ -119,26 +119,22 @@ export type ConvoyDetail = Convoy & { // Environment variable config for the plugin (rig-scoped agents) export type GastownEnv = { apiUrl: string; - /** Container secret (HMAC-based, no expiry) — preferred auth mechanism. */ - containerSecret?: string; + /** Container-scoped JWT (shared by all agents, refreshed by alarm). */ + containerToken?: string; /** Legacy per-agent JWT (8h expiry) — fallback during rollout. */ sessionToken: string; agentId: string; rigId: string; townId: string; - /** Owner userId — needed by mayor tool routes (e.g. listRigs). 
*/ - userId?: string; }; // Environment variable config for the mayor (town-scoped) export type MayorGastownEnv = { apiUrl: string; - /** Container secret (HMAC-based, no expiry) — preferred auth mechanism. */ - containerSecret?: string; + /** Container-scoped JWT (shared by all agents, refreshed by alarm). */ + containerToken?: string; /** Legacy per-agent JWT (8h expiry) — fallback during rollout. */ sessionToken: string; agentId: string; townId: string; - /** Owner userId — needed by mayor tool routes (e.g. listRigs). */ - userId?: string; }; diff --git a/cloudflare-gastown/container/src/agent-runner.ts b/cloudflare-gastown/container/src/agent-runner.ts index d5dc25f160..ec9c0fa8a5 100644 --- a/cloudflare-gastown/container/src/agent-runner.ts +++ b/cloudflare-gastown/container/src/agent-runner.ts @@ -93,9 +93,8 @@ function buildAgentEnv(request: StartAgentRequest): Record { // via TownContainerDO.envVars and inherited through process.env.) const conditionalKeys = [ 'GASTOWN_API_URL', - 'GASTOWN_CONTAINER_SECRET', + 'GASTOWN_CONTAINER_TOKEN', 'GASTOWN_SESSION_TOKEN', - 'GASTOWN_USER_ID', 'KILOCODE_TOKEN', ]; for (const key of conditionalKeys) { diff --git a/cloudflare-gastown/container/src/completion-reporter.ts b/cloudflare-gastown/container/src/completion-reporter.ts index 840c0081cb..a927862f24 100644 --- a/cloudflare-gastown/container/src/completion-reporter.ts +++ b/cloudflare-gastown/container/src/completion-reporter.ts @@ -17,7 +17,7 @@ export async function reportAgentCompleted( reason?: string ): Promise { const apiUrl = agent.gastownApiUrl; - const authToken = agent.gastownContainerSecret ?? agent.gastownSessionToken; + const authToken = agent.gastownContainerToken ?? 
agent.gastownSessionToken; if (!apiUrl || !authToken) { console.warn( `Cannot report agent ${agent.agentId} completion: no API credentials on agent record` @@ -33,10 +33,7 @@ export async function reportAgentCompleted( 'Content-Type': 'application/json', Authorization: `Bearer ${authToken}`, }; - if (agent.gastownContainerSecret) { - headers['X-Gastown-Agent-Id'] = agent.agentId; - headers['X-Gastown-Rig-Id'] = agent.rigId; - } + const response = await fetch(url, { method: 'POST', headers, diff --git a/cloudflare-gastown/container/src/control-server.ts b/cloudflare-gastown/container/src/control-server.ts index 3fa3d8f464..1df93e8660 100644 --- a/cloudflare-gastown/container/src/control-server.ts +++ b/cloudflare-gastown/container/src/control-server.ts @@ -294,8 +294,8 @@ app.post('/git/merge', async c => { const apiUrl = req.envVars?.GASTOWN_API_URL ?? process.env.GASTOWN_API_URL; // Prefer container secret (no expiry) over session token (8h JWT) const token = - req.envVars?.GASTOWN_CONTAINER_SECRET ?? - process.env.GASTOWN_CONTAINER_SECRET ?? + req.envVars?.GASTOWN_CONTAINER_TOKEN ?? + process.env.GASTOWN_CONTAINER_TOKEN ?? req.envVars?.GASTOWN_SESSION_TOKEN ?? process.env.GASTOWN_SESSION_TOKEN; @@ -523,7 +523,7 @@ export function startControlServer(): void { // Start heartbeat if env vars are configured. // Prefer container secret (no expiry) over session token (8h JWT). const apiUrl = process.env.GASTOWN_API_URL; - const authToken = process.env.GASTOWN_CONTAINER_SECRET ?? process.env.GASTOWN_SESSION_TOKEN; + const authToken = process.env.GASTOWN_CONTAINER_TOKEN ?? 
process.env.GASTOWN_SESSION_TOKEN; if (apiUrl && authToken) { startHeartbeat(apiUrl, authToken); } diff --git a/cloudflare-gastown/container/src/heartbeat.ts b/cloudflare-gastown/container/src/heartbeat.ts index b1b5d00cf5..efd4f39522 100644 --- a/cloudflare-gastown/container/src/heartbeat.ts +++ b/cloudflare-gastown/container/src/heartbeat.ts @@ -57,11 +57,6 @@ async function sendHeartbeats(): Promise { 'Content-Type': 'application/json', Authorization: `Bearer ${sessionToken}`, }; - // When using a container secret (contains ':'), send agent identity headers - if (sessionToken.includes(':')) { - headers['X-Gastown-Agent-Id'] = agent.agentId; - headers['X-Gastown-Rig-Id'] = agent.rigId; - } const response = await fetch( `${gastownApiUrl}/api/towns/${agent.townId}/rigs/${agent.rigId}/agents/${agent.agentId}/heartbeat`, { diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index b750056542..5efeb7d83e 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -97,17 +97,12 @@ function broadcastEvent(agentId: string, event: string, data: unknown): void { // Persist to AgentDO via the worker (fire-and-forget) const agent = agents.get(agentId); - const authToken = agent?.gastownContainerSecret ?? agent?.gastownSessionToken; + const authToken = agent?.gastownContainerToken ?? agent?.gastownSessionToken; if (agent?.gastownApiUrl && authToken) { const headers: Record = { 'Content-Type': 'application/json', Authorization: `Bearer ${authToken}`, }; - // When using container secret, send agent identity headers - if (agent.gastownContainerSecret) { - headers['X-Gastown-Agent-Id'] = agentId; - headers['X-Gastown-Rig-Id'] = agent.rigId ?? '_'; - } // POST to the worker's agent-events endpoint for persistent storage fetch( `${agent.gastownApiUrl}/api/towns/${agent.townId ?? '_'}/rigs/${agent.rigId ?? 
'_'}/agent-events`, @@ -322,8 +317,8 @@ export async function startAgent( messageCount: 0, exitReason: null, gastownApiUrl: request.envVars?.GASTOWN_API_URL ?? process.env.GASTOWN_API_URL ?? null, - gastownContainerSecret: - request.envVars?.GASTOWN_CONTAINER_SECRET ?? process.env.GASTOWN_CONTAINER_SECRET ?? null, + gastownContainerToken: + request.envVars?.GASTOWN_CONTAINER_TOKEN ?? process.env.GASTOWN_CONTAINER_TOKEN ?? null, gastownSessionToken: request.envVars?.GASTOWN_SESSION_TOKEN ?? null, completionCallbackUrl: request.envVars?.GASTOWN_COMPLETION_CALLBACK_URL ?? null, model: request.model ?? null, diff --git a/cloudflare-gastown/container/src/types.ts b/cloudflare-gastown/container/src/types.ts index b8d988cb63..b47b485e72 100644 --- a/cloudflare-gastown/container/src/types.ts +++ b/cloudflare-gastown/container/src/types.ts @@ -107,9 +107,9 @@ export type ManagedAgent = { exitReason: string | null; /** Gastown worker API URL for completion callbacks */ gastownApiUrl: string | null; - /** Container secret (HMAC-based, no expiry) — preferred auth for worker callbacks. */ - gastownContainerSecret: string | null; - /** Agent-scoped JWT for authenticating callbacks to the Gastown worker (legacy, 8h expiry). */ + /** Container-scoped JWT (shared by all agents, refreshed by alarm). */ + gastownContainerToken: string | null; + /** Legacy per-agent JWT for authenticating callbacks to the Gastown worker. */ gastownSessionToken: string | null; /** Override the default completion callback URL (for agents not backed by a Rig DO) */ completionCallbackUrl: string | null; diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 6cd1ffba30..ec565ee536 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -1964,6 +1964,17 @@ export class TownDO extends DurableObject { } } + // Refresh the container-scoped JWT before any work that might + // trigger API calls. 
Throttled to once per hour (tokens have 8h + // expiry, so hourly refresh provides ample safety margin). + if (this.hasActiveWork()) { + try { + await this.refreshContainerToken(); + } catch (err) { + console.warn(`${TOWN_LOG} alarm: refreshContainerToken failed`, err); + } + } + // Process reviews FIRST so the refinery gets assigned before the // scheduler dispatches new polecats. This prevents downstream beads // from starting before upstream reviews are merged. @@ -2022,6 +2033,26 @@ export class TownDO extends DurableObject { } } + /** + * Push a fresh container-scoped JWT to the TownContainerDO. Called + * from the alarm handler, throttled to once per hour (tokens have + * 8h expiry). The TownContainerDO stores it as an env var so it's + * available to all agents in the container. + */ + private lastContainerTokenRefreshAt = 0; + private async refreshContainerToken(): Promise { + const TOKEN_REFRESH_INTERVAL_MS = 60 * 60_000; // 1 hour + const now = Date.now(); + if (now - this.lastContainerTokenRefreshAt < TOKEN_REFRESH_INTERVAL_MS) return; + this.lastContainerTokenRefreshAt = now; + + const townId = this.townId; + if (!townId) return; + const townConfig = await this.getTownConfig(); + const userId = townConfig.owner_user_id ?? 
townId; + await dispatch.ensureContainerToken(this.env, townId, userId); + } + private hasActiveWork(): boolean { const activeAgentRows = [ ...query( diff --git a/cloudflare-gastown/src/dos/town/container-dispatch.ts b/cloudflare-gastown/src/dos/town/container-dispatch.ts index 23e2c80e70..618d331eed 100644 --- a/cloudflare-gastown/src/dos/town/container-dispatch.ts +++ b/cloudflare-gastown/src/dos/town/container-dispatch.ts @@ -4,8 +4,7 @@ */ import { getTownContainerStub } from '../TownContainer.do'; -import { signAgentJWT } from '../../util/jwt.util'; -import { mintContainerSecret } from '../../util/container-secret.util'; +import { signAgentJWT, signContainerJWT } from '../../util/jwt.util'; import { buildPolecatSystemPrompt } from '../../prompts/polecat-system.prompt'; import { buildMayorSystemPrompt } from '../../prompts/mayor-system.prompt'; import type { TownConfig } from '../../types'; @@ -62,33 +61,35 @@ export async function mintAgentToken( } /** - * Ensure the container has a valid GASTOWN_CONTAINER_SECRET env var. - * Mints an HMAC-based token scoped to the townId and stores it on - * the TownContainerDO via setEnvVar(). The token has no expiry — it - * lives as long as the container process does. On container sleep/wake, - * a new token is minted automatically because setEnvVar re-runs. + * Mint a container-scoped JWT and store it on the TownContainerDO. + * One JWT per container — shared by all agents in the town. Carries + * { townId, userId, scope: 'container' } with a 30-day expiry (far + * longer than any container will run, but bounded for safety). * - * Returns the container secret so callers can also pass it as a - * per-request env var (for agents started before the env var was set). + * Returns the token so callers can also pass it as a per-agent env var. 
*/ -export async function ensureContainerSecret(env: Env, townId: string): Promise { +export async function ensureContainerToken( + env: Env, + townId: string, + userId: string +): Promise { const jwtSecret = await resolveJWTSecret(env); if (!jwtSecret) { - console.error(`${TOWN_LOG} ensureContainerSecret: no JWT secret available`); + console.error(`${TOWN_LOG} ensureContainerToken: no JWT secret available`); return null; } - const secret = await mintContainerSecret(jwtSecret, townId); + const token = signContainerJWT({ townId, userId }, jwtSecret); try { const container = getTownContainerStub(env, townId); - await container.setEnvVar('GASTOWN_CONTAINER_SECRET', secret); + await container.setEnvVar('GASTOWN_CONTAINER_TOKEN', token); } catch (err) { console.warn( - `${TOWN_LOG} ensureContainerSecret: setEnvVar failed (container may not be running):`, + `${TOWN_LOG} ensureContainerToken: setEnvVar failed (container may not be running):`, err instanceof Error ? err.message : err ); } - return secret; + return token; } /** Build the initial prompt for an agent from its bead. */ @@ -217,21 +218,20 @@ export async function startAgentInContainer( `${TOWN_LOG} startAgentInContainer: agentId=${params.agentId} role=${params.role} name=${params.agentName}` ); try { - // Ensure the container has a valid GASTOWN_CONTAINER_SECRET. - // This is the primary auth mechanism — an HMAC token that never expires, - // scoped to this town, and lives as long as the container process. - const containerSecret = await ensureContainerSecret(env, params.townId); + // Mint a container-scoped JWT (8h expiry, refreshed by TownDO alarm). + // One token per container — shared by all agents in the town. + // Carries { townId, userId, scope: 'container' }. + const containerToken = await ensureContainerToken(env, params.townId, params.userId); - // Also mint a per-agent JWT as fallback during rollout. Once all - // container code reads GASTOWN_CONTAINER_SECRET, this can be removed. 
- const token = await mintAgentToken(env, { + // Also mint a per-agent JWT as fallback during rollout. + const agentToken = await mintAgentToken(env, { agentId: params.agentId, rigId: params.rigId, townId: params.townId, userId: params.userId, }); - if (!containerSecret && !token) { + if (!containerToken && !agentToken) { console.error( `${TOWN_LOG} startAgentInContainer: ABORTING — failed to mint any auth token for agent ${params.agentId}. ` + 'The agent would start without credentials and be unable to call back to the worker.' @@ -253,19 +253,16 @@ export async function startAgentInContainer( envVars.GITLAB_INSTANCE_URL = params.townConfig.git_auth.gitlab_instance_url; } - // Container secret is the primary auth mechanism (no expiry). - // The JWT is kept as a fallback during rollout. - if (containerSecret) envVars.GASTOWN_CONTAINER_SECRET = containerSecret; - if (token) envVars.GASTOWN_SESSION_TOKEN = token; - // userId is needed by mayor tool routes (e.g. listRigs) and was - // previously carried only inside the JWT payload. - envVars.GASTOWN_USER_ID = params.userId; + // Container token is preferred (shared by all agents, refreshed by alarm). + // Legacy per-agent JWT kept as fallback during rollout. + if (containerToken) envVars.GASTOWN_CONTAINER_TOKEN = containerToken; + if (agentToken) envVars.GASTOWN_SESSION_TOKEN = agentToken; // kilocodeToken: prefer rig-level, fall back to town config const kilocodeToken = params.kilocodeToken ?? params.townConfig.kilocode_token; if (kilocodeToken) envVars.KILOCODE_TOKEN = kilocodeToken; console.log( - `${TOWN_LOG} startAgentInContainer: envVars built: keys=[${Object.keys(envVars).join(',')}] hasGitToken=${!!envVars.GIT_TOKEN} hasGitlabToken=${!!envVars.GITLAB_TOKEN} hasContainerSecret=${!!containerSecret} hasJwt=${!!token} hasKilocodeToken=${!!kilocodeToken} git_auth_keys=[${Object.keys(params.townConfig.git_auth ?? 
{}).join(',')}]` + `${TOWN_LOG} startAgentInContainer: envVars built: keys=[${Object.keys(envVars).join(',')}] hasGitToken=${!!envVars.GIT_TOKEN} hasGitlabToken=${!!envVars.GITLAB_TOKEN} hasContainerToken=${!!containerToken} hasAgentJwt=${!!agentToken} hasKilocodeToken=${!!kilocodeToken} git_auth_keys=[${Object.keys(params.townConfig.git_auth ?? {}).join(',')}]` ); const containerConfig = await buildContainerConfig(storage, env); @@ -349,12 +346,13 @@ export async function startMergeInContainer( } ): Promise { try { - const containerSecret = await ensureContainerSecret(env, params.townId); - const token = await mintAgentToken(env, { + const userId = params.townConfig.owner_user_id ?? ''; + const containerToken = await ensureContainerToken(env, params.townId, userId); + const agentToken = await mintAgentToken(env, { agentId: params.agentId, rigId: params.rigId, townId: params.townId, - userId: params.townConfig.owner_user_id ?? '', + userId, }); const envVars: Record = { ...(params.townConfig.env_vars ?? {}) }; @@ -367,8 +365,8 @@ export async function startMergeInContainer( if (params.townConfig.git_auth?.gitlab_instance_url) { envVars.GITLAB_INSTANCE_URL = params.townConfig.git_auth.gitlab_instance_url; } - if (containerSecret) envVars.GASTOWN_CONTAINER_SECRET = containerSecret; - if (token) envVars.GASTOWN_SESSION_TOKEN = token; + if (containerToken) envVars.GASTOWN_CONTAINER_TOKEN = containerToken; + if (agentToken) envVars.GASTOWN_SESSION_TOKEN = agentToken; if (env.GASTOWN_API_URL) envVars.GASTOWN_API_URL = env.GASTOWN_API_URL; const mergeKilocodeToken = params.kilocodeToken ?? 
params.townConfig.kilocode_token; if (mergeKilocodeToken) envVars.KILOCODE_TOKEN = mergeKilocodeToken; diff --git a/cloudflare-gastown/src/middleware/auth.middleware.ts b/cloudflare-gastown/src/middleware/auth.middleware.ts index cfed805cf5..3cac0c3f3f 100644 --- a/cloudflare-gastown/src/middleware/auth.middleware.ts +++ b/cloudflare-gastown/src/middleware/auth.middleware.ts @@ -1,8 +1,7 @@ import type { Context } from 'hono'; import { createMiddleware } from 'hono/factory'; import { extractBearerToken } from '@kilocode/worker-utils'; -import { verifyAgentJWT, type AgentJWTPayload } from '../util/jwt.util'; -import { verifyContainerSecret } from '../util/container-secret.util'; +import { verifyAgentJWT, verifyContainerJWT, type AgentJWTPayload } from '../util/jwt.util'; import { resError } from '../util/res.util'; import type { GastownEnv } from '../gastown.worker'; @@ -35,35 +34,27 @@ export const townIdMiddleware = createMiddleware(async (c, next) => }); /** - * Try to authenticate with a container secret (HMAC-based, no expiry). - * Returns the AgentJWTPayload-shaped object if successful, null otherwise. - * Agent identity comes from X-Gastown-* headers which are trusted because - * the container secret proves the request came from the right town's container. + * Try to authenticate with a container-scoped JWT (scope: 'container'). + * Returns an AgentJWTPayload-shaped object if successful, null otherwise. + * Container JWTs carry { townId, userId } but not agentId/rigId — those + * come from the route params and are trusted because the JWT proves the + * request came from the right town's container. */ -async function tryContainerSecretAuth( +function tryContainerJWTAuth( c: Context, token: string, jwtSecret: string -): Promise { - // Container secrets contain colons (format: townId:nonce:hmac). - // JWTs contain dots (format: header.payload.signature). - // Quick format check to avoid unnecessary HMAC computation on JWTs. 
- if (!token.includes(':') || token.includes('.')) return null; - - const result = await verifyContainerSecret(token, jwtSecret); +): AgentJWTPayload | null { + const result = verifyContainerJWT(token, jwtSecret); if (!result.success) return null; - // Build an AgentJWTPayload from the container secret + headers. - // The container secret proves town membership; headers provide agent identity. - const agentId = c.req.header('X-Gastown-Agent-Id') ?? ''; - const rigId = c.req.header('X-Gastown-Rig-Id') ?? ''; - const userId = c.req.header('X-Gastown-User-Id') ?? ''; - + // Populate agentId/rigId from route params — the container JWT proves + // the request came from this town's container, so we trust the URL. return { - agentId, - rigId, + agentId: c.req.param('agentId') ?? '', + rigId: c.req.param('rigId') ?? '', townId: result.payload.townId, - userId, + userId: result.payload.userId, }; } @@ -87,8 +78,8 @@ export const authMiddleware = createMiddleware(async (c, next) => { return c.json(resError('Internal server error'), 500); } - // Try container secret first (fast HMAC check, no expiry) - let payload = await tryContainerSecretAuth(c, token, secret); + // Try container-scoped JWT first (scope: 'container', 8h expiry + alarm refresh) + let payload = tryContainerJWTAuth(c, token, secret); // Fall back to legacy JWT verification if (!payload) { diff --git a/cloudflare-gastown/src/middleware/mayor-auth.middleware.ts b/cloudflare-gastown/src/middleware/mayor-auth.middleware.ts index 3c8f32f096..d104e3050d 100644 --- a/cloudflare-gastown/src/middleware/mayor-auth.middleware.ts +++ b/cloudflare-gastown/src/middleware/mayor-auth.middleware.ts @@ -1,6 +1,5 @@ import { createMiddleware } from 'hono/factory'; -import { verifyAgentJWT } from '../util/jwt.util'; -import { verifyContainerSecret } from '../util/container-secret.util'; +import { verifyAgentJWT, verifyContainerJWT } from '../util/jwt.util'; import { resError } from '../util/res.util'; import type { GastownEnv } 
from '../gastown.worker'; import { extractBearerToken } from '@kilocode/worker-utils'; @@ -29,22 +28,20 @@ export const mayorAuthMiddleware = createMiddleware(async (c, next) return c.json(resError('Internal server error'), 500); } - // Try container secret first (HMAC-based, no expiry) - if (token.includes(':') && !token.includes('.')) { - const csResult = await verifyContainerSecret(token, secret); - if (csResult.success) { - const townId = c.req.param('townId'); - if (townId && csResult.payload.townId !== townId) { - return c.json(resError('Token townId does not match route'), 403); - } - c.set('agentJWT', { - agentId: c.req.header('X-Gastown-Agent-Id') ?? '', - rigId: c.req.header('X-Gastown-Rig-Id') ?? '', - townId: csResult.payload.townId, - userId: c.req.header('X-Gastown-User-Id') ?? '', - }); - return next(); + // Try container-scoped JWT first (scope: 'container', carries townId + userId) + const containerResult = verifyContainerJWT(token, secret); + if (containerResult.success) { + const townId = c.req.param('townId'); + if (townId && containerResult.payload.townId !== townId) { + return c.json(resError('Token townId does not match route'), 403); } + c.set('agentJWT', { + agentId: '', + rigId: '', + townId: containerResult.payload.townId, + userId: containerResult.payload.userId, + }); + return next(); } // Fall back to legacy JWT verification diff --git a/cloudflare-gastown/src/util/container-secret.util.ts b/cloudflare-gastown/src/util/container-secret.util.ts deleted file mode 100644 index b5e75754a2..0000000000 --- a/cloudflare-gastown/src/util/container-secret.util.ts +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Container secret — HMAC-based authentication for container→worker API calls. - * - * Replaces the per-agent JWT system with a per-container shared secret that - * never expires (lives as long as the container). When the container sleeps - * and restarts, a new secret is minted automatically. 
- * - * Token format: `::` - * - townId: scopes the token to a specific town - * - nonce: random UUID, unique per container boot - * - hmac: HMAC-SHA256(secret, townId + ":" + nonce) — proves the worker minted it - * - * Verification is stateless — no DO lookup needed. The worker checks the HMAC - * using the shared GASTOWN_JWT_SECRET (same key used for agent JWTs). - */ - -import { z } from 'zod'; - -const encoder = new TextEncoder(); - -async function hmacSign(secret: string, data: string): Promise { - const key = await crypto.subtle.importKey( - 'raw', - encoder.encode(secret), - { name: 'HMAC', hash: 'SHA-256' }, - false, - ['sign'] - ); - const sig = await crypto.subtle.sign('HMAC', key, encoder.encode(data)); - return [...new Uint8Array(sig)].map(b => b.toString(16).padStart(2, '0')).join(''); -} - -async function hmacVerify(secret: string, data: string, signature: string): Promise { - const expected = await hmacSign(secret, data); - // Constant-time comparison to prevent timing attacks - if (expected.length !== signature.length) return false; - let mismatch = 0; - for (let i = 0; i < expected.length; i++) { - mismatch |= expected.charCodeAt(i) ^ signature.charCodeAt(i); - } - return mismatch === 0; -} - -/** - * Mint a container secret token. Called once per container boot. - * The token has no expiry — it lives as long as the container does. - */ -export async function mintContainerSecret(jwtSecret: string, townId: string): Promise { - const nonce = crypto.randomUUID(); - const data = `${townId}:${nonce}`; - const hmac = await hmacSign(jwtSecret, data); - return `${townId}:${nonce}:${hmac}`; -} - -/** Parsed and verified container secret payload. */ -export const ContainerSecretPayload = z.object({ - townId: z.string(), - nonce: z.string(), -}); -export type ContainerSecretPayload = z.infer; - -/** - * Verify a container secret token. Stateless — only needs the shared secret. - * Returns the parsed payload on success, or an error string on failure. 
- */ -export async function verifyContainerSecret( - token: string, - jwtSecret: string -): Promise<{ success: true; payload: ContainerSecretPayload } | { success: false; error: string }> { - const parts = token.split(':'); - if (parts.length !== 3) { - return { success: false, error: 'Invalid token format' }; - } - const [townId, nonce, hmac] = parts; - if (!townId || !nonce || !hmac) { - return { success: false, error: 'Invalid token format' }; - } - - const data = `${townId}:${nonce}`; - const valid = await hmacVerify(jwtSecret, data, hmac); - if (!valid) { - return { success: false, error: 'Invalid token signature' }; - } - - return { success: true, payload: { townId, nonce } }; -} diff --git a/cloudflare-gastown/src/util/jwt.util.ts b/cloudflare-gastown/src/util/jwt.util.ts index b7f5970a5e..2a9110786e 100644 --- a/cloudflare-gastown/src/util/jwt.util.ts +++ b/cloudflare-gastown/src/util/jwt.util.ts @@ -1,6 +1,8 @@ import jwt from 'jsonwebtoken'; import { z } from 'zod'; +// ── Legacy per-agent JWT (deprecated — retained for rollout compat) ───── + export const AgentJWTPayload = z.object({ agentId: z.string(), rigId: z.string(), @@ -42,3 +44,58 @@ export function signAgentJWT( expiresIn: expiresInSeconds, }); } + +// ── Per-container JWT (preferred — 8h expiry, one per container) ───────── + +export const ContainerJWTPayload = z.object({ + townId: z.string(), + userId: z.string(), + scope: z.literal('container'), +}); + +export type ContainerJWTPayload = z.infer; + +const CONTAINER_JWT_EXPIRY_SECONDS = 8 * 3600; // 8h — same as legacy agent JWTs + +/** + * Sign a container-scoped JWT. 8h expiry, periodically refreshed by + * the TownDO alarm. Short-lived to limit damage from exfiltration, + * but refreshed proactively so running containers never hit expiry. 
+ */ +export function signContainerJWT( + payload: { townId: string; userId: string }, + secret: string +): string { + return jwt.sign({ ...payload, scope: 'container' }, secret, { + algorithm: 'HS256', + expiresIn: CONTAINER_JWT_EXPIRY_SECONDS, + }); +} + +/** + * Verify a container-scoped JWT. Uses the standard 8h maxAge. + */ +export function verifyContainerJWT( + token: string, + secret: string +): { success: true; payload: ContainerJWTPayload } | { success: false; error: string } { + try { + const raw = jwt.verify(token, secret, { + algorithms: ['HS256'], + maxAge: '8h', + }); + const parsed = ContainerJWTPayload.safeParse(raw); + if (!parsed.success) { + return { success: false, error: 'Invalid container token payload' }; + } + return { success: true, payload: parsed.data }; + } catch (error) { + if (error instanceof jwt.TokenExpiredError) { + return { success: false, error: 'Token expired' }; + } + if (error instanceof jwt.JsonWebTokenError) { + return { success: false, error: 'Invalid token signature' }; + } + return { success: false, error: 'Token validation failed' }; + } +} From 1d22c5b65fabc1d9c67ddcccc2c583c3f3474b9b Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 10 Mar 2026 14:04:32 -0500 Subject: [PATCH 06/13] fix(gastown): address PR review comments - Add auth guard to startMergeInContainer (missing null check for tokens) - Add POST /refresh-token endpoint to container control server so the alarm-based refresh actually updates process.env on the running Bun process (setEnvVar only takes effect on next boot) - Plugin clients read process.env.GASTOWN_CONTAINER_TOKEN on each request to pick up refreshed tokens without needing to restart - dispatch.refreshContainerToken() pushes fresh JWT to both the ContainerDO (setEnvVar for next boot) and the running container (POST /refresh-token for current process) - Clarify auth middleware comments re: intentional no-op checks for container JWTs (town-scoped, rig/agent identity from route params) --- 
cloudflare-gastown/container/plugin/client.ts | 14 ++++-- .../container/src/control-server.ts | 14 ++++++ cloudflare-gastown/src/dos/Town.do.ts | 2 +- .../src/dos/town/container-dispatch.ts | 47 ++++++++++++++++++- .../src/middleware/auth.middleware.ts | 25 ++++++---- 5 files changed, 85 insertions(+), 17 deletions(-) diff --git a/cloudflare-gastown/container/plugin/client.ts b/cloudflare-gastown/container/plugin/client.ts index 6d9d5f5723..cc9c57b421 100644 --- a/cloudflare-gastown/container/plugin/client.ts +++ b/cloudflare-gastown/container/plugin/client.ts @@ -52,9 +52,11 @@ export class GastownClient { // Normalize headers so callers can pass plain objects, Headers instances, or tuples const headers = new Headers(init?.headers); headers.set('Content-Type', 'application/json'); - // Prefer container-scoped JWT (shared, refreshed by alarm) over - // legacy per-agent JWT (8h expiry, no refresh) - headers.set('Authorization', `Bearer ${this.containerToken ?? this.token}`); + // Prefer the live container token from process.env (refreshed by the + // TownDO alarm via POST /refresh-token), then the token captured at + // init, then the legacy per-agent JWT. + const authToken = process.env.GASTOWN_CONTAINER_TOKEN ?? this.containerToken ?? this.token; + headers.set('Authorization', `Bearer ${authToken}`); let response: Response; try { @@ -221,8 +223,10 @@ export class MayorGastownClient { private async request(url: string, init?: RequestInit): Promise { const headers = new Headers(init?.headers); headers.set('Content-Type', 'application/json'); - // Prefer container-scoped JWT over legacy per-agent JWT - headers.set('Authorization', `Bearer ${this.containerToken ?? this.token}`); + // Prefer live container token (refreshed via POST /refresh-token), + // then init-time token, then legacy per-agent JWT. + const authToken = process.env.GASTOWN_CONTAINER_TOKEN ?? this.containerToken ?? 
this.token; + headers.set('Authorization', `Bearer ${authToken}`); let response: Response; try { diff --git a/cloudflare-gastown/container/src/control-server.ts b/cloudflare-gastown/container/src/control-server.ts index 1df93e8660..2abd100e74 100644 --- a/cloudflare-gastown/container/src/control-server.ts +++ b/cloudflare-gastown/container/src/control-server.ts @@ -93,6 +93,20 @@ app.get('/health', c => { return c.json(response); }); +// POST /refresh-token +// Hot-swap the container-scoped JWT on the running process. Called by +// the TownDO alarm to push a fresh token before the current one expires. +// Updates process.env so all subsequent API calls use the new token. +app.post('/refresh-token', async c => { + const body: unknown = await c.req.json().catch(() => null); + if (!body || typeof body !== 'object' || !('token' in body) || typeof body.token !== 'string') { + return c.json({ error: 'Missing or invalid token field' }, 400); + } + process.env.GASTOWN_CONTAINER_TOKEN = body.token; + console.log('[control-server] Container token refreshed'); + return c.json({ refreshed: true }); +}); + // POST /agents/start app.post('/agents/start', async c => { const body: unknown = await c.req.json().catch(() => null); diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index ec565ee536..88b0f63ca5 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -2050,7 +2050,7 @@ export class TownDO extends DurableObject { if (!townId) return; const townConfig = await this.getTownConfig(); const userId = townConfig.owner_user_id ?? 
townId; - await dispatch.ensureContainerToken(this.env, townId, userId); + await dispatch.refreshContainerToken(this.env, townId, userId); } private hasActiveWork(): boolean { diff --git a/cloudflare-gastown/src/dos/town/container-dispatch.ts b/cloudflare-gastown/src/dos/town/container-dispatch.ts index 618d331eed..45bfa4856e 100644 --- a/cloudflare-gastown/src/dos/town/container-dispatch.ts +++ b/cloudflare-gastown/src/dos/town/container-dispatch.ts @@ -63,8 +63,11 @@ export async function mintAgentToken( /** * Mint a container-scoped JWT and store it on the TownContainerDO. * One JWT per container — shared by all agents in the town. Carries - * { townId, userId, scope: 'container' } with a 30-day expiry (far - * longer than any container will run, but bounded for safety). + * { townId, userId, scope: 'container' } with 8h expiry. + * + * Stores via setEnvVar() so the token is available on the next container + * boot. For the running process, call refreshContainerToken() which also + * pushes to the container's /refresh-token endpoint. * * Returns the token so callers can also pass it as a per-agent env var. */ @@ -92,6 +95,38 @@ export async function ensureContainerToken( return token; } +/** + * Refresh the container token on both the TownContainerDO (for next boot) + * and the running container process (via POST /refresh-token). This ensures + * already-running agents pick up the fresh token for subsequent API calls. 
+ */ +export async function refreshContainerToken( + env: Env, + townId: string, + userId: string +): Promise { + const token = await ensureContainerToken(env, townId, userId); + if (!token) return; + + // Push to the running container process so process.env is updated + try { + const container = getTownContainerStub(env, townId); + const resp = await container.fetch('http://container/refresh-token', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ token }), + }); + if (!resp.ok) { + console.warn(`${TOWN_LOG} refreshContainerToken: container returned ${resp.status}`); + } + } catch (err) { + console.warn( + `${TOWN_LOG} refreshContainerToken: failed to push to container:`, + err instanceof Error ? err.message : err + ); + } +} + /** Build the initial prompt for an agent from its bead. */ export function buildPrompt(params: { beadTitle: string; @@ -355,6 +390,14 @@ export async function startMergeInContainer( userId, }); + if (!containerToken && !agentToken) { + console.error( + `${TOWN_LOG} startMergeInContainer: ABORTING — failed to mint any auth token for merge entry ${params.entryId}. ` + + 'The merge process would start without credentials and be unable to report results.' + ); + return false; + } + const envVars: Record = { ...(params.townConfig.env_vars ?? {}) }; if (params.townConfig.git_auth?.github_token) { envVars.GIT_TOKEN = params.townConfig.git_auth.github_token; diff --git a/cloudflare-gastown/src/middleware/auth.middleware.ts b/cloudflare-gastown/src/middleware/auth.middleware.ts index 3cac0c3f3f..b1f9aeb2d1 100644 --- a/cloudflare-gastown/src/middleware/auth.middleware.ts +++ b/cloudflare-gastown/src/middleware/auth.middleware.ts @@ -60,11 +60,13 @@ function tryContainerJWTAuth( /** * Auth middleware that accepts either: - * 1. A container secret (HMAC-based, no expiry) — preferred for container→worker calls - * 2. A legacy agent JWT (HS256, 8h expiry) — retained for backwards compatibility + * 1. 
A container-scoped JWT (scope: 'container') — preferred for container→worker calls + * 2. A legacy per-agent JWT (HS256, 8h expiry) — retained for backwards compatibility * - * Sets `agentJWT` on the Hono context. Also validates the token's townId - * and rigId match the route params to prevent cross-town/cross-rig access. + * Sets `agentJWT` on the Hono context. Validates: + * - townId always (cross-town guard) + * - rigId only for legacy agent JWTs (container JWTs are town-scoped; + * the container is trusted to call correct rig endpoints) */ export const authMiddleware = createMiddleware(async (c, next) => { const token = extractBearerToken(c.req.header('Authorization')); @@ -90,7 +92,9 @@ export const authMiddleware = createMiddleware(async (c, next) => { payload = result.payload; } - // Verify the rigId matches the route param + // Cross-rig guard: only enforced for legacy agent JWTs where the rigId + // is cryptographically bound to the token. Container JWTs are town-scoped + // and don't carry a rigId — the container is trusted within its town. const rigId = c.req.param('rigId'); if (rigId && payload.rigId && payload.rigId !== rigId) { return c.json(resError('Token rigId does not match route'), 403); @@ -111,10 +115,13 @@ export const authMiddleware = createMiddleware(async (c, next) => { * Validates the agentId route param matches the token's agentId. * Must be applied after `authMiddleware`. * - * When using container secrets, agent identity is provided via headers - * and is not cryptographically bound to the token. The container secret - * proves the request came from the right town's container, and the - * container itself is trusted to correctly identify its agents. + * For container JWTs: agentId is populated from the route param by + * tryContainerJWTAuth, so this check is a no-op (route param == route + * param). This is intentional — the container JWT is town-scoped, and + * the container is trusted to call the correct agent endpoints. 
+ * Cross-agent attacks require compromising the container itself, which + * is the same trust boundary the container already has (it runs all + * agents in the town). */ export const agentOnlyMiddleware = createMiddleware(async (c, next) => { const jwt = c.get('agentJWT'); From 24cdc9bd223ac099e968138477ace59031ac3783 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 10 Mar 2026 14:11:19 -0500 Subject: [PATCH 07/13] fix(gastown): address second round of PR review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - broadcastEvent: read live token from process.env instead of cached ManagedAgent field, so event persistence uses refreshed tokens - heartbeat: read process.env.GASTOWN_CONTAINER_TOKEN on each tick instead of the module-level cached token from startHeartbeat() - completion-reporter: same pattern — prefer live process.env token - Strip gastownContainerToken from /agents/start response to prevent leaking the town-wide bearer token to dashboard callers --- cloudflare-gastown/container/src/completion-reporter.ts | 4 +++- cloudflare-gastown/container/src/control-server.ts | 9 +++++++-- cloudflare-gastown/container/src/heartbeat.ts | 7 +++++-- cloudflare-gastown/container/src/process-manager.ts | 7 ++++++- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/cloudflare-gastown/container/src/completion-reporter.ts b/cloudflare-gastown/container/src/completion-reporter.ts index a927862f24..8ad4be23bd 100644 --- a/cloudflare-gastown/container/src/completion-reporter.ts +++ b/cloudflare-gastown/container/src/completion-reporter.ts @@ -17,7 +17,9 @@ export async function reportAgentCompleted( reason?: string ): Promise { const apiUrl = agent.gastownApiUrl; - const authToken = agent.gastownContainerToken ?? agent.gastownSessionToken; + // Prefer live container token (refreshed via POST /refresh-token) + const authToken = + process.env.GASTOWN_CONTAINER_TOKEN ?? agent.gastownContainerToken ?? 
agent.gastownSessionToken; if (!apiUrl || !authToken) { console.warn( `Cannot report agent ${agent.agentId} completion: no API credentials on agent record` diff --git a/cloudflare-gastown/container/src/control-server.ts b/cloudflare-gastown/container/src/control-server.ts index 2abd100e74..14c6b56fcd 100644 --- a/cloudflare-gastown/container/src/control-server.ts +++ b/cloudflare-gastown/container/src/control-server.ts @@ -127,8 +127,13 @@ app.post('/agents/start', async c => { `[control-server] /agents/start: success agentId=${agent.agentId} port=${agent.serverPort} session=${agent.sessionId}` ); // Strip sensitive fields before returning — the caller only needs - // agent metadata, not the internal session token or API URL. - const { gastownSessionToken: _, gastownApiUrl: _url, ...safeAgent } = agent; + // agent metadata, not the internal tokens or API URL. + const { + gastownSessionToken: _, + gastownContainerToken: _ct, + gastownApiUrl: _url, + ...safeAgent + } = agent; return c.json(safeAgent, 201); } catch (err) { const message = err instanceof Error ? err.message : String(err); diff --git a/cloudflare-gastown/container/src/heartbeat.ts b/cloudflare-gastown/container/src/heartbeat.ts index efd4f39522..8d78c31ff6 100644 --- a/cloudflare-gastown/container/src/heartbeat.ts +++ b/cloudflare-gastown/container/src/heartbeat.ts @@ -39,7 +39,10 @@ export function stopHeartbeat(): void { } async function sendHeartbeats(): Promise { - if (!gastownApiUrl || !sessionToken) return; + // Prefer the live container token (refreshed via POST /refresh-token) + // over the token captured at startHeartbeat() time. + const currentToken = process.env.GASTOWN_CONTAINER_TOKEN ?? 
sessionToken; + if (!gastownApiUrl || !currentToken) return; const active = listAgents().filter(a => a.status === 'running' || a.status === 'starting'); @@ -55,7 +58,7 @@ async function sendHeartbeats(): Promise { try { const headers: Record = { 'Content-Type': 'application/json', - Authorization: `Bearer ${sessionToken}`, + Authorization: `Bearer ${currentToken}`, }; const response = await fetch( `${gastownApiUrl}/api/towns/${agent.townId}/rigs/${agent.rigId}/agents/${agent.agentId}/heartbeat`, diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index 5efeb7d83e..33e30a663e 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -97,7 +97,12 @@ function broadcastEvent(agentId: string, event: string, data: unknown): void { // Persist to AgentDO via the worker (fire-and-forget) const agent = agents.get(agentId); - const authToken = agent?.gastownContainerToken ?? agent?.gastownSessionToken; + // Prefer live container token (refreshed via POST /refresh-token), + // then the per-agent cached token, then the legacy session token. + const authToken = + process.env.GASTOWN_CONTAINER_TOKEN ?? + agent?.gastownContainerToken ?? 
+ agent?.gastownSessionToken; if (agent?.gastownApiUrl && authToken) { const headers: Record = { 'Content-Type': 'application/json', From 722bffbcde8e910399b7e0786be94749e65d64e4 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 10 Mar 2026 14:21:55 -0500 Subject: [PATCH 08/13] fix(gastown): address third round of PR review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ensureContainerToken now pushes to running container via POST /refresh-token (not just setEnvVar), so existing agents pick up the fresh token immediately on every agent start — no gap until the next alarm-based refresh - refreshContainerToken is now an alias for ensureContainerToken since both do the same thing (setEnvVar + POST /refresh-token) - Move throttle timestamp update to after successful refresh so failed refreshes are retried on the next alarm tick instead of being throttled away for an hour --- cloudflare-gastown/src/dos/Town.do.ts | 4 +- .../src/dos/town/container-dispatch.ts | 54 ++++++++----------- 2 files changed, 26 insertions(+), 32 deletions(-) diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 88b0f63ca5..4f34fde7c6 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -2044,13 +2044,15 @@ export class TownDO extends DurableObject { const TOKEN_REFRESH_INTERVAL_MS = 60 * 60_000; // 1 hour const now = Date.now(); if (now - this.lastContainerTokenRefreshAt < TOKEN_REFRESH_INTERVAL_MS) return; - this.lastContainerTokenRefreshAt = now; const townId = this.townId; if (!townId) return; const townConfig = await this.getTownConfig(); const userId = townConfig.owner_user_id ?? townId; await dispatch.refreshContainerToken(this.env, townId, userId); + // Only mark as refreshed after success — failed refreshes should + // be retried on the next alarm tick, not throttled for an hour. 
+ this.lastContainerTokenRefreshAt = now; } private hasActiveWork(): boolean { diff --git a/cloudflare-gastown/src/dos/town/container-dispatch.ts b/cloudflare-gastown/src/dos/town/container-dispatch.ts index 45bfa4856e..6c579d0093 100644 --- a/cloudflare-gastown/src/dos/town/container-dispatch.ts +++ b/cloudflare-gastown/src/dos/town/container-dispatch.ts @@ -61,13 +61,14 @@ export async function mintAgentToken( } /** - * Mint a container-scoped JWT and store it on the TownContainerDO. + * Mint a container-scoped JWT and push it to the TownContainerDO. * One JWT per container — shared by all agents in the town. Carries * { townId, userId, scope: 'container' } with 8h expiry. * - * Stores via setEnvVar() so the token is available on the next container - * boot. For the running process, call refreshContainerToken() which also - * pushes to the container's /refresh-token endpoint. + * Pushes via both setEnvVar() (for next container boot) and + * POST /refresh-token (for the running process). This ensures that + * all code paths — existing agents, heartbeat, event persistence — + * pick up the fresh token immediately. * * Returns the token so callers can also pass it as a per-agent env var. */ @@ -83,8 +84,10 @@ export async function ensureContainerToken( } const token = signContainerJWT({ townId, userId }, jwtSecret); + const container = getTownContainerStub(env, townId); + + // Store for next boot try { - const container = getTownContainerStub(env, townId); await container.setEnvVar('GASTOWN_CONTAINER_TOKEN', token); } catch (err) { console.warn( @@ -92,41 +95,30 @@ export async function ensureContainerToken( err instanceof Error ? err.message : err ); } - return token; -} - -/** - * Refresh the container token on both the TownContainerDO (for next boot) - * and the running container process (via POST /refresh-token). This ensures - * already-running agents pick up the fresh token for subsequent API calls. 
- */ -export async function refreshContainerToken( - env: Env, - townId: string, - userId: string -): Promise { - const token = await ensureContainerToken(env, townId, userId); - if (!token) return; - // Push to the running container process so process.env is updated + // Push to running process so existing agents pick up the fresh token try { - const container = getTownContainerStub(env, townId); - const resp = await container.fetch('http://container/refresh-token', { + await container.fetch('http://container/refresh-token', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ token }), }); - if (!resp.ok) { - console.warn(`${TOWN_LOG} refreshContainerToken: container returned ${resp.status}`); - } - } catch (err) { - console.warn( - `${TOWN_LOG} refreshContainerToken: failed to push to container:`, - err instanceof Error ? err.message : err - ); + } catch { + // Container may not be running yet — that's fine, the token will + // be in envVars when it boots. } + + return token; } +/** + * Alias for ensureContainerToken — both functions now push to the + * running container process via POST /refresh-token. Kept as a + * separate export for call-site readability (alarm code calls + * "refresh", dispatch code calls "ensure"). + */ +export const refreshContainerToken = ensureContainerToken; + /** Build the initial prompt for an agent from its bead. */ export function buildPrompt(params: { beadTitle: string; From 2b6dbf8ba3a0192f717b39b1cdb4a9c4d8e4ac1b Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 10 Mar 2026 14:30:33 -0500 Subject: [PATCH 09/13] fix(gastown): populate agentId from header for routes without :agentId param Container JWTs don't carry agentId, and routes like /triage/resolve and /mail don't have :agentId in the URL. This left agentId as '' in the auth payload, breaking handleResolveTriage (which requires a non-empty agentId) and weakening ownership checks in other handlers. 
Fix: tryContainerJWTAuth falls back to X-Gastown-Agent-Id and X-Gastown-Rig-Id headers when route params are absent. Both plugin clients (GastownClient, MayorGastownClient) now send these headers when using a container-scoped JWT. --- cloudflare-gastown/container/plugin/client.ts | 10 ++++++++++ cloudflare-gastown/src/middleware/auth.middleware.ts | 10 ++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/cloudflare-gastown/container/plugin/client.ts b/cloudflare-gastown/container/plugin/client.ts index cc9c57b421..daf504f3e8 100644 --- a/cloudflare-gastown/container/plugin/client.ts +++ b/cloudflare-gastown/container/plugin/client.ts @@ -57,6 +57,13 @@ export class GastownClient { // init, then the legacy per-agent JWT. const authToken = process.env.GASTOWN_CONTAINER_TOKEN ?? this.containerToken ?? this.token; headers.set('Authorization', `Bearer ${authToken}`); + // When using a container-scoped JWT, send agent identity headers so + // the auth middleware can populate agentId/rigId on routes that don't + // have :agentId/:rigId params (e.g. /triage/resolve, /mail). + if (process.env.GASTOWN_CONTAINER_TOKEN || this.containerToken) { + headers.set('X-Gastown-Agent-Id', this.agentId); + headers.set('X-Gastown-Rig-Id', this.rigId); + } let response: Response; try { @@ -227,6 +234,9 @@ export class MayorGastownClient { // then init-time token, then legacy per-agent JWT. const authToken = process.env.GASTOWN_CONTAINER_TOKEN ?? this.containerToken ?? 
this.token; headers.set('Authorization', `Bearer ${authToken}`); + if (process.env.GASTOWN_CONTAINER_TOKEN || this.containerToken) { + headers.set('X-Gastown-Agent-Id', this.agentId); + } let response: Response; try { diff --git a/cloudflare-gastown/src/middleware/auth.middleware.ts b/cloudflare-gastown/src/middleware/auth.middleware.ts index b1f9aeb2d1..31559983ff 100644 --- a/cloudflare-gastown/src/middleware/auth.middleware.ts +++ b/cloudflare-gastown/src/middleware/auth.middleware.ts @@ -48,11 +48,13 @@ function tryContainerJWTAuth( const result = verifyContainerJWT(token, jwtSecret); if (!result.success) return null; - // Populate agentId/rigId from route params — the container JWT proves - // the request came from this town's container, so we trust the URL. + // Populate agentId/rigId from route params, falling back to headers + // for routes that don't have :agentId/:rigId params (e.g. /triage/resolve, + // /mail). The container JWT proves the request came from this town's + // container, so we trust both the URL and the identity headers. return { - agentId: c.req.param('agentId') ?? '', - rigId: c.req.param('rigId') ?? '', + agentId: c.req.param('agentId') ?? c.req.header('X-Gastown-Agent-Id') ?? '', + rigId: c.req.param('rigId') ?? c.req.header('X-Gastown-Rig-Id') ?? '', townId: result.payload.townId, userId: result.payload.userId, }; From b2789c4bf9cb5daa5aeb00c550d1c2ccd1d1f7d3 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 10 Mar 2026 15:40:42 -0500 Subject: [PATCH 10/13] fix(gastown): send agent identity header in broadcastEvent The /agent-events route doesn't have :agentId in the URL, so with a container JWT the getEnforcedAgentId() ownership check became a no-op. Add X-Gastown-Agent-Id/Rig-Id headers when using the container token so the handler can still verify agent_id ownership. 
--- cloudflare-gastown/container/src/process-manager.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index 33e30a663e..a0c70fce4e 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -108,6 +108,12 @@ function broadcastEvent(agentId: string, event: string, data: unknown): void { 'Content-Type': 'application/json', Authorization: `Bearer ${authToken}`, }; + // When using a container JWT, send agent identity so the handler's + // getEnforcedAgentId() ownership check still works. + if (process.env.GASTOWN_CONTAINER_TOKEN || agent.gastownContainerToken) { + headers['X-Gastown-Agent-Id'] = agentId; + if (agent.rigId) headers['X-Gastown-Rig-Id'] = agent.rigId; + } // POST to the worker's agent-events endpoint for persistent storage fetch( `${agent.gastownApiUrl}/api/towns/${agent.townId ?? '_'}/rigs/${agent.rigId ?? '_'}/agent-events`, From f3ce90faf5cb141ee6eed8447648b323ac84e0d5 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 10 Mar 2026 16:05:56 -0500 Subject: [PATCH 11/13] fix(gastown): propagate non-2xx refresh-token responses as failures container.fetch() only throws on transport errors. A 4xx/5xx from /refresh-token was silently swallowed, causing the alarm throttle to advance even though the container never accepted the new token. Now check resp.ok and throw on non-2xx so the error propagates to refreshContainerToken() in Town.do.ts, which only advances lastContainerTokenRefreshAt after success. 
--- .../src/dos/town/container-dispatch.ts | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/cloudflare-gastown/src/dos/town/container-dispatch.ts b/cloudflare-gastown/src/dos/town/container-dispatch.ts index 6c579d0093..e4c68b7232 100644 --- a/cloudflare-gastown/src/dos/town/container-dispatch.ts +++ b/cloudflare-gastown/src/dos/town/container-dispatch.ts @@ -96,16 +96,24 @@ export async function ensureContainerToken( ); } - // Push to running process so existing agents pick up the fresh token + // Push to running process so existing agents pick up the fresh token. + // Throw on non-2xx so the alarm's throttle doesn't advance on failure. try { - await container.fetch('http://container/refresh-token', { + const resp = await container.fetch('http://container/refresh-token', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ token }), }); - } catch { - // Container may not be running yet — that's fine, the token will - // be in envVars when it boots. + if (!resp.ok) { + throw new Error(`container returned ${resp.status}`); + } + } catch (err) { + // If the container isn't running yet, the token will be in envVars + // when it boots. But if it IS running and rejected the refresh, + // propagate the error so the alarm retries on the next tick. 
+ const isContainerDown = + err instanceof TypeError || (err instanceof Error && err.message.includes('fetch')); + if (!isContainerDown) throw err; } return token; From 9c7c2b502cb57577590fe0c04038a391e8afe023 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 10 Mar 2026 16:17:36 -0500 Subject: [PATCH 12/13] chore: remove accidentally committed plans document --- plans/gastown-org-level-architecture.md | 413 ------------------------ 1 file changed, 413 deletions(-) delete mode 100644 plans/gastown-org-level-architecture.md diff --git a/plans/gastown-org-level-architecture.md b/plans/gastown-org-level-architecture.md deleted file mode 100644 index 2ed924d681..0000000000 --- a/plans/gastown-org-level-architecture.md +++ /dev/null @@ -1,413 +0,0 @@ -# Gastown at the Organization Level - -## Overview - -Gastown towns are currently user-scoped — one `GastownUserDO` per user, keyed by `userId`, storing that user's towns and rigs. There is no organization awareness anywhere in the gastown worker, DOs, container, or tool plugin. - -The Kilo platform already has a mature org model: org membership with roles (`owner`, `member`, `billing_manager`), shared GitHub/GitLab integrations, org-level billing with per-user daily limits, seat subscriptions, SSO, audit logs, and the mutually-exclusive ownership pattern (`owned_by_user_id` XOR `owned_by_organization_id`) used across every resource type. - -This spec defines how Gastown adopts the org model — enabling teams to share towns, pool agent resources, and coordinate work across members while leveraging the existing org infrastructure. - ---- - -## Design Principles - -1. **Org towns are the default for teams.** When a user belongs to an org, the primary workflow is creating and working in org-owned towns. Personal towns still exist for individual use. -2. **Existing org infrastructure, not new infrastructure.** Billing, integrations, roles, SSO, audit logs — all use the existing org systems. 
Gastown doesn't reinvent any of this. -3. **Org members share everything in a town.** All members can see all towns, all rigs, all beads, all agent conversations. Visibility is town-wide. Fine-grained per-rig permissions are a future concern. -4. **The Mayor serves the team, not one user.** An org town's Mayor is a shared resource. Any member can chat with it. The Mayor maintains context about all members' conversations. -5. **Billing is org-level.** All LLM and container costs for org towns charge against the org balance. - ---- - -## Ownership Model - -### Town ownership follows the platform pattern - -Towns adopt the same mutually-exclusive ownership used by every other Kilo resource: - -| Town type | Owner | Who can access | Billing | -| ------------ | -------------------------- | ------------------------------- | -------------- | -| Personal | `owned_by_user_id` | Only the user | User's balance | -| Organization | `owned_by_organization_id` | All org members (based on role) | Org balance | - -A town is either personal or org-owned, never both. - -### Org role → town permissions - -| Org role | Can view towns | Can create towns | Can manage towns (delete, config) | Can chat with Mayor | Can view agents/beads | -| ----------------- | ------------------------------- | ---------------- | --------------------------------- | ------------------- | --------------------- | -| `owner` | Yes | Yes | Yes | Yes | Yes | -| `member` | Yes | Yes | No | Yes | Yes | -| `billing_manager` | No (not a user of the platform) | No | No | No | No | - -This mirrors how org roles map to other resources in the platform — owners manage, members use, billing managers handle money. - -### Town creation flow - -When creating a town, the UI checks the user's context: - -- **User has no org:** Town is personal. Same as today. -- **User has one org:** Default to org-owned. Option to create a personal town instead. -- **User has multiple orgs:** Org picker before town creation. 
Option for personal. - -The create-town API accepts an optional `organizationId`. When present, the backend verifies org membership before creating the town. - ---- - -## Architecture Changes - -### Replace GastownUserDO with owner-keyed lookup - -The current `GastownUserDO` is keyed by `userId` and stores that user's towns. This doesn't work for org-owned towns — multiple users need access to the same set of towns. - -**New approach:** Replace the per-user DO with an **owner-keyed DO** that can be keyed by either `userId` or `orgId`: - -```typescript -function getGastownOwnerStub(env: Env, owner: { type: 'user' | 'org'; id: string }) { - const key = `${owner.type}:${owner.id}`; - return env.GASTOWN_OWNER.get(env.GASTOWN_OWNER.idFromName(key)); -} -``` - -- Personal towns: `getGastownOwnerStub(env, { type: 'user', id: userId })` -- Org towns: `getGastownOwnerStub(env, { type: 'org', id: orgId })` - -The `owner_towns` table adds an `owner_type` and `owner_id` column: - -```sql -CREATE TABLE owner_towns ( - town_id TEXT PRIMARY KEY, - name TEXT NOT NULL, - owner_type TEXT NOT NULL, -- 'user' or 'org' - owner_id TEXT NOT NULL, -- userId or orgId - created_by TEXT NOT NULL, -- userId of the creator (for audit) - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL -); -``` - -### TownDO stores ownership context - -The TownDO config gains org awareness: - -```typescript -type TownConfig = { - owner_type: 'user' | 'org'; - owner_id: string; // userId or orgId - owner_user_id?: string; // set when owner_type = 'user' - organization_id?: string; // set when owner_type = 'org' - // ... existing config fields -}; -``` - -This propagates through: - -- **Container dispatch:** The container receives `organizationId` so it can resolve org-level integrations (GitHub tokens) and set appropriate env vars. -- **JWT minting:** The agent JWT payload gains `organizationId?: string` so rig-scoped tool calls carry org context. 
-- **Billing:** When the container makes LLM calls via the Kilo gateway, the `kilocodeToken` is minted with org context so costs charge against the org balance. - -### Route structure - -Add org-scoped routes alongside user-scoped routes: - -``` -# Personal towns (existing pattern, updated) -GET /api/users/:userId/towns -POST /api/users/:userId/towns - -# Org towns (new) -GET /api/orgs/:orgId/towns -POST /api/orgs/:orgId/towns - -# Town-level routes are the same regardless of ownership -# (townId is globally unique, no need for user/org prefix) -GET /api/towns/:townId/... -POST /api/towns/:townId/... -``` - -The town-level routes don't change — once you have a `townId`, the TownDO handles everything. The ownership context is already stored in the TownDO's config. - -### Auth middleware - -The gastown worker currently relies on CF Access as its only perimeter. For org support, add proper authorization: - -```typescript -// For /api/orgs/:orgId/towns/* routes -async function orgMiddleware(c: Context, next: Next) { - const orgId = c.req.param('orgId'); - const userId = getUserIdFromRequest(c); // from CF Access JWT or session - - // Verify org membership via the main Kilo API - const membership = await verifyOrgMembership(c.env, orgId, userId); - if (!membership) return c.json({ error: 'Not an org member' }, 403); - - c.set('orgId', orgId); - c.set('orgRole', membership.role); - c.set('userId', userId); - await next(); -} - -// For /api/towns/:townId/* routes -async function townAuthMiddleware(c: Context, next: Next) { - const townId = c.req.param('townId'); - const userId = getUserIdFromRequest(c); - - // Look up town ownership from TownDO config - const townDO = getTownDOStub(c.env, townId); - const config = await townDO.getConfig(); - - if (config.owner_type === 'user') { - if (config.owner_id !== userId) return c.json({ error: 'Forbidden' }, 403); - } else { - // Org-owned: verify caller is an org member - const membership = await verifyOrgMembership(c.env, 
config.organization_id!, userId); - if (!membership) return c.json({ error: 'Not an org member' }, 403); - } - - await next(); -} -``` - ---- - -## Shared Mayor - -In an org town, the Mayor is a shared resource. Multiple team members can chat with it concurrently or sequentially. - -### How it works - -The Mayor maintains a single persistent session per town (same as today). When any org member sends a message, it goes to the same Mayor session. The Mayor's conversation history includes messages from all members. - -Each message carries the sender's identity: - -```typescript -// When forwarding a user message to the Mayor's session -const systemContext = `[Message from ${userName} (${userRole})]`; -``` - -The Mayor can see who's talking to it and tailor responses accordingly. "Sarah asked me to refactor the auth module yesterday. You're asking about the auth module too — are you coordinating with her, or is this separate work?" - -### Mayor chat in the dashboard - -The town dashboard's Mayor chat panel shows the conversation to all connected members. Messages are attributed to their senders. This is a shared chat room where the Mayor is the AI participant and team members are the human participants. - -Implementation: The existing Mayor WebSocket stream (town-wide, multiplexed) already supports multiple connected clients. Each client sends messages with the user's identity. The Mayor's responses are broadcast to all connected clients. - -### Concurrency - -When two members send messages simultaneously, they're queued by the TownDO (DO RPC serialization guarantees single-writer). The Mayor processes them sequentially. The second message includes context from the first — the Mayor sees the full conversation, not isolated threads. - -If the team wants isolated conversations with the Mayor (e.g., a private question about performance), that's a future feature (per-user Mayor threads within an org town). For now, all Mayor interaction is shared. 
- ---- - -## Integrations - -### Org GitHub/GitLab apps are used automatically - -When creating a rig in an org-owned town, the repo picker shows repositories from the **org's GitHub/GitLab installations** (not the user's personal installations). This uses the existing `getIntegrationForOwner({ type: 'org', id: orgId }, 'github')` infrastructure. - -The flow: - -1. User clicks "Add Rig" in an org town -2. Backend calls `getIntegrationForOwner({ type: 'org', id: orgId }, 'github')` -3. Repo picker shows org-accessible repos -4. On rig creation, `platform_integration_id` on the rig references the org's integration -5. When the container needs a git token, it's minted from the org's GitHub App installation - -If the org doesn't have a GitHub App installed, the "Add Rig" flow prompts the user to install it (requires org `owner` role). - ---- - -## Billing - -### Org towns charge the org - -All LLM costs for agents in org-owned towns charge against the org balance. This uses the existing `getBalanceForOrganizationUser(orgId, userId)` infrastructure: - -1. When the TownDO dispatches an agent, it mints a `kilocodeToken` scoped to the org -2. The container's kilo serve instances route LLM calls through the Kilo gateway with this token -3. The gateway charges usage to the org's `microdollars_used` - -### Container costs - -Cloudflare Container costs are per-town. For org towns, these costs are attributed to the org. Metering uses the existing `microdollar_usage` table with `organization_id` set. 
- ---- - -## Cross-Member Visibility - -### Dashboard shows everything - -When any org member opens an org town's dashboard, they see the complete picture: - -- All rigs, all beads, all agents, all convoys -- All members' Mayor chat history -- All agent conversation streams -- All merge queue entries and their outcomes -- Activity feed across all members' actions - -Attribution is clear — every bead shows who created it, every convoy shows who initiated it, every Mayor message shows who sent it. The dashboard answers "what is happening across the entire team's agent fleet?" - -### Notifications - -When an event occurs in an org town (convoy lands, escalation raised, merge failed), all connected dashboard clients receive the event via the existing WebSocket stream. Targeted notifications (e.g., "your convoy landed") use the `created_by` field on beads to identify the relevant member. - -Future: Slack integration for org towns. Gastown events post to an org's Slack channel via the existing `organization-slack-router` infrastructure. "Convoy cv-abc landed: 5/5 beads merged across 2 rigs. Total cost: $23.40." 
- ---- - -## Audit Trail - -### Org audit logs include Gastown events - -The existing `organization_audit_logs` table gains new action types for Gastown events: - -| Action | Details | -| ----------------------------- | -------------------------------- | -| `gastown.town.create` | Member created a town | -| `gastown.town.delete` | Owner deleted a town | -| `gastown.town.config_change` | Owner changed town config | -| `gastown.rig.create` | Member added a rig | -| `gastown.rig.delete` | Owner removed a rig | -| `gastown.convoy.create` | Member/Mayor initiated a convoy | -| `gastown.convoy.landed` | Convoy completed | -| `gastown.escalation.critical` | Critical escalation raised | -| `gastown.escalation.resolved` | Escalation acknowledged/resolved | - -These are written by the gastown worker when handling org-town events, via a service binding to the main Kilo API (or direct Postgres write if the gastown worker has DB access). - ---- - -## Org-Level Fleet View - -### The "all towns" dashboard - -Beyond individual town dashboards, org owners get an aggregate view across all their org's towns: - -**`/gastown/org/[orgId]`** shows: - -- **Town cards** — one per town, showing: name, active agent count, open bead count, today's spend, latest activity -- **Aggregate metrics** — total spend (today/this week/this month), total beads closed, total convoys landed, active agent count across all towns -- **Cost breakdown** — per-town, per-rig, per-model cost attribution -- **Performance comparison** — which towns/rigs have high first-pass merge rates, which have high rework rates -- **Active escalations** — all unacknowledged escalations across all towns, surfaced at the top - -This view is read-only for members and actionable for owners (click into any town, adjust config, kill runaway agents). - -### Cross-town convoys - -A convoy can track beads across multiple towns. 
This is natural because convoys are beads in the TownDO — but cross-town convoys require a coordination layer: - -1. The initiating town creates a convoy bead -2. For beads in other towns, the convoy uses `bead_dependencies` with HOP-style references: `{ depends_on: "town:other-town-id:bead-id", type: "tracks" }` -3. When a tracked bead in another town closes, that town's alarm notifies the initiating town (via a cross-town webhook or direct DO RPC if both towns are in the same org's gastown worker) -4. The initiating town updates convoy progress - -This extends the local Gastown convoy model to multi-town scope, which local Gastown doesn't support (convoys are per-town, tracking beads across rigs within one town). - ---- - -## Agent Identity at the Org Level - -### Agents are town-scoped, but CVs aggregate at the org level - -Within a town, agent identities are town-scoped (per #441). But across towns in the same org, agent performance data can be aggregated: - -- "Polecats using Claude Opus across all our towns have a 91% first-pass merge rate" -- "The payments-town has 3x the rework rate of the platform-town — something is wrong with the repo or the prompts" -- "Agent Toast in frontend-town has completed 47 beads with $0.83 average cost" - -This data lives in the TownDO (per-town agent beads and bead events). The org fleet view aggregates across TownDOs via the gastown worker. - -### Shared agent configurations - -Org owners can define agent configurations at the org level: - -```typescript -type OrgAgentConfig = { - default_model: string; - polecat_system_prompt_override?: string; - refinery_quality_gates?: string[]; - max_polecats_per_rig?: number; -}; -``` - -These serve as defaults for all towns in the org. Individual towns can override. This prevents the "every town is configured differently" problem and lets the org standardize on configurations that produce good results. 
- ---- - -## SSO and Auto-Provisioning - -When an org has SSO configured (via WorkOS), new team members who authenticate via SSO are auto-provisioned into the org. They immediately see all org-owned Gastown towns in their dashboard — no manual invitation or town sharing needed. - -The flow: - -1. New engineer joins company, authenticates via company SSO -2. WorkOS auto-provisions them into the Kilo org (existing behavior) -3. They navigate to Gastown, see all org towns -4. They open a town, chat with the Mayor, watch agents work - -Zero configuration for the new member. The org's Gastown infrastructure is immediately accessible. - ---- - -## Implementation Phases - -### Phase 1: Ownership and access control - -- Replace `GastownUserDO` with owner-keyed `GastownOwnerDO` -- Add `owner_type`/`owner_id` to town tables and TownDO config -- Add `organizationId` to agent JWT payload -- Add org auth middleware to gastown worker routes -- Add org-scoped routes (`/api/orgs/:orgId/towns`) -- Wire org membership verification - -### Phase 2: Billing integration - -- Mint org-scoped `kilocodeToken` for org town agents -- Route LLM costs to org balance via existing infrastructure -- Container cost attribution to org via `microdollar_usage` table - -### Phase 3: Shared Mayor and dashboard - -- Multi-user Mayor chat (message attribution, shared conversation) -- Dashboard access for all org members -- Activity feed shows member attribution - -### Phase 4: Org fleet view - -- Aggregate dashboard across all org towns -- Cost breakdown per town/rig/model -- Performance comparison metrics -- Cross-town escalation surfacing - -### Phase 5: Org-level configuration - -- Org-level agent config defaults (model, prompts, quality gates) -- Town-level overrides -- Shared formula library per org - -### Phase 6: Cross-town convoys - -- Cross-town bead references -- Cross-town convoy tracking and landing detection -- Cross-town notification routing - -### Phase 7: Audit and compliance - -- Gastown 
event types in org audit logs -- Org-level usage reporting -- Export capabilities for compliance - ---- - -## What This Enables (That Local Gastown Can't Do) - -1. **Team coordination** — Multiple engineers share a Mayor that knows what everyone is working on. "Don't touch the auth module, Sarah's convoy is refactoring it" happens naturally. -2. **Centralized cost visibility** — One dashboard showing total Gastown spend across all teams. -3. **Zero-config onboarding** — New engineer authenticates via SSO, immediately sees all org towns and can start using them. -4. **Org-wide performance data** — "Which model works best for our TypeScript repos?" answered from real production data across all teams. -5. **Cross-town project tracking** — A convoy that spans the frontend town, backend town, and infra town, with unified progress tracking and landing detection. -6. **Shared institutional knowledge** — Agent formulas, quality gate configs, and prompt tuning that work well for the org are shared across all towns, not siloed per developer. From 335df172d49d37310950e0327bf397c56b900262 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 10 Mar 2026 16:18:22 -0500 Subject: [PATCH 13/13] fix(gastown): use townId fallback for empty userId in startMergeInContainer owner_user_id is optional in TownConfigSchema, so the fallback was minting a container JWT with userId: '' which broke resolveUserId() in mayor tool handlers. Match the pattern used in refreshContainerToken by falling back to townId. 
--- cloudflare-gastown/src/dos/town/container-dispatch.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudflare-gastown/src/dos/town/container-dispatch.ts b/cloudflare-gastown/src/dos/town/container-dispatch.ts index e4c68b7232..51c6b67ed7 100644 --- a/cloudflare-gastown/src/dos/town/container-dispatch.ts +++ b/cloudflare-gastown/src/dos/town/container-dispatch.ts @@ -381,7 +381,7 @@ export async function startMergeInContainer( } ): Promise { try { - const userId = params.townConfig.owner_user_id ?? ''; + const userId = params.townConfig.owner_user_id ?? params.townId; const containerToken = await ensureContainerToken(env, params.townId, userId); const agentToken = await mintAgentToken(env, { agentId: params.agentId,