From 49754d69f0d0c5a33f4a7819f718f1e53eb04bc1 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Sat, 21 Mar 2026 11:15:52 -0500 Subject: [PATCH 1/7] fix: skip container_status events for running containers (#1368) Filter out 'running' status in the alarm pre-phase before calling upsertContainerStatus(). Running is the steady-state for healthy agents and a no-op in applyEvent(), so recording it just bloats the event table (~720 events/hour/agent). Non-running statuses (stopped, error, unknown) still get inserted for reconciler detection. --- cloudflare-gastown/src/dos/Town.do.ts | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 0cf7c974a7..37c48c2645 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -2891,10 +2891,16 @@ export class TownDO extends DurableObject { townId, row.bead_id ); - events.upsertContainerStatus(this.sql, row.bead_id, { - status: containerInfo.status, - exit_reason: containerInfo.exitReason, - }); + // Skip inserting events for 'running' — it's the steady-state and + // a no-op in applyEvent, so recording it just bloats the event table + // (~720 events/hour/agent). Non-running statuses (stopped, error, + // unknown) still get inserted so the reconciler can detect and handle them. + if (containerInfo.status !== 'running') { + events.upsertContainerStatus(this.sql, row.bead_id, { + status: containerInfo.status, + exit_reason: containerInfo.exitReason, + }); + } } catch (err) { console.warn( `${TOWN_LOG} alarm: container status check failed for agent=${row.bead_id}`, From f63be07378a97e344f0a664a6108a4e5521a22cf Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Sat, 21 Mar 2026 12:52:05 -0500 Subject: [PATCH 2/7] feat(gastown): add POST /debug/reconcile-dry-run endpoint (#1367) Add a debug endpoint that runs the reconciler against current live state and returns the actions it would emit without applying them. This enables inspecting what the reconciler thinks should happen at any given moment. - Add debugDryRun() method to TownDO that calls reconciler.reconcile() and returns actions + metrics without calling applyAction() - Add POST /debug/towns/:townId/reconcile-dry-run route following the same unauthenticated debug pattern as GET /debug/towns/:townId/status - Response includes actions array, actionsEmitted count, actionsByType breakdown, and pendingEventCount --- cloudflare-gastown/src/dos/Town.do.ts | 27 +++++++++++++++++++++++- cloudflare-gastown/src/gastown.worker.ts | 8 +++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 37c48c2645..b010ccdafb 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -30,7 +30,7 @@ import * as scheduling from './town/scheduling'; import * as events from './town/events'; import * as reconciler from './town/reconciler'; import { applyAction } from './town/actions'; -import type { ApplyActionContext } from './town/actions'; +import type { Action, ApplyActionContext } from './town/actions'; import { buildRefinerySystemPrompt } from '../prompts/refinery-system.prompt'; import { GitHubPRStatusSchema, GitLabMRStatusSchema } from '../util/platform-pr.util'; @@ -3690,6 +3690,31 @@ export class TownDO extends DurableObject { }; } + // DEBUG: dry-run the reconciler against current state, returning actions + // it would emit without applying them. Side-effect-free — reconcile() + // only reads SQLite state; applyAction() is never called. + async debugDryRun(): Promise<{ + actions: Action[]; + metrics: Pick< + reconciler.ReconcilerMetrics, + 'actionsEmitted' | 'actionsByType' | 'pendingEventCount' + >; + }> { + const actions = reconciler.reconcile(this.sql); + const actionsByType: Record = {}; + for (const a of actions) { + actionsByType[a.type] = (actionsByType[a.type] ?? 0) + 1; + } + return { + actions, + metrics: { + actionsEmitted: actions.length, + actionsByType, + pendingEventCount: events.pendingEventCount(this.sql), + }, + }; + } + // DEBUG: concise non-terminal bead summary — remove after debugging async debugBeadSummary(): Promise { return [ diff --git a/cloudflare-gastown/src/gastown.worker.ts b/cloudflare-gastown/src/gastown.worker.ts index 6c8efab713..1a1634f0d2 100644 --- a/cloudflare-gastown/src/gastown.worker.ts +++ b/cloudflare-gastown/src/gastown.worker.ts @@ -206,6 +206,14 @@ app.get('/debug/towns/:townId/status', async c => { return c.json({ alarmStatus, agentMeta, beadSummary }); }); +app.post('/debug/towns/:townId/reconcile-dry-run', async c => { + const townId = c.req.param('townId'); + const town = getTownDOStub(c.env, townId); + // eslint-disable-next-line @typescript-eslint/await-thenable -- DO RPC returns promise at runtime + const result = await town.debugDryRun(); + return c.json(result); +}); + // ── Town ID + Auth ────────────────────────────────────────────────────── // All rig routes live under /api/towns/:townId/rigs/:rigId so the townId // is always available from the URL path. From c8a756fc0b4e1d285738c523500d7819e8b627ee Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Sat, 21 Mar 2026 12:59:07 -0500 Subject: [PATCH 3/7] feat(gastown): add debug dry-run endpoint with event draining (#1370) * feat(claw): evaluate button-vs-card feature flag for PostHog experiment tracking * fix(claw): move button-vs-card flag eval to CreateInstanceCard Moves useFeatureFlagVariantKey('button-vs-card') from ClawDashboard (which renders for all users including those with existing instances) to CreateInstanceCard (which only renders for users who haven't provisioned yet). This scopes the experiment exposure to users who can actually see the create CTA, avoiding population dilution. * feat(gastown): add POST /debug/reconcile-dry-run endpoint Add a debug endpoint that runs the reconciler against current live state and returns the actions it would emit without applying them. This enables inspecting what the reconciler thinks should happen at any given moment. - Add debugDryRun() method to TownDO that calls reconciler.reconcile() and returns actions + metrics without calling applyAction() - Add POST /debug/towns/:townId/reconcile-dry-run route following the same unauthenticated debug pattern as GET /debug/towns/:townId/status - Response includes actions array, actionsEmitted count, actionsByType breakdown, and pendingEventCount * fix(gastown): drain pending events in debugDryRun() before reconciling Wrap debugDryRun() in a SQLite savepoint so it can drain and apply pending town_events (Phase 0) before running reconcile (Phase 1), matching the real alarm loop behavior. The savepoint is rolled back in a finally block so the endpoint remains fully side-effect-free. Adds eventsDrained to the returned metrics. --------- Co-authored-by: kiloconnect[bot] <240665456+kiloconnect[bot]@users.noreply.github.com> Co-authored-by: Pedro Heyerdahl Co-authored-by: Pedro Heyerdahl <61753986+pedroheyerdahl@users.noreply.github.com> --- cloudflare-gastown/src/dos/Town.do.ts | 51 +++++++++++++------ .../claw/components/CreateInstanceCard.tsx | 5 +- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index b010ccdafb..a634b6e387 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -3691,28 +3691,49 @@ export class TownDO extends DurableObject { } // DEBUG: dry-run the reconciler against current state, returning actions - // it would emit without applying them. Side-effect-free — reconcile() - // only reads SQLite state; applyAction() is never called. + // it would emit without applying them. Drains pending events first (same + // as the real alarm loop) inside a savepoint that is rolled back, so the + // endpoint remains fully side-effect-free. async debugDryRun(): Promise<{ actions: Action[]; metrics: Pick< reconciler.ReconcilerMetrics, - 'actionsEmitted' | 'actionsByType' | 'pendingEventCount' + 'actionsEmitted' | 'actionsByType' | 'pendingEventCount' | 'eventsDrained' >; }> { - const actions = reconciler.reconcile(this.sql); - const actionsByType: Record = {}; - for (const a of actions) { - actionsByType[a.type] = (actionsByType[a.type] ?? 0) + 1; + // Use a savepoint so we can drain events (which mutates state) + // then roll back without permanent side effects + this.sql.exec('SAVEPOINT debug_dry_run'); + try { + // Phase 0: Drain and apply pending events (same as real alarm loop) + const pending = events.drainEvents(this.sql); + for (const event of pending) { + reconciler.applyEvent(this.sql, event); + events.markProcessed(this.sql, event.event_id); + } + + // Phase 1: Reconcile against now-current state + const actions = reconciler.reconcile(this.sql); + const pendingEventCount = events.pendingEventCount(this.sql); + const actionsByType: Record = {}; + for (const a of actions) { + actionsByType[a.type] = (actionsByType[a.type] ?? 0) + 1; + } + + return { + actions, + metrics: { + actionsEmitted: actions.length, + actionsByType, + pendingEventCount, + eventsDrained: pending.length, + }, + }; + } finally { + // Roll back all state mutations — this is a dry run + this.sql.exec('ROLLBACK TO SAVEPOINT debug_dry_run'); + this.sql.exec('RELEASE SAVEPOINT debug_dry_run'); } - return { - actions, - metrics: { - actionsEmitted: actions.length, - actionsByType, - pendingEventCount: events.pendingEventCount(this.sql), - }, - }; } // DEBUG: concise non-terminal bead summary — remove after debugging diff --git a/src/app/(app)/claw/components/CreateInstanceCard.tsx b/src/app/(app)/claw/components/CreateInstanceCard.tsx index 29b330c9c0..a48c545845 100644 --- a/src/app/(app)/claw/components/CreateInstanceCard.tsx +++ b/src/app/(app)/claw/components/CreateInstanceCard.tsx @@ -1,7 +1,7 @@ 'use client'; import { useEffect, useMemo, useRef, useState } from 'react'; -import { usePostHog } from 'posthog-js/react'; +import { useFeatureFlagVariantKey, usePostHog } from 'posthog-js/react'; import { useQuery } from '@tanstack/react-query'; import { toast } from 'sonner'; import type { useKiloClawMutations } from '@/hooks/useKiloClaw'; @@ -25,6 +25,9 @@ export function CreateInstanceCard({ mutations: ClawMutations; onProvisionStart?: () => void; }) { + // Evaluate the landing-page experiment flag so PostHog attaches + // $feature/button-vs-card to events fired in this component. + useFeatureFlagVariantKey('button-vs-card'); const posthog = usePostHog(); const trpc = useTRPC(); const { data: billingStatus } = useQuery(trpc.kiloclaw.getBillingStatus.queryOptions()); From 9a634bf4e50a84a7d71912f33b429d2ee1cf8f43 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Sat, 21 Mar 2026 18:13:36 +0000 Subject: [PATCH 4/7] feat(gastown): add POST /debug/replay-events endpoint for event replay debugging Adds debugReplayEvents(from, to) method to Town.do.ts that queries all town_events in a time range (regardless of processed_at), applies them to reconstruct state transitions, runs the reconciler, and returns the computed actions and a state snapshot. Uses a SQLite SAVEPOINT that is rolled back so the endpoint remains fully side-effect-free. Route: POST /debug/towns/:townId/replay-events Body: { from: ISO, to: ISO } Response: { eventsReplayed, actions, stateSnapshot } --- cloudflare-gastown/src/dos/Town.do.ts | 93 ++++++++++++++++++++++++ cloudflare-gastown/src/gastown.worker.ts | 20 +++++ 2 files changed, 113 insertions(+) diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index a634b6e387..68f2f9c8fb 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -46,6 +46,7 @@ import { review_metadata } from '../db/tables/review-metadata.table'; import { escalation_metadata } from '../db/tables/escalation-metadata.table'; import { convoy_metadata } from '../db/tables/convoy-metadata.table'; import { bead_dependencies } from '../db/tables/bead-dependencies.table'; +import { town_events, TownEventRecord } from '../db/tables/town-events.table'; import { agent_nudges, AgentNudgeRecord, @@ -3690,6 +3691,98 @@ export class TownDO extends DurableObject { }; } + // DEBUG: replay events from a time range, apply them to state, run the + // reconciler, and return computed actions. Uses a savepoint + rollback so + // no state is permanently modified. + async debugReplayEvents( + from: string, + to: string + ): Promise<{ + eventsReplayed: number; + actions: Action[]; + stateSnapshot: { + agents: unknown[]; + nonTerminalBeads: unknown[]; + }; + }> { + this.sql.exec('SAVEPOINT debug_replay_events'); + try { + // Query ALL events in the time range regardless of processed_at + const rangeEvents = TownEventRecord.array().parse([ + ...query( + this.sql, + /* sql */ ` + SELECT ${town_events.event_id}, ${town_events.event_type}, + ${town_events.agent_id}, ${town_events.bead_id}, + ${town_events.payload}, ${town_events.created_at}, + ${town_events.processed_at} + FROM ${town_events} + WHERE ${town_events.created_at} >= ? + AND ${town_events.created_at} <= ? + ORDER BY ${town_events.created_at} ASC + `, + [from, to] + ), + ]); + + // Apply each event to reconstruct state transitions + for (const event of rangeEvents) { + reconciler.applyEvent(this.sql, event); + } + + // Run reconciler against the resulting state + const actions = reconciler.reconcile(this.sql); + + // Capture a state snapshot before rollback + const agentSnapshot = [ + ...query( + this.sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id}, + ${agent_metadata.role}, + ${agent_metadata.status}, + ${agent_metadata.current_hook_bead_id}, + ${agent_metadata.dispatch_attempts}, + ${agent_metadata.last_activity_at} + FROM ${agent_metadata} + `, + [] + ), + ]; + + const beadSnapshot = [ + ...query( + this.sql, + /* sql */ ` + SELECT ${beads.bead_id}, + ${beads.type}, + ${beads.status}, + ${beads.title}, + ${beads.assignee_agent_bead_id}, + ${beads.updated_at} + FROM ${beads} + WHERE ${beads.status} NOT IN ('closed', 'failed') + AND ${beads.type} != 'agent' + ORDER BY ${beads.type}, ${beads.status} + `, + [] + ), + ]; + + return { + eventsReplayed: rangeEvents.length, + actions, + stateSnapshot: { + agents: agentSnapshot, + nonTerminalBeads: beadSnapshot, + }, + }; + } finally { + this.sql.exec('ROLLBACK TO SAVEPOINT debug_replay_events'); + this.sql.exec('RELEASE SAVEPOINT debug_replay_events'); + } + } + // DEBUG: dry-run the reconciler against current state, returning actions // it would emit without applying them. Drains pending events first (same // as the real alarm loop) inside a savepoint that is rolled back, so the diff --git a/cloudflare-gastown/src/gastown.worker.ts b/cloudflare-gastown/src/gastown.worker.ts index 1a1634f0d2..f5118d0d6f 100644 --- a/cloudflare-gastown/src/gastown.worker.ts +++ b/cloudflare-gastown/src/gastown.worker.ts @@ -214,6 +214,26 @@ app.post('/debug/towns/:townId/reconcile-dry-run', async c => { return c.json(result); }); +app.post('/debug/towns/:townId/replay-events', async c => { + const townId = c.req.param('townId'); + const body = (await c.req.json()) as { from?: string; to?: string }; + if (!body.from || !body.to) { + return c.json({ error: 'Missing required fields: from, to (ISO timestamps)' }, 400); + } + const fromDate = new Date(body.from); + const toDate = new Date(body.to); + if (Number.isNaN(fromDate.getTime()) || Number.isNaN(toDate.getTime())) { + return c.json({ error: 'Invalid date format. Use ISO 8601 timestamps.' }, 400); + } + if (fromDate > toDate) { + return c.json({ error: '"from" must be before or equal to "to"' }, 400); + } + const town = getTownDOStub(c.env, townId); + // eslint-disable-next-line @typescript-eslint/await-thenable -- DO RPC returns promise at runtime + const result = await town.debugReplayEvents(body.from, body.to); + return c.json(result); +}); + // ── Town ID + Auth ────────────────────────────────────────────────────── // All rig routes live under /api/towns/:townId/rigs/:rigId so the townId // is always available from the URL path. From c4c6ccb890e676e2c26f3a2f78770f262605aeae Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Mon, 23 Mar 2026 09:25:20 -0500 Subject: [PATCH 5/7] feat(gastown): emit reconciler metrics to Analytics Engine and add Grafana dashboard panels (#1372) - Extend writeEvent() to support double3-double10 fields for reconciler metrics - Emit reconciler_tick event after each alarm tick with all 9 metrics - Add Reconciler row to Grafana dashboard with 6 panels: 1. Events drained per tick (timeseries) 2. Actions emitted per tick by type (stacked bar) 3. Side effects attempted/succeeded/failed (timeseries) 4. Invariant violations (stat with >0 alert threshold) 5. Reconciler wall clock time (timeseries with >500ms threshold) 6. Pending event queue depth (gauge with >50 threshold) --- .../gastown-grafana-dash-1.json | 694 ++++++++++++++++++ cloudflare-gastown/src/dos/Town.do.ts | 25 + cloudflare-gastown/src/util/analytics.util.ts | 23 +- 3 files changed, 741 insertions(+), 1 deletion(-) diff --git a/cloudflare-gastown/gastown-grafana-dash-1.json b/cloudflare-gastown/gastown-grafana-dash-1.json index f09e9b3c56..1a3656509e 100644 --- a/cloudflare-gastown/gastown-grafana-dash-1.json +++ b/cloudflare-gastown/gastown-grafana-dash-1.json @@ -2479,6 +2479,700 @@ ], "title": "Agent & Review Events", "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 104 + }, + "id": 200, + "panels": [], + "title": "Reconciler", + "type": "row" + }, + { + "datasource": { + "type": "vertamedia-clickhouse-datasource", + "uid": "bffxugc31cnpcc" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 105 + }, + "id": 201, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.4.1", + "targets": [ + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT $timeSeries AS t, 'events_drained' AS label, SUM(double2 * _sample_interval) / SUM(_sample_interval) AS events_drained FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "rawSql": "SELECT $timeSeries AS t, 'events_drained' AS label, SUM(double2 * _sample_interval) / SUM(_sample_interval) AS events_drained FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "refId": "A", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + } + ], + "title": "Events Drained per Tick", + "type": "timeseries" + }, + { + "datasource": { + "type": "vertamedia-clickhouse-datasource", + "uid": "bffxugc31cnpcc" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 105 + }, + "id": 202, + "options": { + "legend": { + "calcs": ["sum"], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.4.1", + "targets": [ + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT $timeSeries AS t, arrayJoin(JSONExtractKeysAndValues(blob10, 'Float64')) AS kv, kv.1 AS label, SUM(kv.2 * _sample_interval) AS count FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' AND blob10 != '' AND blob10 != '{}' GROUP BY t, label ORDER BY t", + "rawSql": "SELECT $timeSeries AS t, arrayJoin(JSONExtractKeysAndValues(blob10, 'Float64')) AS kv, kv.1 AS label, SUM(kv.2 * _sample_interval) AS count FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' AND blob10 != '' AND blob10 != '{}' GROUP BY t, label ORDER BY t", + "refId": "A", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + } + ], + "title": "Actions Emitted per Tick by Type", + "type": "timeseries" + }, + { + "datasource": { + "type": "vertamedia-clickhouse-datasource", + "uid": "bffxugc31cnpcc" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "attempted" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "succeeded" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 113 + }, + "id": 203, + "options": { + "legend": { + "calcs": ["sum"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.4.1", + "targets": [ + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT $timeSeries AS t, 'attempted' AS label, SUM(double4 * _sample_interval) / SUM(_sample_interval) AS attempted FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "rawSql": "SELECT $timeSeries AS t, 'attempted' AS label, SUM(double4 * _sample_interval) / SUM(_sample_interval) AS attempted FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "refId": "A", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + }, + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT $timeSeries AS t, 'succeeded' AS label, SUM(double5 * _sample_interval) / SUM(_sample_interval) AS succeeded FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "rawSql": "SELECT $timeSeries AS t, 'succeeded' AS label, SUM(double5 * _sample_interval) / SUM(_sample_interval) AS succeeded FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "refId": "B", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + }, + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT $timeSeries AS t, 'failed' AS label, SUM(double6 * _sample_interval) / SUM(_sample_interval) AS failed FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "rawSql": "SELECT $timeSeries AS t, 'failed' AS label, SUM(double6 * _sample_interval) / SUM(_sample_interval) AS failed FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "refId": "C", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + } + ], + "title": "Side Effects (attempted / succeeded / failed)", + "type": "timeseries" + }, + { + "datasource": { + "type": "vertamedia-clickhouse-datasource", + "uid": "bffxugc31cnpcc" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 12, + "y": 113 + }, + "id": 204, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": ["sum"], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.1", + "targets": [ + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT $timeSeries AS t, 'violations' AS label, SUM(double7 * _sample_interval) AS violations FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "rawSql": "SELECT $timeSeries AS t, 'violations' AS label, SUM(double7 * _sample_interval) AS violations FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "refId": "A", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + } + ], + "title": "Invariant Violations", + "type": "stat" + }, + { + "datasource": { + "type": "vertamedia-clickhouse-datasource", + "uid": "bffxugc31cnpcc" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line+area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": 0 + }, + { + "color": "red", + "value": 500 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 113 + }, + "id": 205, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.4.1", + "targets": [ + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT $timeSeries AS t, 'wall_clock_ms' AS label, SUM(double1 * _sample_interval) / SUM(_sample_interval) AS wall_clock_ms FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "rawSql": "SELECT $timeSeries AS t, 'wall_clock_ms' AS label, SUM(double1 * _sample_interval) / SUM(_sample_interval) AS wall_clock_ms FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "refId": "A", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + } + ], + "title": "Reconciler Wall Clock Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "vertamedia-clickhouse-datasource", + "uid": "bffxugc31cnpcc" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 200, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 25 + }, + { + "color": "red", + "value": 50 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 121 + }, + "id": 206, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": true, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "12.4.1", + "targets": [ + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "table", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT SUM(double8 * _sample_interval) / SUM(_sample_interval) AS pending_events FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' ORDER BY timestamp DESC LIMIT 1", + "rawSql": "SELECT SUM(double8 * _sample_interval) / SUM(_sample_interval) AS pending_events FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' ORDER BY timestamp DESC LIMIT 1", + "refId": "A", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + } + ], + "title": "Pending Event Queue Depth", + "type": "gauge" } ], "preload": false, diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 68f2f9c8fb..4bd859a994 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -3013,6 +3013,31 @@ export class TownDO extends DurableObject { metrics.pendingEventCount = events.pendingEventCount(this.sql); this._lastReconcilerMetrics = metrics; + // Emit reconciler metrics to Analytics Engine for Grafana dashboards. + // Field mapping: + // double1 = wallClockMs + // double2 = eventsDrained + // double3 = actionsEmitted + // double4 = sideEffectsAttempted + // double5 = sideEffectsSucceeded + // double6 = sideEffectsFailed + // double7 = invariantViolations + // double8 = pendingEventCount + // blob10 = JSON-encoded actionsByType breakdown + this.emitEvent({ + event: 'reconciler_tick', + townId, + durationMs: metrics.wallClockMs, + value: metrics.eventsDrained, + double3: metrics.actionsEmitted, + double4: metrics.sideEffectsAttempted, + double5: metrics.sideEffectsSucceeded, + double6: metrics.sideEffectsFailed, + double7: metrics.invariantViolations, + double8: metrics.pendingEventCount, + label: JSON.stringify(metrics.actionsByType), + }); + // ── Phase 3: Housekeeping (independent, all parallelizable) ──── await Promise.allSettled([ this.deliverPendingMail().catch(err => diff --git a/cloudflare-gastown/src/util/analytics.util.ts b/cloudflare-gastown/src/util/analytics.util.ts index 367960b108..99d1e54c0d 100644 --- a/cloudflare-gastown/src/util/analytics.util.ts +++ b/cloudflare-gastown/src/util/analytics.util.ts @@ -42,6 +42,16 @@ export type GastownEventData = { durationMs?: number; value?: number; label?: string; + // Additional doubles for reconciler_tick events (double3–double10). + // Analytics Engine supports up to 20 doubles per data point. + double3?: number; + double4?: number; + double5?: number; + double6?: number; + double7?: number; + double8?: number; + double9?: number; + double10?: number; }; /** @@ -70,7 +80,18 @@ export function writeEvent( data.role ?? '', // blob12 data.beadType ?? '', // blob13 ], - doubles: [data.durationMs ?? 0, data.value ?? 0], + doubles: [ + data.durationMs ?? 0, // double1 + data.value ?? 0, // double2 + data.double3 ?? 0, // double3 + data.double4 ?? 0, // double4 + data.double5 ?? 0, // double5 + data.double6 ?? 0, // double6 + data.double7 ?? 0, // double7 + data.double8 ?? 0, // double8 + data.double9 ?? 0, // double9 + data.double10 ?? 0, // double10 + ], indexes: [data.event], }); } catch { From d0672a3ac5ea19ab885efc1453e3a75219b0a1ee Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Mon, 23 Mar 2026 10:53:06 -0500 Subject: [PATCH 6/7] fix(gastown): add replay caveat and fix Grafana pending-events gauge query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a caveat comment and response field to debugReplayEvents explaining that events are re-applied on top of live state, not from a pre-window snapshot — results are approximate, useful for debugging event flow but not faithful historical reconstruction. Fix the Grafana 'Pending Event Queue Depth' gauge to show the latest row's double8 value instead of averaging across the time window. --- .../gastown-grafana-dash-1.json | 4 +- cloudflare-gastown/src/dos/Town.do.ts | 74 +++++++++++++++---- cloudflare-gastown/src/gastown.worker.ts | 2 +- 3 files changed, 62 insertions(+), 18 deletions(-) diff --git a/cloudflare-gastown/gastown-grafana-dash-1.json b/cloudflare-gastown/gastown-grafana-dash-1.json index 1a3656509e..e538890720 100644 --- a/cloudflare-gastown/gastown-grafana-dash-1.json +++ b/cloudflare-gastown/gastown-grafana-dash-1.json @@ -3160,8 +3160,8 @@ "interval": "", "intervalFactor": 1, "nullifySparse": false, - "query": "SELECT SUM(double8 * _sample_interval) / SUM(_sample_interval) AS pending_events FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' ORDER BY timestamp DESC LIMIT 1", - "rawSql": "SELECT SUM(double8 * _sample_interval) / SUM(_sample_interval) AS pending_events FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' ORDER BY timestamp DESC LIMIT 1", + "query": "SELECT double8 AS pending_events FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' ORDER BY timestamp DESC LIMIT 1", + "rawSql": "SELECT double8 AS pending_events FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' ORDER BY timestamp DESC LIMIT 1", "refId": "A", "round": "0s", "showFormattedSQL": false, diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 4bd859a994..f526b8d7de 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -241,7 +241,11 @@ export class TownDO extends DurableObject { } private emitEvent(data: Omit): void { - writeEvent(this.env, { ...data, delivery: 'internal', userId: this._ownerUserId }); + writeEvent(this.env, { + ...data, + delivery: 'internal', + userId: this._ownerUserId, + }); } /** Build the context object used by the scheduling sub-module. */ @@ -291,7 +295,9 @@ export class TownDO extends DurableObject { }); } - return scheduling.dispatchAgent(schedulingCtx, agent, bead, { systemPromptOverride }); + return scheduling.dispatchAgent(schedulingCtx, agent, bead, { + systemPromptOverride, + }); }, stopAgent: async agentId => { await dispatch.stopAgentInContainer(this.env, this.townId, agentId); @@ -678,7 +684,9 @@ export class TownDO extends DurableObject { const townConfig = await this.getTownConfig(); if (!townConfig.kilocode_token || townConfig.kilocode_token !== rigConfig.kilocodeToken) { console.log(`${TOWN_LOG} configureRig: propagating kilocodeToken to town config`); - await this.updateTownConfig({ kilocode_token: rigConfig.kilocodeToken }); + await this.updateTownConfig({ + kilocode_token: rigConfig.kilocodeToken, + }); } } @@ -1215,10 +1223,14 @@ export class TownDO extends DurableObject { * Return undelivered, non-expired nudges for an agent. * Urgent nudges are returned first, then FIFO within same priority. */ - async getPendingNudges( - agentId: string - ): Promise< - { nudge_id: string; message: string; mode: string; priority: string; source: string }[] + async getPendingNudges(agentId: string): Promise< + { + nudge_id: string; + message: string; + mode: string; + priority: string; + source: string; + }[] > { const rows = [ ...query( @@ -1769,7 +1781,12 @@ export class TownDO extends DurableObject { /** Build the rig list for mayor agent startup (browse worktree setup on fresh containers). */ private async rigListForMayor(): Promise< - Array<{ rigId: string; gitUrl: string; defaultBranch: string; platformIntegrationId?: string }> + Array<{ + rigId: string; + gitUrl: string; + defaultBranch: string; + platformIntegrationId?: string; + }> > { const rigRecords = rigs.listRigs(this.sql); return Promise.all( @@ -1793,7 +1810,10 @@ export class TownDO extends DurableObject { message: string, _model?: string, uiContext?: string - ): Promise<{ agentId: string; sessionStatus: 'idle' | 'active' | 'starting' }> { + ): Promise<{ + agentId: string; + sessionStatus: 'idle' | 'active' | 'starting'; + }> { const townId = this.townId; let mayor = agents.listAgents(this.sql, { role: 'mayor' })[0] ?? null; @@ -1877,7 +1897,10 @@ export class TownDO extends DurableObject { * Called eagerly on page load so the terminal is available immediately * without requiring the user to send a message first. */ - async ensureMayor(): Promise<{ agentId: string; sessionStatus: 'idle' | 'active' | 'starting' }> { + async ensureMayor(): Promise<{ + agentId: string; + sessionStatus: 'idle' | 'active' | 'starting'; + }> { const townId = this.townId; let mayor = agents.listAgents(this.sql, { role: 'mayor' })[0] ?? null; @@ -2251,7 +2274,10 @@ export class TownDO extends DurableObject { tasks: Array<{ title: string; body?: string; depends_on?: number[] }>; merge_mode?: 'review-then-land' | 'review-and-merge'; staged?: boolean; - }): Promise<{ convoy: ConvoyEntry; beads: Array<{ bead: Bead; agent: Agent | null }> }> { + }): Promise<{ + convoy: ConvoyEntry; + beads: Array<{ bead: Bead; agent: Agent | null }>; + }> { // Resolve staged: explicit request wins, otherwise fall back to town config default. const townConfig = await this.getTownConfig(); const isStaged = input.staged ?? townConfig.staged_convoys_default; @@ -2450,9 +2476,10 @@ export class TownDO extends DurableObject { /** * Transition a staged convoy to active: hook agents and begin dispatch. */ - async startConvoy( - convoyId: string - ): Promise<{ convoy: ConvoyEntry; beads: Array<{ bead: Bead; agent: Agent | null }> }> { + async startConvoy(convoyId: string): Promise<{ + convoy: ConvoyEntry; + beads: Array<{ bead: Bead; agent: Agent | null }>; + }> { const convoy = this.getConvoy(convoyId); if (!convoy) throw new Error(`Convoy not found: ${convoyId}`); if (!convoy.staged) throw new Error(`Convoy is not staged: ${convoyId}`); @@ -3607,7 +3634,13 @@ export class TownDO extends DurableObject { [] ), ]; - const beadCounts = { open: 0, inProgress: 0, inReview: 0, failed: 0, triageRequests: 0 }; + const beadCounts = { + open: 0, + inProgress: 0, + inReview: 0, + failed: 0, + triageRequests: 0, + }; for (const row of beadRows) { const s = `${row.status as string}`; const c = Number(row.cnt); @@ -3719,10 +3752,17 @@ export class TownDO extends DurableObject { // DEBUG: replay events from a time range, apply them to state, run the // reconciler, and return computed actions. Uses a savepoint + rollback so // no state is permanently modified. + // + // CAVEAT: events are re-applied on top of current (live) state, not from a + // clean snapshot taken before the requested window. Non-idempotent handlers + // (e.g. agentDone, completeReviewWithResult) may target different beads than + // they originally did, so actions and snapshots are approximate — useful for + // debugging event flow, not for faithful historical reconstruction. async debugReplayEvents( from: string, to: string ): Promise<{ + caveat: string; eventsReplayed: number; actions: Action[]; stateSnapshot: { @@ -3795,6 +3835,10 @@ export class TownDO extends DurableObject { ]; return { + caveat: + 'Events are re-applied on top of current live state, not from a pre-window snapshot. ' + + 'Non-idempotent handlers may produce different results than the original processing. ' + + 'Use for debugging event flow, not faithful historical reconstruction.', eventsReplayed: rangeEvents.length, actions, stateSnapshot: { diff --git a/cloudflare-gastown/src/gastown.worker.ts b/cloudflare-gastown/src/gastown.worker.ts index f5118d0d6f..c9caa7c34a 100644 --- a/cloudflare-gastown/src/gastown.worker.ts +++ b/cloudflare-gastown/src/gastown.worker.ts @@ -216,7 +216,7 @@ app.post('/debug/towns/:townId/reconcile-dry-run', async c => { app.post('/debug/towns/:townId/replay-events', async c => { const townId = c.req.param('townId'); - const body = (await c.req.json()) as { from?: string; to?: string }; + const body: { from?: string; to?: string } = await c.req.json(); if (!body.from || !body.to) { return c.json({ error: 'Missing required fields: from, to (ISO timestamps)' }, 400); } From ff1fc03c8d07ac01865bd622128b294d83c14dc6 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 24 Mar 2026 16:28:37 +0000 Subject: [PATCH 7/7] feat(gastown): add Sentry capture for reconciler invariant violations Each invariant violation now triggers Sentry.captureMessage with structured context (invariant number, message, townId) as both extra data and tags. Existing analytics event emission is preserved. Added TODO for future auto-recovery of invariant #7 (working agent with no hook). --- cloudflare-gastown/src/dos/Town.do.ts | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index fc761c36bd..9239b1dafa 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -3044,6 +3044,29 @@ export class TownDO extends DurableObject { label: violations.map(v => `[${v.invariant}] ${v.message}`).join('; '), value: violations.length, }); + + for (const violation of violations) { + Sentry.captureMessage( + `Reconciler invariant #${violation.invariant} violated: ${violation.message}`, + { + level: 'error', + extra: { + invariant: violation.invariant, + message: violation.message, + townId, + }, + tags: { + invariant: String(violation.invariant), + townId, + }, + } + ); + + // TODO: auto-recovery for invariant #7 (working agent with no hook). + // Transitioning to idle requires unhooking side-effects (container stop, + // bead status rollback) that live in agents.ts — needs a dedicated + // recovery action in the reconciler rather than a raw SQL update here. + } } } catch (err) { console.warn(`${TOWN_LOG} [reconciler:invariants] town=${townId} check failed`, err);