From 57640e0dbd3ea482aa020cb0ec2e105e90e86456 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Sat, 21 Mar 2026 16:32:19 +0000 Subject: [PATCH] feat(gastown): emit reconciler metrics to Analytics Engine and add Grafana dashboard panels - Extend writeEvent() to support double3-double10 fields for reconciler metrics - Emit reconciler_tick event after each alarm tick with all 9 metrics - Add Reconciler row to Grafana dashboard with 6 panels: 1. Events drained per tick (timeseries) 2. Actions emitted per tick by type (stacked bar) 3. Side effects attempted/succeeded/failed (timeseries) 4. Invariant violations (stat with >0 alert threshold) 5. Reconciler wall clock time (timeseries with >500ms threshold) 6. Pending event queue depth (gauge with >50 threshold) --- .../gastown-grafana-dash-1.json | 694 ++++++++++++++++++ cloudflare-gastown/src/dos/Town.do.ts | 25 + cloudflare-gastown/src/util/analytics.util.ts | 23 +- 3 files changed, 741 insertions(+), 1 deletion(-) diff --git a/cloudflare-gastown/gastown-grafana-dash-1.json b/cloudflare-gastown/gastown-grafana-dash-1.json index f09e9b3c56..1a3656509e 100644 --- a/cloudflare-gastown/gastown-grafana-dash-1.json +++ b/cloudflare-gastown/gastown-grafana-dash-1.json @@ -2479,6 +2479,700 @@ ], "title": "Agent & Review Events", "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 104 + }, + "id": 200, + "panels": [], + "title": "Reconciler", + "type": "row" + }, + { + "datasource": { + "type": "vertamedia-clickhouse-datasource", + "uid": "bffxugc31cnpcc" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 105 + }, + "id": 201, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.4.1", + "targets": [ + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT $timeSeries AS t, 'events_drained' AS label, SUM(double2 * _sample_interval) / SUM(_sample_interval) AS events_drained FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "rawSql": "SELECT $timeSeries AS t, 'events_drained' AS label, SUM(double2 * _sample_interval) / SUM(_sample_interval) AS events_drained FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "refId": "A", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + } + ], + "title": "Events Drained per Tick", + "type": "timeseries" + }, + { + "datasource": { + "type": "vertamedia-clickhouse-datasource", + "uid": "bffxugc31cnpcc" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 105 + }, + "id": 202, + "options": { + "legend": { + "calcs": ["sum"], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.4.1", + "targets": [ + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT $timeSeries AS t, arrayJoin(JSONExtractKeysAndValues(blob10, 'Float64')) AS kv, kv.1 AS label, SUM(kv.2 * _sample_interval) AS count FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' AND blob10 != '' AND blob10 != '{}' GROUP BY t, label ORDER BY t", + "rawSql": "SELECT $timeSeries AS t, arrayJoin(JSONExtractKeysAndValues(blob10, 'Float64')) AS kv, kv.1 AS label, SUM(kv.2 * _sample_interval) AS count FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' AND blob10 != '' AND blob10 != '{}' GROUP BY t, label ORDER BY t", + "refId": "A", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + } + ], + "title": "Actions Emitted per Tick by Type", + "type": "timeseries" + }, + { + "datasource": { + "type": "vertamedia-clickhouse-datasource", + "uid": "bffxugc31cnpcc" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "attempted" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "succeeded" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 113 + }, + "id": 203, + "options": { + "legend": { + "calcs": ["sum"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.4.1", + "targets": [ + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT $timeSeries AS t, 'attempted' AS label, SUM(double4 * _sample_interval) / SUM(_sample_interval) AS attempted FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "rawSql": "SELECT $timeSeries AS t, 'attempted' AS label, SUM(double4 * _sample_interval) / SUM(_sample_interval) AS attempted FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "refId": "A", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + }, + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT $timeSeries AS t, 'succeeded' AS label, SUM(double5 * _sample_interval) / SUM(_sample_interval) AS succeeded FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "rawSql": "SELECT $timeSeries AS t, 'succeeded' AS label, SUM(double5 * _sample_interval) / SUM(_sample_interval) AS succeeded FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "refId": "B", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + }, + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT $timeSeries AS t, 'failed' AS label, SUM(double6 * _sample_interval) / SUM(_sample_interval) AS failed FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "rawSql": "SELECT $timeSeries AS t, 'failed' AS label, SUM(double6 * _sample_interval) / SUM(_sample_interval) AS failed FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "refId": "C", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + } + ], + "title": "Side Effects (attempted / succeeded / failed)", + "type": "timeseries" + }, + { + "datasource": { + "type": "vertamedia-clickhouse-datasource", + "uid": "bffxugc31cnpcc" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 12, + "y": 113 + }, + "id": 204, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": ["sum"], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.1", + "targets": [ + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT $timeSeries AS t, 'violations' AS label, SUM(double7 * _sample_interval) AS violations FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "rawSql": "SELECT $timeSeries AS t, 'violations' AS label, SUM(double7 * _sample_interval) AS violations FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "refId": "A", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + } + ], + "title": "Invariant Violations", + "type": "stat" + }, + { + "datasource": { + "type": "vertamedia-clickhouse-datasource", + "uid": "bffxugc31cnpcc" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line+area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": 0 + }, + { + "color": "red", + "value": 500 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 113 + }, + "id": 205, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.4.1", + "targets": [ + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT $timeSeries AS t, 'wall_clock_ms' AS label, SUM(double1 * _sample_interval) / SUM(_sample_interval) AS wall_clock_ms FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "rawSql": "SELECT $timeSeries AS t, 'wall_clock_ms' AS label, SUM(double1 * _sample_interval) / SUM(_sample_interval) AS wall_clock_ms FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' GROUP BY t ORDER BY t", + "refId": "A", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + } + ], + "title": "Reconciler Wall Clock Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "vertamedia-clickhouse-datasource", + "uid": "bffxugc31cnpcc" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 200, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 25 + }, + { + "color": "red", + "value": 50 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 121 + }, + "id": 206, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": true, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "12.4.1", + "targets": [ + { + "adHocFilters": [], + "adHocValuesQuery": "", + "add_metadata": true, + "contextWindowSize": "10", + "dateTimeColDataType": "timestamp", + "dateTimeType": "DATETIME", + "editorMode": "sql", + "extrapolate": true, + "format": "table", + "interval": "", + "intervalFactor": 1, + "nullifySparse": false, + "query": "SELECT SUM(double8 * _sample_interval) / SUM(_sample_interval) AS pending_events FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' ORDER BY timestamp DESC LIMIT 1", + "rawSql": "SELECT SUM(double8 * _sample_interval) / SUM(_sample_interval) AS pending_events FROM gastown_events WHERE $timeFilter AND blob1 = 'reconciler_tick' ORDER BY timestamp DESC LIMIT 1", + "refId": "A", + "round": "0s", + "showFormattedSQL": false, + "showHelp": false, + "skip_comments": true, + "table": "gastown_events", + "useWindowFuncForMacros": true + } + ], + "title": "Pending Event Queue Depth", + "type": "gauge" } ], "preload": false, diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 37c48c2645..7f9ebf33eb 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -3012,6 +3012,31 @@ export class TownDO extends DurableObject { metrics.pendingEventCount = events.pendingEventCount(this.sql); this._lastReconcilerMetrics = metrics; + // Emit reconciler metrics to Analytics Engine for Grafana dashboards. + // Field mapping: + // double1 = wallClockMs + // double2 = eventsDrained + // double3 = actionsEmitted + // double4 = sideEffectsAttempted + // double5 = sideEffectsSucceeded + // double6 = sideEffectsFailed + // double7 = invariantViolations + // double8 = pendingEventCount + // blob10 = JSON-encoded actionsByType breakdown + this.emitEvent({ + event: 'reconciler_tick', + townId, + durationMs: metrics.wallClockMs, + value: metrics.eventsDrained, + double3: metrics.actionsEmitted, + double4: metrics.sideEffectsAttempted, + double5: metrics.sideEffectsSucceeded, + double6: metrics.sideEffectsFailed, + double7: metrics.invariantViolations, + double8: metrics.pendingEventCount, + label: JSON.stringify(metrics.actionsByType), + }); + // ── Phase 3: Housekeeping (independent, all parallelizable) ──── await Promise.allSettled([ this.deliverPendingMail().catch(err => diff --git a/cloudflare-gastown/src/util/analytics.util.ts b/cloudflare-gastown/src/util/analytics.util.ts index 367960b108..99d1e54c0d 100644 --- a/cloudflare-gastown/src/util/analytics.util.ts +++ b/cloudflare-gastown/src/util/analytics.util.ts @@ -42,6 +42,16 @@ export type GastownEventData = { durationMs?: number; value?: number; label?: string; + // Additional doubles for reconciler_tick events (double3–double10). + // Analytics Engine supports up to 20 doubles per data point. + double3?: number; + double4?: number; + double5?: number; + double6?: number; + double7?: number; + double8?: number; + double9?: number; + double10?: number; }; /** @@ -70,7 +80,18 @@ export function writeEvent( data.role ?? '', // blob12 data.beadType ?? '', // blob13 ], - doubles: [data.durationMs ?? 0, data.value ?? 0], + doubles: [ + data.durationMs ?? 0, // double1 + data.value ?? 0, // double2 + data.double3 ?? 0, // double3 + data.double4 ?? 0, // double4 + data.double5 ?? 0, // double5 + data.double6 ?? 0, // double6 + data.double7 ?? 0, // double7 + data.double8 ?? 0, // double8 + data.double9 ?? 0, // double9 + data.double10 ?? 0, // double10 + ], indexes: [data.event], }); } catch {