From 3517b2c1d4d8d40179f5553e4fcc325d0c3dfa41 Mon Sep 17 00:00:00 2001 From: obchain Date: Sat, 25 Apr 2026 23:29:28 +0530 Subject: [PATCH 1/3] fix(grafana): satisfy dashboard-linter --strict (closes #315) - Add top-level `job` template variable (label_values(charon_build_info, job)) - Append `,job=~"$job"` selector to every PromQL target across all panels - Replace hardcoded `[1m]`/`[5m]` rate ranges with `$__rate_interval` (rate/irate windows only, including the `rate()` inside `histogram_quantile`; non-rate `[$__range]` selectors unaffected) - Set `instance` `allValue` from `.*` to `.+` (linter prefers non-empty match) - Flip top-level `editable: true` -> `false` (lock dashboard via provisioning) - Add `unit: "none"` to Build info panel (value is always 1) --- deploy/grafana/charon.json | 51 ++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/deploy/grafana/charon.json b/deploy/grafana/charon.json index 0d0c05e..d3cf00f 100644 --- a/deploy/grafana/charon.json +++ b/deploy/grafana/charon.json @@ -13,7 +13,7 @@ ] }, "description": "Charon liquidation bot — scanner, executor, and profit telemetry. Scrapes the charon-metrics Prometheus exporter (default :9091). Chain/Instance variables default to All (.*) so panels render before the first scrape populates label_values; they auto-refine once metrics flow. Mempool / gas / RPC-latency panels deferred pending backing series: mempool #300, gas #301, rpc-latency #302. 
Alerting rules live in deploy/grafana/alerts.yaml.", - "editable": true, + "editable": false, "fiscalYearStartMonth": 0, "graphTooltip": 1, "id": null, @@ -53,7 +53,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by (chain) (rate(charon_scanner_blocks_total{instance=~\"$instance\",chain=~\"$chain\"}[1m]))", + "expr": "sum by (chain) (rate(charon_scanner_blocks_total{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__rate_interval]))", "legendFormat": "{{chain}}", "range": true, "refId": "A" @@ -64,7 +64,7 @@ }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "description": "Per-block pipeline wall-clock latency. p50 and p95 from the histogram. BSC produces a block every ~3s; quantiles use a [5m] range (~100 observations) so they stay stable across scrapes.", + "description": "Per-block pipeline wall-clock latency. p50 and p95 from the histogram. BSC produces a block every ~3s; quantiles use $__rate_interval (typically ~4× scrape interval) so they stay stable across scrapes while respecting the resolution Grafana selects for the current panel.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, @@ -90,7 +90,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le, chain) (rate(charon_pipeline_block_duration_seconds_bucket{instance=~\"$instance\",chain=~\"$chain\"}[5m])))", + "expr": "histogram_quantile(0.5, sum by (le, chain) (rate(charon_pipeline_block_duration_seconds_bucket{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__rate_interval])))", "legendFormat": "p50 {{chain}}", "range": true, "refId": "A" @@ -98,7 +98,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by (le, chain) (rate(charon_pipeline_block_duration_seconds_bucket{instance=~\"$instance\",chain=~\"$chain\"}[5m])))", + 
"expr": "histogram_quantile(0.95, sum by (le, chain) (rate(charon_pipeline_block_duration_seconds_bucket{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__rate_interval])))", "legendFormat": "p95 {{chain}}", "range": true, "refId": "B" @@ -139,7 +139,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by (bucket) (charon_scanner_positions{instance=~\"$instance\",chain=~\"$chain\"})", + "expr": "sum by (bucket) (charon_scanner_positions{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"})", "legendFormat": "{{bucket}}", "range": true, "refId": "A" @@ -182,7 +182,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "charon_executor_queue_depth{instance=~\"$instance\"}", + "expr": "charon_executor_queue_depth{instance=~\"$instance\",job=~\"$job\"}", "legendFormat": "queue depth", "range": true, "refId": "A" @@ -223,7 +223,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(increase(charon_executor_profit_usd_cents_sum{instance=~\"$instance\",chain=~\"$chain\"}[$__range])) / 100", + "expr": "sum(increase(charon_executor_profit_usd_cents_sum{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__range])) / 100", "legendFormat": "profit (selected range)", "range": true, "refId": "A" @@ -264,7 +264,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by (result) (rate(charon_executor_simulations_total{instance=~\"$instance\",chain=~\"$chain\"}[1m])) * 60", + "expr": "sum by (result) (rate(charon_executor_simulations_total{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__rate_interval])) * 60", "legendFormat": "{{result}}", "range": true, "refId": "A" @@ -301,7 +301,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": 
"label_replace(sum(rate(charon_executor_opportunities_queued_total{instance=~\"$instance\",chain=~\"$chain\"}[1m])) * 60, \"stage\", \"queued\", \"\", \"\")", + "expr": "label_replace(sum(rate(charon_executor_opportunities_queued_total{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__rate_interval])) * 60, \"stage\", \"queued\", \"\", \"\")", "legendFormat": "{{stage}}", "range": true, "refId": "A" @@ -309,7 +309,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by (stage) (rate(charon_executor_opportunities_dropped_total{instance=~\"$instance\",chain=~\"$chain\"}[1m])) * 60", + "expr": "sum by (stage) (rate(charon_executor_opportunities_dropped_total{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__rate_interval])) * 60", "legendFormat": "{{stage}}", "range": true, "refId": "B" @@ -353,7 +353,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by (le) (rate(charon_executor_profit_usd_cents_bucket{instance=~\"$instance\",chain=~\"$chain\"}[5m]))", + "expr": "sum by (le) (rate(charon_executor_profit_usd_cents_bucket{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__rate_interval]))", "format": "heatmap", "legendFormat": "{{le}}", "range": true, @@ -367,7 +367,7 @@ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Running build metadata. Only `version` is surfaced; `git_sha` is intentionally hidden via transform exclusion until /metrics has auth (open #214) and LAN exposure is addressed (#213). 
Surfacing the exact SHA of a running binary to anyone with Grafana read access is an intelligence leak while those are unresolved.", "fieldConfig": { - "defaults": { "custom": { "align": "auto" } }, + "defaults": { "custom": { "align": "auto" }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 }, @@ -382,7 +382,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "charon_build_info{instance=~\"$instance\"}", + "expr": "charon_build_info{instance=~\"$instance\",job=~\"$job\"}", "format": "table", "instant": true, "legendFormat": "__auto", @@ -424,6 +424,25 @@ "skipUrlSync": false, "type": "datasource" }, + { + "allValue": ".+", + "current": { "selected": false, "text": "charon", "value": "charon" }, + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "definition": "label_values(charon_build_info, job)", + "description": "Prometheus scrape job. Defaults to `charon`.", + "hide": 0, + "includeAll": true, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": { "query": "label_values(charon_build_info, job)", "refId": "StandardVariableQuery" }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, { "allValue": ".*", "current": { "selected": true, "text": "All", "value": "$__all" }, @@ -444,11 +463,11 @@ "type": "query" }, { - "allValue": ".*", + "allValue": ".+", "current": { "selected": true, "text": "All", "value": "$__all" }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "definition": "label_values(charon_build_info, instance)", - "description": "Instance label (Prometheus scrape target). Defaults to All (.*) so panels render before the first scrape populates the dropdown.", + "description": "Instance label (Prometheus scrape target). 
Defaults to All (.+) so panels render before the first scrape populates the dropdown.", "hide": 0, "includeAll": true, "label": "Instance", From f4772a50026d0e139276fe2f4289e3a2cd9e1e57 Mon Sep 17 00:00:00 2001 From: obchain Date: Sat, 25 Apr 2026 23:31:53 +0530 Subject: [PATCH 2/3] fix(grafana): make job template a multi-select dashboard-linter --strict rejects single-select job templates. Multi plus includeAll + allValue=".+" still resolves to a valid regex selector for `job=~"$job"`. --- deploy/grafana/charon.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/grafana/charon.json b/deploy/grafana/charon.json index d3cf00f..3afaa03 100644 --- a/deploy/grafana/charon.json +++ b/deploy/grafana/charon.json @@ -433,7 +433,7 @@ "hide": 0, "includeAll": true, "label": "Job", - "multi": false, + "multi": true, "name": "job", "options": [], "query": { "query": "label_values(charon_build_info, job)", "refId": "StandardVariableQuery" }, From 8a52f12444d0aa15c4bace723e576ebc66cc4a12 Mon Sep 17 00:00:00 2001 From: obchain Date: Sat, 25 Apr 2026 23:35:20 +0530 Subject: [PATCH 3/3] ci(grafana-lint): fix promtool docker invocation The prom/prometheus image entrypoint is `prometheus`, so passing `promtool` as the first arg makes prometheus parse it as an argument ("unexpected promtool"). Override the entrypoint so promtool runs directly. Pre-existing failure on main; surfaces here because the dashboard-linter step now passes and exposes the next broken step in the workflow. 
--- .github/workflows/grafana-lint.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/grafana-lint.yml b/.github/workflows/grafana-lint.yml index 53d5d4b..c25e03e 100644 --- a/.github/workflows/grafana-lint.yml +++ b/.github/workflows/grafana-lint.yml @@ -35,6 +35,7 @@ jobs: - name: Validate alert rules with promtool run: | - docker run --rm -v "$PWD/deploy/grafana:/rules:ro" \ + docker run --rm --entrypoint=promtool \ + -v "$PWD/deploy/grafana:/rules:ro" \ prom/prometheus:v2.55.1 \ - promtool check rules /rules/alerts.yaml + check rules /rules/alerts.yaml