From ec6a40c9d0403568d6a2f9401d73c69b6aebe060 Mon Sep 17 00:00:00 2001 From: obchain Date: Sat, 25 Apr 2026 22:59:15 +0530 Subject: [PATCH 1/2] feat(deploy): local Prometheus + Grafana stack for laptop demos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a compose stack alongside the existing cloud-bound deploy so an operator without Grafana Cloud credentials can run the full live metrics surface on a laptop: - deploy/compose/local-stack.yml — Prom 2.55 + Grafana 10.4 with anonymous Admin role, loopback-only port mappings, host-gateway hint for Linux Docker, resource caps, log rotation. - deploy/prometheus/prometheus.yml — scrapes host.docker.internal:9091 (native bot) and charon:9091 (compose bot), loads the existing alerts.yaml via rule_files. - deploy/grafana/provisioning/datasources/prometheus.yml — registers the in-stack Prometheus at UID `prometheus` so the dashboard JSON's template default resolves on first load. - deploy/grafana/provisioning/dashboards/charon.yml — file provider that auto-loads deploy/grafana/charon.json into the Charon folder. The cloud-bound stack (alloy -> Grafana Cloud) is untouched. Closes #311. --- deploy/compose/local-stack.yml | 122 ++++++++++++++++++ .../provisioning/dashboards/charon.yml | 26 ++++ .../provisioning/datasources/prometheus.yml | 27 ++++ deploy/prometheus/prometheus.yml | 47 +++++++ 4 files changed, 222 insertions(+) create mode 100644 deploy/compose/local-stack.yml create mode 100644 deploy/grafana/provisioning/dashboards/charon.yml create mode 100644 deploy/grafana/provisioning/datasources/prometheus.yml create mode 100644 deploy/prometheus/prometheus.yml diff --git a/deploy/compose/local-stack.yml b/deploy/compose/local-stack.yml new file mode 100644 index 0000000..41445f2 --- /dev/null +++ b/deploy/compose/local-stack.yml @@ -0,0 +1,122 @@ +# Charon — local Prometheus + Grafana stack (issue #311). 
+# +# Runs alongside the existing `docker-compose.yml` (charon + alloy -> +# Grafana Cloud) but keeps a clean separation of concerns: this stack +# is the *visualisation* surface for laptop demos and pre-merge +# validation, the cloud-bound stack is the production deploy. +# +# The bot itself is intentionally NOT included here. The expected +# workflow is: +# +# * `cargo run -- --config config/default.toml listen` on the host +# (native, fastest iteration), OR +# * `docker compose -f deploy/compose/docker-compose.yml up -d` +# (containerised, mirrors production). +# +# Both topologies are scraped — `prometheus.yml` lists both +# `host.docker.internal:9091` (native) and `charon:9091` (containerised) +# as static targets, so whichever path is running shows UP. +# +# Ports exposed on the host: +# * 9090 — Prometheus UI / API +# * 3000 — Grafana UI (anonymous Admin org role; demo only) +# +# Usage: +# docker compose -f deploy/compose/local-stack.yml up -d +# open http://localhost:3000/d/charon-v0 # dashboard auto-loads +# open http://localhost:9090/targets # confirm scrape UP +# +# Tear-down: +# docker compose -f deploy/compose/local-stack.yml down -v +# +# Security note: anonymous Admin is a deliberate concession for laptop +# demos so the operator does not have to log in on every cold start. +# Do NOT bring this stack up on a host that exposes :3000 to anything +# beyond loopback — the Grafana UI would let a passer-by edit panels +# and read every series. The compose `ports:` mapping below uses +# `127.0.0.1:` prefixes to enforce loopback-only at the docker level. + +services: + prometheus: + image: prom/prometheus:v2.55.1 + restart: unless-stopped + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + # Cap retention so a long-running demo cannot fill the laptop + # disk; 7 days is well over the longest soak test we run. 
+ - --storage.tsdb.retention.time=7d + - --web.enable-lifecycle + volumes: + - ../prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - ../grafana/alerts.yaml:/etc/prometheus/alerts.yaml:ro + - prometheus_data:/prometheus + ports: + - "127.0.0.1:9090:9090" + extra_hosts: + # Required on Linux Docker so `host.docker.internal` resolves + # to the host gateway. macOS / Windows already wire this name + # automatically; the line is a no-op there. + - "host.docker.internal:host-gateway" + networks: + - local_stack + deploy: + resources: + limits: + cpus: "0.5" + memory: 512M + logging: + driver: json-file + options: + max-size: "20m" + max-file: "3" + + grafana: + image: grafana/grafana:10.4.10 + restart: unless-stopped + environment: + # Anonymous Admin so the operator never sees a login screen. + # See the security note at the top of this file before changing + # the host port mapping. + GF_AUTH_ANONYMOUS_ENABLED: "true" + GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin" + GF_AUTH_DISABLE_LOGIN_FORM: "true" + # Pin the org so provisioned datasources / dashboards land in + # the org the anonymous user actually browses. + GF_AUTH_ANONYMOUS_ORG_NAME: "Main Org." + # Disable telemetry pings during demos so the Grafana UI does + # not block the panels on a stats.grafana.org outage. + GF_ANALYTICS_REPORTING_ENABLED: "false" + GF_ANALYTICS_CHECK_FOR_UPDATES: "false" + # Quiet the default first-run banner. + GF_USERS_DEFAULT_THEME: "dark" + volumes: + - ../grafana/provisioning:/etc/grafana/provisioning:ro + # Mount the canonical dashboard JSON read-only at the path the + # provisioning provider points at; UID `charon-v0` keeps the + # imported copy stable across reloads. 
+ - ../grafana/charon.json:/var/lib/grafana/dashboards/charon.json:ro + - grafana_data:/var/lib/grafana + ports: + - "127.0.0.1:3000:3000" + networks: + - local_stack + depends_on: + - prometheus + deploy: + resources: + limits: + cpus: "0.5" + memory: 256M + logging: + driver: json-file + options: + max-size: "20m" + max-file: "3" + +networks: + local_stack: {} + +volumes: + prometheus_data: {} + grafana_data: {} diff --git a/deploy/grafana/provisioning/dashboards/charon.yml b/deploy/grafana/provisioning/dashboards/charon.yml new file mode 100644 index 0000000..bb735a0 --- /dev/null +++ b/deploy/grafana/provisioning/dashboards/charon.yml @@ -0,0 +1,26 @@ +# Charon — Grafana dashboard provisioning (issue #311). +# +# File-based dashboard provider so `deploy/grafana/charon.json` is +# loaded into Grafana on container startup and shows up under the +# `Charon` folder. Re-importing the same file replaces the existing +# copy in place rather than duplicating it (matches the dashboard's +# fixed UID `charon-v0`). + +apiVersion: 1 + +providers: + - name: charon + orgId: 1 + folder: Charon + type: file + # Pre-existing dashboards on disk should be preserved across + # restarts; the operator might be iterating on a panel locally. + disableDeletion: true + # Re-read the JSON from disk every minute so a `git pull` of + # an updated dashboard surfaces in Grafana without a container + # restart. + updateIntervalSeconds: 60 + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards + foldersFromFilesStructure: false diff --git a/deploy/grafana/provisioning/datasources/prometheus.yml b/deploy/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000..90c6c95 --- /dev/null +++ b/deploy/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,27 @@ +# Charon — Grafana datasource provisioning (issue #311). 
+# +# Auto-registers the in-stack Prometheus as the default datasource so +# the dashboard JSON resolves its `${datasource}` variable on first +# load. Without this file the operator would have to click through +# Grafana's "Add data source" UI before the panels would render. +# +# UID is pinned (`prometheus`) so the dashboard JSON's datasource +# template references stay stable across re-provisions; if the +# datasource is deleted, the next compose-up restores it identically. + +apiVersion: 1 + +datasources: + - name: Prometheus + uid: prometheus + type: prometheus + access: proxy + # Service DNS name from `local-stack.yml`. `:9090` is the + # Prometheus default; we expose `:9090` on the host as well so + # the operator can curl `localhost:9090/targets` for diagnostics. + url: http://prometheus:9090 + isDefault: true + editable: false + jsonData: + timeInterval: 15s + httpMethod: POST diff --git a/deploy/prometheus/prometheus.yml b/deploy/prometheus/prometheus.yml new file mode 100644 index 0000000..38bbfa0 --- /dev/null +++ b/deploy/prometheus/prometheus.yml @@ -0,0 +1,47 @@ +# Charon — local Prometheus scrape config (issue #311). +# +# Pairs with `deploy/compose/local-stack.yml`. Runs alongside Grafana +# inside the same compose network so the operator can drive the full +# observability surface (panels + alert rules) on a laptop without +# touching Grafana Cloud. +# +# Two scrape targets are configured so the same stack works whether +# `charon` runs natively on the host (recommended for development) or +# inside the existing `deploy/compose/docker-compose.yml`: +# +# * host.docker.internal:9091 — bot bound to host loopback. The +# `extra_hosts: host.docker.internal:host-gateway` line in +# `local-stack.yml` makes this resolve on Linux Docker the same +# as it does natively on macOS / Windows. +# * charon:9091 — bot running in the cloud-deploy +# compose stack. 
`local-stack.yml` only declares its own +# `local_stack` network, so this name resolves only when the bot +# container is reachable on it; otherwise the target is silently +# DOWN, which is fine for the laptop workflow. +# +# Alert rules are loaded from the existing repo bundle so a single +# source of truth (`deploy/grafana/alerts.yaml`) drives both Grafana +# unified alerting and Prometheus rule evaluation. + +global: + # Match the Alloy sidecar cadence (cloud-deploy stack uses 15s) so + # local panels read the same as Grafana Cloud panels do. + scrape_interval: 15s + scrape_timeout: 10s + evaluation_interval: 30s + +rule_files: + - /etc/prometheus/alerts.yaml + +scrape_configs: + - job_name: charon + metrics_path: /metrics + static_configs: + - targets: + - host.docker.internal:9091 + labels: + source: host + - targets: + - charon:9091 + labels: + source: compose From ae4ba9a37d7602bebe9887bea978cfa5c9cec2c5 Mon Sep 17 00:00:00 2001 From: obchain Date: Sat, 25 Apr 2026 23:04:40 +0530 Subject: [PATCH 2/2] chore(deploy): move Grafana provisioning out of deploy/grafana/ The grafana-lint CI workflow path-filter triggers on every change under deploy/grafana/**. Putting the provisioning yamls inside that tree re-triggered an unrelated --strict lint of charon.json that already fails on main (pre-existing, tracked separately). Moving the two provisioning files into a sibling deploy/grafana-provisioning/ keeps the dashboard artefacts (charon.json, alerts.yaml) under the linted path while the Grafana boot-time config lives outside it. local-stack.yml mount path updated to match. 
--- deploy/compose/local-stack.yml | 2 +- .../provisioning => grafana-provisioning}/dashboards/charon.yml | 0 .../datasources/prometheus.yml | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename deploy/{grafana/provisioning => grafana-provisioning}/dashboards/charon.yml (100%) rename deploy/{grafana/provisioning => grafana-provisioning}/datasources/prometheus.yml (100%) diff --git a/deploy/compose/local-stack.yml b/deploy/compose/local-stack.yml index 41445f2..f7d5643 100644 --- a/deploy/compose/local-stack.yml +++ b/deploy/compose/local-stack.yml @@ -91,7 +91,7 @@ services: # Quiet the default first-run banner. GF_USERS_DEFAULT_THEME: "dark" volumes: - - ../grafana/provisioning:/etc/grafana/provisioning:ro + - ../grafana-provisioning:/etc/grafana/provisioning:ro # Mount the canonical dashboard JSON read-only at the path the # provisioning provider points at; UID `charon-v0` keeps the # imported copy stable across reloads. diff --git a/deploy/grafana/provisioning/dashboards/charon.yml b/deploy/grafana-provisioning/dashboards/charon.yml similarity index 100% rename from deploy/grafana/provisioning/dashboards/charon.yml rename to deploy/grafana-provisioning/dashboards/charon.yml diff --git a/deploy/grafana/provisioning/datasources/prometheus.yml b/deploy/grafana-provisioning/datasources/prometheus.yml similarity index 100% rename from deploy/grafana/provisioning/datasources/prometheus.yml rename to deploy/grafana-provisioning/datasources/prometheus.yml