diff --git a/deploy/compose/local-stack.yml b/deploy/compose/local-stack.yml
new file mode 100644
index 0000000..f7d5643
--- /dev/null
+++ b/deploy/compose/local-stack.yml
@@ -0,0 +1,122 @@
+# Charon — local Prometheus + Grafana stack (issue #311).
+#
+# Runs alongside the existing `docker-compose.yml` (charon + alloy ->
+# Grafana Cloud) but keeps a clean separation of concerns: this stack
+# is the *visualisation* surface for laptop demos and pre-merge
+# validation; the cloud-bound stack is the production deploy.
+#
+# The bot itself is intentionally NOT included here. The expected
+# workflow is:
+#
+#   * `cargo run -- --config config/default.toml listen` on the host
+#     (native, fastest iteration), OR
+#   * `docker compose -f deploy/compose/docker-compose.yml up -d`
+#     (containerised, mirrors production).
+#
+# Both topologies are listed as static targets in `prometheus.yml`:
+# `host.docker.internal:9091` (native) and `charon:9091` (containerised).
+# The latter only resolves if `charon` is attached to `local_stack`.
+#
+# Ports exposed on the host:
+#   * 9090 — Prometheus UI / API
+#   * 3000 — Grafana UI (anonymous Admin org role; demo only)
+#
+# Usage:
+#   docker compose -f deploy/compose/local-stack.yml up -d
+#   open http://localhost:3000/d/charon-v0   # dashboard auto-loads
+#   open http://localhost:9090/targets       # confirm scrape UP
+#
+# Tear-down:
+#   docker compose -f deploy/compose/local-stack.yml down -v
+#
+# Security note: anonymous Admin is a deliberate concession for laptop
+# demos so the operator does not have to log in on every cold start.
+# Do NOT bring this stack up on a host that exposes :3000 to anything
+# beyond loopback — the Grafana UI would let a passer-by edit panels
+# and read every series. The compose `ports:` mapping below uses
+# `127.0.0.1:` prefixes to enforce loopback-only at the docker level.
+
+services:
+  prometheus:
+    image: prom/prometheus:v2.55.1
+    restart: unless-stopped
+    command:
+      - --config.file=/etc/prometheus/prometheus.yml
+      - --storage.tsdb.path=/prometheus
+      # Cap retention so a long-running demo cannot fill the laptop
+      # disk; 7 days is well over the longest soak test we run.
+      - --storage.tsdb.retention.time=7d
+      - --web.enable-lifecycle  # exposes /-/reload and /-/quit
+    volumes:
+      - ../prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
+      - ../grafana/alerts.yaml:/etc/prometheus/alerts.yaml:ro
+      - prometheus_data:/prometheus
+    ports:
+      - "127.0.0.1:9090:9090"  # loopback-only host binding
+    extra_hosts:
+      # Required on Linux Docker so `host.docker.internal` resolves
+      # to the host gateway. macOS / Windows already wire this name
+      # automatically; the line is a no-op there.
+      - "host.docker.internal:host-gateway"
+    networks:
+      - local_stack
+    deploy:
+      resources:
+        limits:
+          cpus: "0.5"
+          memory: 512M
+    logging:
+      driver: json-file
+      options:
+        max-size: "20m"
+        max-file: "3"
+
+  grafana:
+    image: grafana/grafana:10.4.10
+    restart: unless-stopped
+    environment:
+      # Anonymous Admin so the operator never sees a login screen.
+      # See the security note at the top of this file before changing
+      # the host port mapping.
+      GF_AUTH_ANONYMOUS_ENABLED: "true"
+      GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin"
+      GF_AUTH_DISABLE_LOGIN_FORM: "true"
+      # Pin the org so provisioned datasources / dashboards land in
+      # the org the anonymous user actually browses.
+      GF_AUTH_ANONYMOUS_ORG_NAME: "Main Org."
+      # Disable usage reporting and update checks so a demo makes no
+      # outbound calls to Grafana's servers.
+      GF_ANALYTICS_REPORTING_ENABLED: "false"
+      GF_ANALYTICS_CHECK_FOR_UPDATES: "false"
+      # Default UI theme for users (Grafana `[users] default_theme`).
+      GF_USERS_DEFAULT_THEME: "dark"
+    volumes:
+      - ../grafana-provisioning:/etc/grafana/provisioning:ro
+      # Mount the canonical dashboard JSON read-only at the path the
+      # provisioning provider points at; UID `charon-v0` keeps the
+      # imported copy stable across reloads.
+      - ../grafana/charon.json:/var/lib/grafana/dashboards/charon.json:ro
+      - grafana_data:/var/lib/grafana
+    ports:
+      - "127.0.0.1:3000:3000"  # loopback-only host binding
+    networks:
+      - local_stack
+    depends_on:
+      - prometheus  # start order only; does not wait for readiness
+    deploy:
+      resources:
+        limits:
+          cpus: "0.5"
+          memory: 256M
+    logging:
+      driver: json-file
+      options:
+        max-size: "20m"
+        max-file: "3"
+
+networks:
+  local_stack: {}  # only prometheus and grafana join this network
+
+volumes:
+  prometheus_data: {}
+  grafana_data: {}
diff --git a/deploy/grafana-provisioning/dashboards/charon.yml b/deploy/grafana-provisioning/dashboards/charon.yml
new file mode 100644
index 0000000..bb735a0
--- /dev/null
+++ b/deploy/grafana-provisioning/dashboards/charon.yml
@@ -0,0 +1,26 @@
+# Charon — Grafana dashboard provisioning (issue #311).
+#
+# File-based dashboard provider so `deploy/grafana/charon.json` is
+# loaded into Grafana on container startup and shows up under the
+# `Charon` folder. Re-importing the same file replaces the existing
+# copy in place rather than duplicating it (matches the dashboard's
+# fixed UID `charon-v0`).
+
+apiVersion: 1
+
+providers:
+  - name: charon
+    orgId: 1
+    folder: Charon
+    type: file
+    # Keep a provisioned dashboard in Grafana even if its JSON file
+    # disappears from `path`; the operator might be mid-edit locally.
+    disableDeletion: true
+    # Re-read the JSON from disk every minute so a `git pull` of
+    # an updated dashboard surfaces in Grafana without a container
+    # restart.
+    updateIntervalSeconds: 60
+    allowUiUpdates: true  # UI edits allowed; the JSON is mounted :ro
+    options:
+      path: /var/lib/grafana/dashboards
+      foldersFromFilesStructure: false
diff --git a/deploy/grafana-provisioning/datasources/prometheus.yml b/deploy/grafana-provisioning/datasources/prometheus.yml
new file mode 100644
index 0000000..90c6c95
--- /dev/null
+++ b/deploy/grafana-provisioning/datasources/prometheus.yml
@@ -0,0 +1,27 @@
+# Charon — Grafana datasource provisioning (issue #311).
+#
+# Auto-registers the in-stack Prometheus as the default datasource so
+# the dashboard JSON resolves its `${datasource}` variable on first
+# load. Without this file the operator would have to click through
+# Grafana's "Add data source" UI before the panels would render.
+#
+# UID is pinned (`prometheus`) so the dashboard JSON's datasource
+# references stay stable across re-provisions; if the datasource is
+# ever removed, the next compose-up re-creates it identically.
+
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    uid: prometheus
+    type: prometheus
+    access: proxy  # queries are sent by the Grafana backend
+    # Service DNS name from `local-stack.yml`. `:9090` is the
+    # Prometheus default; we expose `:9090` on the host as well so
+    # the operator can curl `localhost:9090/targets` for diagnostics.
+    url: http://prometheus:9090
+    isDefault: true
+    editable: false  # read-only in the Grafana UI
+    jsonData:
+      timeInterval: 15s  # matches the Prometheus scrape_interval
+      httpMethod: POST
diff --git a/deploy/prometheus/prometheus.yml b/deploy/prometheus/prometheus.yml
new file mode 100644
index 0000000..38bbfa0
--- /dev/null
+++ b/deploy/prometheus/prometheus.yml
@@ -0,0 +1,47 @@
+# Charon — local Prometheus scrape config (issue #311).
+#
+# Pairs with `deploy/compose/local-stack.yml`. Runs alongside Grafana
+# inside the same compose network so the operator can drive the full
+# observability surface (panels + alert rules) on a laptop without
+# touching Grafana Cloud.
+#
+# Two scrape targets are configured so the same stack works whether
+# `charon` runs natively on the host (recommended for development) or
+# inside the existing `deploy/compose/docker-compose.yml`:
+#
+#   * host.docker.internal:9091 — bot running natively on the host.
+#     The `extra_hosts: host.docker.internal:host-gateway` line in
+#     `local-stack.yml` makes this name resolve on Linux Docker too.
+#     NOTE(review): on Linux a loopback-only bind may be unreachable
+#     via the host gateway — confirm the bot's metrics bind address.
+#   * charon:9091 — bot running in the cloud-deploy compose stack.
+#     `local-stack.yml` only defines its own `local_stack` network,
+#     so this target is DOWN unless the `charon` container is also
+#     attached to it; that is fine for the host-bot laptop workflow.
+#
+# Alert rules are loaded from the existing repo bundle,
+# `deploy/grafana/alerts.yaml` (mounted by `local-stack.yml`), so one
+# source of truth drives Grafana unified alerting and Prometheus rules.
+
+global:
+  # Match the Alloy sidecar cadence (cloud-deploy stack uses 15s) so
+  # local panels read the same as Grafana Cloud panels do.
+  scrape_interval: 15s
+  scrape_timeout: 10s  # must not exceed scrape_interval
+  evaluation_interval: 30s  # cadence for evaluating rule_files
+
+rule_files:
+  - /etc/prometheus/alerts.yaml
+
+scrape_configs:
+  - job_name: charon
+    metrics_path: /metrics
+    static_configs:
+      - targets:
+          - host.docker.internal:9091
+        labels:
+          source: host  # bot running natively on the host
+      - targets:
+          - charon:9091
+        labels:
+          source: compose  # bot running as the `charon` container