Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions deploy/compose/local-stack.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Charon — local Prometheus + Grafana stack (issue #311).
#
# Runs alongside the existing `docker-compose.yml` (charon + alloy ->
# Grafana Cloud) but keeps a clean separation of concerns: this stack
# is the *visualisation* surface for laptop demos and pre-merge
# validation, the cloud-bound stack is the production deploy.
#
# The bot itself is intentionally NOT included here. The expected
# workflow is:
#
# * `cargo run -- --config config/default.toml listen` on the host
# (native, fastest iteration), OR
# * `docker compose -f deploy/compose/docker-compose.yml up -d`
# (containerised, mirrors production).
#
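# Both topologies are listed — `prometheus.yml` names both
# `host.docker.internal:9091` (native) and `charon:9091` (containerised)
# as static targets. The native target shows UP whenever the host bot
# is running; the `charon:9091` target only resolves if the Prometheus
# container shares a Docker network with the `charon` container, and
# this file attaches it to `local_stack` only.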
#
# Ports exposed on the host:
# * 9090 — Prometheus UI / API
# * 3000 — Grafana UI (anonymous Admin org role; demo only)
#
# Usage:
# docker compose -f deploy/compose/local-stack.yml up -d
# open http://localhost:3000/d/charon-v0 # dashboard auto-loads
# open http://localhost:9090/targets # confirm scrape UP
#
# Tear-down:
# docker compose -f deploy/compose/local-stack.yml down -v
#
# Security note: anonymous Admin is a deliberate concession for laptop
# demos so the operator does not have to log in on every cold start.
# Do NOT bring this stack up on a host that exposes :3000 to anything
# beyond loopback — the Grafana UI would let a passer-by edit panels
# and read every series. The compose `ports:` mapping below uses
# `127.0.0.1:` prefixes to enforce loopback-only at the docker level.

services:
  # Prometheus: scrapes the bot and evaluates the alert rules locally.
  prometheus:
    image: prom/prometheus:v2.55.1
    restart: unless-stopped
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      # Cap retention so a long-running demo cannot fill the laptop
      # disk; 7 days is well over the longest soak test we run.
      - --storage.tsdb.retention.time=7d
      # Enables the HTTP reload / quit endpoints (`POST /-/reload`,
      # `POST /-/quit`). Acceptable here only because the port below is
      # bound to loopback.
      - --web.enable-lifecycle
    volumes:
      - ../prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ../grafana/alerts.yaml:/etc/prometheus/alerts.yaml:ro
      - prometheus_data:/prometheus
    ports:
      # Loopback-only: never publish this on 0.0.0.0.
      - "127.0.0.1:9090:9090"
    extra_hosts:
      # Required on Linux Docker so `host.docker.internal` resolves
      # to the host gateway. macOS / Windows already wire this name
      # automatically; the line is a no-op there.
      - "host.docker.internal:host-gateway"
    networks:
      # Only the stack-local network is attached. A `charon` container
      # started from another compose project is not reachable by name
      # unless that project's network is joined here as well.
      - local_stack
    deploy:
      resources:
        limits:
          cpus: "0.5"
          memory: 512M
    logging:
      # Bound container log growth: 3 files x 20 MB.
      driver: json-file
      options:
        max-size: "20m"
        max-file: "3"

  # Grafana: pre-provisioned datasource + dashboard, no login required.
  grafana:
    image: grafana/grafana:10.4.10
    restart: unless-stopped
    environment:
      # Anonymous Admin so the operator never sees a login screen.
      # This is safe only while the host port mapping below stays
      # bound to 127.0.0.1; do not widen it without removing these.
      GF_AUTH_ANONYMOUS_ENABLED: "true"
      GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin"
      GF_AUTH_DISABLE_LOGIN_FORM: "true"
      # Pin the org so provisioned datasources / dashboards land in
      # the org the anonymous user actually browses.
      GF_AUTH_ANONYMOUS_ORG_NAME: "Main Org."
      # Disable usage reporting and update checks so a demo makes no
      # outbound calls to Grafana's servers.
      GF_ANALYTICS_REPORTING_ENABLED: "false"
      GF_ANALYTICS_CHECK_FOR_UPDATES: "false"
      # Default UI theme for users without a saved preference
      # (which includes the anonymous demo user).
      GF_USERS_DEFAULT_THEME: "dark"
    volumes:
      - ../grafana-provisioning:/etc/grafana/provisioning:ro
      # Mount the canonical dashboard JSON read-only at the path the
      # provisioning provider points at; UID `charon-v0` keeps the
      # imported copy stable across reloads.
      - ../grafana/charon.json:/var/lib/grafana/dashboards/charon.json:ro
      - grafana_data:/var/lib/grafana
    ports:
      # Loopback-only: anonymous Admin must never be reachable remotely.
      - "127.0.0.1:3000:3000"
    networks:
      - local_stack
    depends_on:
      # Start-order only; the datasource URL targets this service by name.
      - prometheus
    deploy:
      resources:
        limits:
          cpus: "0.5"
          memory: 256M
    logging:
      # Bound container log growth: 3 files x 20 MB.
      driver: json-file
      options:
        max-size: "20m"
        max-file: "3"

# Private bridge network shared by the two services in this file.
networks:
  local_stack: {}

# Named volumes so TSDB data and Grafana state survive container
# restarts; `docker compose ... down -v` removes them.
volumes:
  prometheus_data: {}
  grafana_data: {}
26 changes: 26 additions & 0 deletions deploy/grafana-provisioning/dashboards/charon.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Charon — Grafana dashboard provisioning (issue #311).
#
# File-based dashboard provider so `deploy/grafana/charon.json` is
# loaded into Grafana on container startup and shows up under the
# `Charon` folder. Re-importing the same file replaces the existing
# copy in place rather than duplicating it (matches the dashboard's
# fixed UID `charon-v0`).

apiVersion: 1

providers:
  - name: charon
    orgId: 1
    folder: Charon
    type: file
    # Keep the dashboard in Grafana even if its JSON file disappears
    # from the provider path (e.g. a missing bind mount), so a demo
    # does not silently lose the dashboard.
    disableDeletion: true
    # Re-read the JSON from disk every minute so a `git pull` of
    # an updated dashboard surfaces in Grafana without a container
    # restart.
    updateIntervalSeconds: 60
    # Allow saving edits from the UI while iterating on a panel.
    # The file on disk remains the source of truth: when it next
    # changes, the provisioned copy replaces any UI-saved edits.
    allowUiUpdates: true
    options:
      # Directory the compose file bind-mounts `charon.json` into.
      path: /var/lib/grafana/dashboards
      # Use the single `folder` above rather than mirroring the
      # on-disk directory layout as Grafana folders.
      foldersFromFilesStructure: false
27 changes: 27 additions & 0 deletions deploy/grafana-provisioning/datasources/prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Charon — Grafana datasource provisioning (issue #311).
#
# Auto-registers the in-stack Prometheus as the default datasource so
# the dashboard JSON resolves its `${datasource}` variable on first
# load. Without this file the operator would have to click through
# Grafana's "Add data source" UI before the panels would render.
#
# UID is pinned (`prometheus`) so the dashboard JSON's datasource
# template references stay stable across re-provisions; if the
# datasource is ever deleted, the next compose-up restores it identically.

apiVersion: 1

datasources:
  - name: Prometheus
    uid: prometheus
    type: prometheus
    # `proxy`: the Grafana backend issues the queries, so the browser
    # never needs to reach Prometheus directly.
    access: proxy
    # Service DNS name from `local-stack.yml`. `:9090` is the
    # Prometheus default; we expose `:9090` on the host as well so
    # the operator can curl `localhost:9090/targets` for diagnostics.
    url: http://prometheus:9090
    isDefault: true
    # The provisioned definition is authoritative; block UI edits.
    editable: false
    jsonData:
      # Minimum query step; set to the 15s scrape interval used by
      # the paired Prometheus config.
      timeInterval: 15s
      # Send queries in the request body rather than the URL.
      httpMethod: POST
47 changes: 47 additions & 0 deletions deploy/prometheus/prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Charon — local Prometheus scrape config (issue #311).
#
# Pairs with `deploy/compose/local-stack.yml`. Runs alongside Grafana
# inside the same compose network so the operator can drive the full
# observability surface (panels + alert rules) on a laptop without
# touching Grafana Cloud.
#
# Two scrape targets are configured so the same stack works whether
# `charon` runs natively on the host (recommended for development) or
# inside the existing `deploy/compose/docker-compose.yml`:
#
# * host.docker.internal:9091 — bot bound to host loopback. The
# `extra_hosts: host.docker.internal:host-gateway` line in
# `local-stack.yml` makes this resolve on Linux Docker the same
# as it does natively on macOS / Windows.
# * charon:9091 — bot running in the cloud-deploy
# compose stack. The name resolves only when Prometheus shares a
# Docker network with the `charon` container; `local-stack.yml`
# attaches Prometheus to its own `local_stack` network only, so that
# stack's network must additionally be joined for this target to come
# UP. The target is silently DOWN when only the host bot is
# running, which is fine for the laptop workflow.
#
# Alert rules are loaded from the existing repo bundle so a single
# source of truth (`deploy/grafana/alerts.yaml`) drives both Grafana
# unified alerting and Prometheus rule evaluation.

global:
  # Match the Alloy sidecar cadence (cloud-deploy stack uses 15s) so
  # local panels read the same as Grafana Cloud panels do.
  scrape_interval: 15s
  # Must stay below scrape_interval.
  scrape_timeout: 10s
  # How often the alert rules in `rule_files` are evaluated.
  evaluation_interval: 30s

rule_files:
  # Container path the compose file bind-mounts the repo's alert
  # bundle to.
  # NOTE(review): Prometheus only accepts its own rule-file format
  # (`groups:` containing `rules:`) — confirm the mounted file is in
  # that format rather than Grafana's alert-provisioning format.
  - /etc/prometheus/alerts.yaml

scrape_configs:
  - job_name: charon
    metrics_path: /metrics
    static_configs:
      # Bot running natively on the host.
      - targets:
          - "host.docker.internal:9091"
        labels:
          source: host
      # Bot running as the `charon` container; reachable only when
      # Prometheus shares a Docker network with it.
      - targets:
          - "charon:9091"
        labels:
          source: compose
Loading