diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..dc898a1
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,29 @@
+target/
+**/target/
+.git/
+.github/
+.gitignore
+.env
+.env.*
+!.env.example
+.claude/
+.vscode/
+.idea/
+*.log
+docs/
+NOTES.md
+# `config/` belongs on the host and is bind-mounted at runtime (see
+# Dockerfile + deploy/compose/docker-compose.yml). Excluding it from
+# the build context keeps TOML values out of the image layer even if
+# a future Dockerfile re-adds a COPY by mistake — see #287.
+config/
+README.md
+CLAUDE.md
+Dockerfile
+.dockerignore
+contracts/out/
+contracts/cache/
+contracts/broadcast/
+contracts/lib/
+deploy/
+scripts/
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
new file mode 100644
index 0000000..0d69b64
--- /dev/null
+++ b/.github/workflows/docker.yml
@@ -0,0 +1,42 @@
+name: docker-build
+
+# `.dockerignore` is watched alongside the Dockerfile because it shapes
+# the build context: an ignore rule that drops `crates/` or a manifest
+# breaks the image build without touching any other path listed here.
+on:
+  push:
+    paths:
+      - 'Dockerfile'
+      - '.dockerignore'
+      - 'Cargo.toml'
+      - 'Cargo.lock'
+      - 'crates/**'
+      - '.github/workflows/docker.yml'
+  pull_request:
+    paths:
+      - 'Dockerfile'
+      - '.dockerignore'
+      - 'Cargo.toml'
+      - 'Cargo.lock'
+      - 'crates/**'
+      - '.github/workflows/docker.yml'
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+      - uses: docker/setup-buildx-action@v3
+      - name: Build image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: Dockerfile
+          push: false
+          tags: charon:ci
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..674c842
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,77 @@
+# syntax=docker/dockerfile:1.7
+
+# ─── Builder ──────────────────────────────────────────────────────────
+# `rust:1-slim` pairs with a slim Debian runtime below and keeps the
+# final image < 150 MB — well under Hetzner CX22's 40 GB disk budget
+# even after repeated rebuilds.
+#
+# The base is pinned by digest so that a re-tag on Docker Hub cannot
+# silently swap the toolchain, libc, or OpenSSL under a rebuild. Bump
+# it in a dedicated commit when refreshing — never as a drive-by.
+FROM rust:1-slim@sha256:c03ea1587a8e4474ae1a3f4a377cbb35ad53d2eb5c27f0bdf1ca8986025e322f AS builder
+
+# Build-time TLS + pkg-config — alloy transitively links OpenSSL for
+# WS over TLS, and reqwest pulls pkg-config during build scripts.
+RUN apt-get update \
+ && apt-get install --no-install-recommends -y \
+      libssl-dev \
+      pkg-config \
+ && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /build
+
+# Copy the full workspace. The BuildKit cache mount on `/build/target`
+# below is what preserves incremental compilation across rebuilds —
+# `COPY crates` invalidates this layer on any source change, but the
+# RUN layer reuses the cached `target/` so cargo only recompiles
+# crates whose source actually changed. `Cargo.lock` churn still
+# forces a full dep recompile (5-15 min on CX22 2 vCPU).
+COPY Cargo.toml Cargo.lock ./
+COPY crates ./crates
+
+# Build the release binary. `charon` is the single bin — other crates
+# are libraries and compile as dependencies of it.
+RUN --mount=type=cache,target=/build/target \
+    --mount=type=cache,target=/usr/local/cargo/registry \
+    cargo build --locked --release --bin charon \
+ && cp target/release/charon /charon
+
+# ─── Runtime ──────────────────────────────────────────────────────────
+# `debian:bookworm-slim` because we need CA certificates and libssl3
+# for outbound TLS (WS RPC, Chainlink HTTP). Distroless is smaller but
+# drops the shell, which makes `docker compose exec` diagnostics harder
+# on a 4 GB Hetzner box. Digest-pinned for the same reason as the
+# builder — predictable libssl3 ABI across rebuilds.
+FROM debian:bookworm-slim@sha256:f9c6a2fd2ddbc23e336b6257a5245e31f996953ef06cd13a59fa0a1df2d5c252 AS runtime
+
+RUN apt-get update \
+ && apt-get install --no-install-recommends -y \
+      ca-certificates \
+      curl \
+      libssl3 \
+ && rm -rf /var/lib/apt/lists/* \
+ && useradd --system --uid 10001 --home /app --shell /usr/sbin/nologin charon
+
+WORKDIR /app
+COPY --from=builder /charon /usr/local/bin/charon
+
+# `config/` is intentionally absent from the image: compose always
+# bind-mounts `../../config:/app/config:ro` at runtime, and a
+# `docker run` without a mount would otherwise launch silently against
+# stale TOML (contract addresses, RPC endpoints) or leak secrets
+# baked into a layer. Starting the image without a config mount fails
+# at startup, which is the intended behaviour — see #287.
+
+USER charon
+
+EXPOSE 9091
+
+# Health is probed via the Prometheus exporter — the last step of the
+# bot's startup sequence, so a 200 on /metrics implies RPC connect +
+# chain-id check + listener bind all succeeded. `start-period` covers
+# the WS handshake + first block drain on a cold start.
+HEALTHCHECK --interval=10s --timeout=5s --start-period=30s --retries=3 \
+    CMD curl -sf http://localhost:9091/metrics > /dev/null || exit 1
+
+ENTRYPOINT ["charon"]
+CMD ["--config", "config/default.toml", "listen"]
diff --git a/README.md b/README.md
index 36444b8..ac598db 100644
--- a/README.md
+++ b/README.md
@@ -264,6 +264,26 @@ Alert rules in `deploy/grafana/alerts.yaml` can be loaded by Prometheus via `rul
 
 ---
 
+## Deploy (single host, e.g. Hetzner CX22)
+
+A minimal `docker compose` stack ships in [`deploy/compose/`](deploy/compose/). It runs two services:
+
+1. `charon` — built from the repo-root [`Dockerfile`](Dockerfile) (multi-stage: `rust:1-slim` builder → `debian:bookworm-slim` runtime, ~150 MB final image)
+2. `alloy` — [Grafana Alloy](https://grafana.com/docs/alloy/latest/) sidecar that scrapes `charon:9091` over the internal compose network and `remote_write`s every series to Grafana Cloud
+
+No local Prometheus or Grafana is deployed — the Grafana Cloud free tier is the visualisation surface, which fits the CX22 resource envelope (2 vCPU / 4 GB RAM) comfortably.
+
+```sh
+cd deploy/compose
+cp .env.example .env       # fill in RPC + Grafana Cloud creds
+docker compose up -d --build
+docker compose logs -f charon
+```
+
+The metrics endpoint is not exposed to the host — Alloy reaches it by DNS name. Import [`deploy/grafana/charon.json`](deploy/grafana/charon.json) into Grafana Cloud and the panels populate automatically once Alloy's first push lands.
+
+---
+
 ## Project structure
 
 ```
diff --git a/deploy/compose/.env.example b/deploy/compose/.env.example
new file mode 100644
index 0000000..bcc3473
--- /dev/null
+++ b/deploy/compose/.env.example
@@ -0,0 +1,44 @@
+# Charon compose deploy — environment variables.
+# Copy to `.env` in this directory and fill in real values.
+# `.env` is git-ignored by the repo-wide policy (`.env` in .gitignore).
+
+# ── BNB Chain RPC endpoints ──────────────────────────────────────────
+# The defaults work for a smoke test but are rate-limited under
+# sustained load; point at a dedicated node for production.
+BNB_WS_URL=wss://bsc-rpc.publicnode.com
+BNB_HTTP_URL=https://bsc-rpc.publicnode.com
+BSC_PRIVATE_RPC_URL=
+
+# ── BSC testnet (Chapel) — only needed if running `config/testnet.toml` ─
+BNB_TESTNET_WS_URL=wss://bsc-testnet-rpc.publicnode.com
+BNB_TESTNET_HTTP_URL=https://bsc-testnet-rpc.publicnode.com
+
+# ── Bot signer ───────────────────────────────────────────────────────
+# Leave empty to run read-only (scanner + metrics only). Set to the
+# hot-wallet private key to enable tx signing + simulation.
+#
+# SECURITY — the value passed here lives in the container environment,
+# readable in plaintext via `docker inspect` on the charon container by
+# any user in the host's `docker` group (equivalent to root on Linux).
+# Before populating this variable on a Hetzner (or any) host:
+#   1. Restrict the `docker` OS group to a single deploy user only —
+#      no shared logins, no CI agents, no monitoring sidecars.
+#   2. Treat the hot wallet as compromisable: fund it only with gas
+#      and the smallest flash-loan working balance, never a cold
+#      float. Sweep profit to the cold wallet inside every callback.
+#   3. Plan the migration path to an external signer (KMS, HSM, or
+#      Safe cosigner) before production hardening; plaintext env
+#      injection is a v0.1 concession, not the end state.
+# Tracked in #288.
+BOT_SIGNER_KEY=
+
+# ── Grafana Cloud remote_write (used by the alloy sidecar) ──────────
+# Find these three values in your Grafana Cloud account under:
+#   Stack → Connections → Hosted Prometheus metrics → "Send metrics"
+# The push URL looks like:
+#   https://prometheus-prod-XX-prod-eu-west-X.grafana.net/api/prom/push
+# Username is a numeric instance id (six-to-seven digits).
+# Password is an access token scoped to "Write metrics".
+GRAFANA_CLOUD_PROM_URL=
+GRAFANA_CLOUD_PROM_USER=
+GRAFANA_CLOUD_PROM_PASSWORD=
diff --git a/deploy/compose/alloy-config.alloy b/deploy/compose/alloy-config.alloy
new file mode 100644
index 0000000..22c88be
--- /dev/null
+++ b/deploy/compose/alloy-config.alloy
@@ -0,0 +1,50 @@
+// Charon — Grafana Alloy scrape + remote_write sidecar.
+//
+// Scrapes the `charon` container's Prometheus endpoint over the
+// compose internal network and forwards every series to Grafana
+// Cloud. Env vars are resolved via `sys.env` so the config file
+// itself carries no credentials.
+//
+// Required environment variables (loaded from `.env`):
+//   GRAFANA_CLOUD_PROM_URL        e.g. https://prometheus-prod-XX.grafana.net/api/prom/push
+//   GRAFANA_CLOUD_PROM_USER       numeric instance id from Grafana Cloud UI
+//   GRAFANA_CLOUD_PROM_PASSWORD   access token with the metrics-push scope
+
+prometheus.scrape "charon" {
+  // DNS name from the compose network; `:9091` is the exporter bind
+  // port defined in `config/*.toml`.
+  targets = [
+    {"__address__" = "charon:9091", "job" = "charon"},
+  ]
+
+  scrape_interval = "15s"
+  scrape_timeout  = "10s"
+
+  forward_to = [prometheus.remote_write.grafana_cloud.receiver]
+}
+
+prometheus.remote_write "grafana_cloud" {
+  endpoint {
+    url = sys.env("GRAFANA_CLOUD_PROM_URL")
+
+    basic_auth {
+      username = sys.env("GRAFANA_CLOUD_PROM_USER")
+      password = sys.env("GRAFANA_CLOUD_PROM_PASSWORD")
+    }
+  }
+
+  // Bound the on-disk WAL so a Grafana Cloud outage or a revoked
+  // token cannot buffer indefinitely and exhaust the 40 GB CX22
+  // disk. Samples older than max_keepalive_time (2h) are dropped at
+  // truncation even if never delivered; 2h still rides out short
+  // network hiccups. See #293.
+  //
+  // Alloy validates truncate_frequency < min_keepalive_time at
+  // startup (the underlying Prometheus WAL rejects equal values),
+  // so keep truncate on a 15m cadence under a 30m floor.
+  wal {
+    truncate_frequency = "15m"
+    min_keepalive_time = "30m"
+    max_keepalive_time = "2h"
+  }
+}
diff --git a/deploy/compose/docker-compose.yml b/deploy/compose/docker-compose.yml
new file mode 100644
index 0000000..e4d493a
--- /dev/null
+++ b/deploy/compose/docker-compose.yml
@@ -0,0 +1,114 @@
+# Charon — single-host deploy target.
+#
+# Designed for a Hetzner CX22 (2 vCPU / 4 GB RAM / 40 GB disk) or
+# any equivalent box. Two services:
+#   - charon : the bot binary (Dockerfile at repo root)
+#   - alloy  : Grafana Alloy sidecar; scrapes charon:9091 locally and
+#              forwards the series to Grafana Cloud via remote_write
+#
+# We ship Alloy rather than a local Prometheus server because the
+# Grafana Cloud free tier is the intended visualisation surface; a
+# local Prom would just be another thing to tune on a 4 GB box.
+#
+# Usage:
+#   cp .env.example .env       # fill in RPC + Grafana Cloud creds
+#   docker compose up -d --build
+#
+# Secrets:
+#   `.env` is git-ignored; do not commit it. charon loads the whole
+#   file via `env_file`; alloy is handed only the three Grafana Cloud
+#   variables, interpolated from the same `.env`, so the hot-wallet
+#   key never enters the sidecar. The `.env.example` in this directory
+#   is the canonical list of required variables.
+
+services:
+  charon:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    image: charon:local
+    restart: unless-stopped
+    env_file:
+      - ./.env
+    volumes:
+      # Config is read-only from the host so rotating a profile
+      # (mainnet → testnet → fork) doesn't require an image rebuild.
+      - ../../config:/app/config:ro
+    networks:
+      - charon_net
+    # `:9091` stays on the internal compose network. Alloy talks to it
+    # by DNS name, so the metrics endpoint never hits the public
+    # internet — one less public-facing port on the Hetzner box.
+    expose:
+      - "9091"
+    # Keep the container from hogging a 4 GB box if the bot spins;
+    # these limits match typical steady-state plus headroom.
+    deploy:
+      resources:
+        limits:
+          cpus: "1.5"
+          memory: 1G
+    # Cap per-service log storage — json-file driver has no default
+    # rotation and charon emits ~1 info/block on a 3s BSC cadence.
+    # 5 × 50 MB keeps ~a week of history on a 40 GB CX22 without
+    # filling the disk (see #291).
+    logging:
+      driver: json-file
+      options:
+        max-size: "50m"
+        max-file: "5"
+
+  alloy:
+    # Digest-pinned: Docker Hub tags are mutable (maintainers can
+    # republish under the same tag) and this stack already pins every
+    # other image by digest. Bump in a dedicated commit when refreshing
+    # the sidecar — never as a drive-by.
+    image: grafana/alloy:v1.4.3@sha256:06bdcbb51fc22b16c2e96b31cbdf9cb0972e3bdedfbac2cc55ed36743f7fb9aa
+    restart: unless-stopped
+    # `run` subcommand and the default server port (12345) are enough
+    # for a scrape + remote_write topology; we don't expose the UI.
+    #
+    # Admin HTTP binds to loopback inside the container so no other
+    # service on charon_net can reach the config, /-/reload, or
+    # component-graph endpoints. If UI access is ever needed, run
+    # `docker compose exec alloy wget http://127.0.0.1:12345/...` or
+    # tunnel the port on demand.
+    command:
+      - "run"
+      - "--server.http.listen-addr=127.0.0.1:12345"
+      - "--storage.path=/var/lib/alloy/data"
+      - "/etc/alloy/config.alloy"
+    # Only the Grafana Cloud credentials are passed through. Loading
+    # the whole `.env` via `env_file` would also place BOT_SIGNER_KEY
+    # in this third-party container's environment, which it never
+    # needs. Compose interpolates `${…}` from `./.env` next to this file.
+    environment:
+      GRAFANA_CLOUD_PROM_URL: "${GRAFANA_CLOUD_PROM_URL}"
+      GRAFANA_CLOUD_PROM_USER: "${GRAFANA_CLOUD_PROM_USER}"
+      GRAFANA_CLOUD_PROM_PASSWORD: "${GRAFANA_CLOUD_PROM_PASSWORD}"
+    volumes:
+      - ./alloy-config.alloy:/etc/alloy/config.alloy:ro
+      - alloy_data:/var/lib/alloy/data
+    networks:
+      - charon_net
+    depends_on:
+      charon:
+        condition: service_healthy
+    deploy:
+      resources:
+        limits:
+          cpus: "0.5"
+          memory: 256M
+    # Match charon's rotation policy so a Grafana Cloud outage
+    # (every scrape logs a remote_write retry) cannot fill the disk.
+    logging:
+      driver: json-file
+      options:
+        max-size: "50m"
+        max-file: "5"
+
+networks:
+  charon_net: {}
+
+volumes:
+  alloy_data: {}