From 12cf30464f04d2d46becb378bec83dd1dd15aef1 Mon Sep 17 00:00:00 2001 From: obchain Date: Wed, 22 Apr 2026 17:16:23 +0530 Subject: [PATCH 01/11] feat(deploy): Dockerfile + compose stack for Hetzner CX22 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Containerised deploy target designed to fit on a 2 vCPU / 4 GB RAM / 40 GB disk box. Two files define the build, two more define the runtime topology. Dockerfile (multi-stage): - Builder: `rust:1-slim` with pkg-config + libssl-dev for the alloy TLS transitive dep. `cargo build --locked --release --bin charon` with BuildKit cache mounts on `/build/target` and the registry, so iterating on compose config doesn't re-download crates. - Runtime: `debian:bookworm-slim` with `ca-certificates` + `libssl3` only. Non-root user (uid 10001) owns `/app`. Final image ≈ 150 MB. - `EXPOSE 9091`, `ENTRYPOINT charon`, `CMD --config config/default.toml listen`. .dockerignore keeps the build context lean (≈ 5 MB) — excludes target/, .git/, docs/, contracts outputs, and secrets. deploy/compose/docker-compose.yml: - `charon` service: built from the repo-root Dockerfile, runs read-only (config mounted as :ro from the host so profile rotation doesn't require a rebuild), resource limits at 1.5 CPU / 1 GB RAM. - `alloy` service: `grafana/alloy:v1.4.3` sidecar. Scrapes `charon:9091` over the internal `charon_net` and remote_writes to Grafana Cloud. Resource limits at 0.5 CPU / 256 MB — Alloy is lean. Storage-path volume persists the WAL across restarts. - The metrics port is deliberately NOT published to the host. Alloy reaches it by DNS name, so nothing on 0.0.0.0:9091 faces the public internet on the Hetzner box. deploy/compose/alloy-config.alloy: - River config: one `prometheus.scrape` for the `charon` target + one `prometheus.remote_write` for Grafana Cloud. Credentials come from `sys.env` so the file carries no secrets and is safe to commit. 
15 s scrape interval matches what the Grafana dashboard is tuned for. deploy/compose/.env.example lists every variable both services need: BNB RPC endpoints, optional signer key, and the three Grafana Cloud `GRAFANA_CLOUD_PROM_*` values. The real `.env` stays git-ignored. README gains a "Deploy" section with the three-command quickstart. Closes #22. --- .dockerignore | 24 ++++++++++ Dockerfile | 56 ++++++++++++++++++++++ README.md | 20 ++++++++ deploy/compose/.env.example | 30 ++++++++++++ deploy/compose/alloy-config.alloy | 35 ++++++++++++++ deploy/compose/docker-compose.yml | 79 +++++++++++++++++++++++++++++++ 6 files changed, 244 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 deploy/compose/.env.example create mode 100644 deploy/compose/alloy-config.alloy create mode 100644 deploy/compose/docker-compose.yml diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..385edb4 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,24 @@ +target/ +**/target/ +.git/ +.github/ +.gitignore +.env +.env.* +!.env.example +.claude/ +.vscode/ +.idea/ +*.log +docs/ +NOTES.md +README.md +CLAUDE.md +Dockerfile +.dockerignore +contracts/out/ +contracts/cache/ +contracts/broadcast/ +contracts/lib/ +deploy/ +scripts/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..f82a471 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,56 @@ +# syntax=docker/dockerfile:1.7 + +# ─── Builder ────────────────────────────────────────────────────────── +# `rust:1-slim` pairs with a slim Debian runtime below and keeps the +# final image < 150 MB — well under Hetzner CX22's 40 GB disk budget +# even after repeated rebuilds. +FROM rust:1-slim AS builder + +# Build-time TLS + pkg-config — alloy transitively links OpenSSL for +# WS over TLS, and reqwest pulls pkg-config during build scripts. 
+RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + pkg-config \ + libssl-dev \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /build + +# Manifests first so the dep layer caches separately from source. +# Dummy `src/main.rs` is cheaper than copying the full workspace when +# only Cargo.* changes — iteration speed on `docker build` during +# compose tuning. +COPY Cargo.toml Cargo.lock ./ +COPY crates ./crates + +# Build the release binary. `charon` is the single bin — other crates +# are libraries and compile as dependencies of it. +RUN --mount=type=cache,target=/build/target \ + --mount=type=cache,target=/usr/local/cargo/registry \ + cargo build --locked --release --bin charon \ + && cp target/release/charon /charon + +# ─── Runtime ────────────────────────────────────────────────────────── +# `debian:bookworm-slim` because we need CA certificates and libssl3 +# for outbound TLS (WS RPC, Chainlink HTTP). Distroless is smaller but +# drops the shell, which makes `docker compose exec` diagnostics harder +# on a 4 GB Hetzner box. +FROM debian:bookworm-slim AS runtime + +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + ca-certificates \ + libssl3 \ + && rm -rf /var/lib/apt/lists/* \ + && useradd --system --uid 10001 --home /app --shell /usr/sbin/nologin charon + +WORKDIR /app +COPY --from=builder /charon /usr/local/bin/charon +COPY config ./config + +USER charon + +EXPOSE 9091 + +ENTRYPOINT ["charon"] +CMD ["--config", "config/default.toml", "listen"] diff --git a/README.md b/README.md index cc4b6eb..2031b6a 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,24 @@ A ready-to-import dashboard lives at [`deploy/grafana/charon.json`](deploy/grafa Dashboard UID is `charon-v0` and tags are `charon`, `liquidation`, `defi` — re-importing over an existing copy replaces it rather than duplicating. Variables (`Chain`, `Instance`) auto-populate from label values once metrics start flowing. +## Deploy (single host, e.g. 
Hetzner CX22) + +A minimal `docker compose` stack ships in [`deploy/compose/`](deploy/compose/). It runs two services: + +1. `charon` — built from the repo-root [`Dockerfile`](Dockerfile) (multi-stage: `rust:1-slim` builder → `debian:bookworm-slim` runtime, ~150 MB final image) +2. `alloy` — [Grafana Alloy](https://grafana.com/docs/alloy/latest/) sidecar that scrapes `charon:9091` over the internal compose network and `remote_write`s every series to Grafana Cloud + +No local Prometheus or Grafana is deployed — the Grafana Cloud free tier is the visualisation surface, which fits the CX22 resource envelope (2 vCPU / 4 GB RAM) comfortably. + +```sh +cd deploy/compose +cp .env.example .env # fill in RPC + Grafana Cloud creds +docker compose up -d --build +docker compose logs -f charon +``` + +The metrics endpoint is not exposed to the host — Alloy reaches it by DNS name. Import [`deploy/grafana/charon.json`](deploy/grafana/charon.json) into Grafana Cloud and the panels populate automatically once Alloy's first push lands. + ## Repository layout ``` @@ -104,6 +122,8 @@ crates/ contracts/ CharonLiquidator.sol + Foundry suite config/ TOML profiles (default, testnet, fork) scripts/ operator helpers (anvil_fork.sh, ...) +deploy/ docker-compose + grafana dashboard JSON +Dockerfile multi-stage build of the `charon` binary ``` ## License diff --git a/deploy/compose/.env.example b/deploy/compose/.env.example new file mode 100644 index 0000000..473604f --- /dev/null +++ b/deploy/compose/.env.example @@ -0,0 +1,30 @@ +# Charon compose deploy — environment variables. +# Copy to `.env` in this directory and fill in real values. +# `.env` is git-ignored by the repo-wide policy (`.env` in .gitignore). + +# ── BNB Chain RPC endpoints ────────────────────────────────────────── +# The defaults work for a smoke test but are rate-limited under +# sustained load; point at a dedicated node for production. 
+BNB_WS_URL=wss://bsc-rpc.publicnode.com +BNB_HTTP_URL=https://bsc-rpc.publicnode.com +BSC_PRIVATE_RPC_URL= + +# ── BSC testnet (Chapel) — only needed if running `config/testnet.toml` ─ +BNB_TESTNET_WS_URL=wss://bsc-testnet-rpc.publicnode.com +BNB_TESTNET_HTTP_URL=https://bsc-testnet-rpc.publicnode.com + +# ── Bot signer ─────────────────────────────────────────────────────── +# Leave empty to run read-only (scanner + metrics only). Set to the +# hot-wallet private key to enable tx signing + simulation. +BOT_SIGNER_KEY= + +# ── Grafana Cloud remote_write (used by the alloy sidecar) ────────── +# Find these three values in your Grafana Cloud account under: +# Stack → Connections → Hosted Prometheus metrics → "Send metrics" +# The push URL looks like: +# https://prometheus-prod-XX-prod-eu-west-X.grafana.net/api/prom/push +# Username is a numeric instance id (six-to-seven digits). +# Password is an access token scoped to "Write metrics". +GRAFANA_CLOUD_PROM_URL= +GRAFANA_CLOUD_PROM_USER= +GRAFANA_CLOUD_PROM_PASSWORD= diff --git a/deploy/compose/alloy-config.alloy b/deploy/compose/alloy-config.alloy new file mode 100644 index 0000000..db58034 --- /dev/null +++ b/deploy/compose/alloy-config.alloy @@ -0,0 +1,35 @@ +// Charon — Grafana Alloy scrape + remote_write sidecar. +// +// Scrapes the `charon` container's Prometheus endpoint over the +// compose internal network and forwards every series to Grafana +// Cloud. Env vars are resolved via `sys.env` so the config file +// itself carries no credentials. +// +// Required environment variables (loaded from `.env`): +// GRAFANA_CLOUD_PROM_URL e.g. https://prometheus-prod-XX.grafana.net/api/prom/push +// GRAFANA_CLOUD_PROM_USER numeric instance id from Grafana Cloud UI +// GRAFANA_CLOUD_PROM_PASSWORD access token with the metrics-push scope + +prometheus.scrape "charon" { + // DNS name from the compose network; `:9091` is the exporter bind + // port defined in `config/*.toml`. 
+ targets = [ + {"__address__" = "charon:9091", "job" = "charon"}, + ] + + scrape_interval = "15s" + scrape_timeout = "10s" + + forward_to = [prometheus.remote_write.grafana_cloud.receiver] +} + +prometheus.remote_write "grafana_cloud" { + endpoint { + url = sys.env("GRAFANA_CLOUD_PROM_URL") + + basic_auth { + username = sys.env("GRAFANA_CLOUD_PROM_USER") + password = sys.env("GRAFANA_CLOUD_PROM_PASSWORD") + } + } +} diff --git a/deploy/compose/docker-compose.yml b/deploy/compose/docker-compose.yml new file mode 100644 index 0000000..cd5a922 --- /dev/null +++ b/deploy/compose/docker-compose.yml @@ -0,0 +1,79 @@ +# Charon — single-host deploy target. +# +# Designed for a Hetzner CX22 (2 vCPU / 4 GB RAM / 40 GB disk) or +# any equivalent box. Two services: +# - charon : the bot binary (Dockerfile at repo root) +# - alloy : Grafana Alloy sidecar; scrapes charon:9091 locally and +# forwards the series to Grafana Cloud via remote_write +# +# We ship Alloy rather than a local Prometheus server because the +# Grafana Cloud free tier is the intended visualisation surface; a +# local Prom would just be another thing to tune on a 4 GB box. +# +# Usage: +# cp .env.example .env # fill in RPC + Grafana Cloud creds +# docker compose up -d --build +# +# Secrets: +# `.env` is git-ignored and read by both services via `env_file`. +# Do not commit it. The `.env.example` in this directory is the +# canonical list of required variables. + +services: + charon: + build: + context: ../.. + dockerfile: Dockerfile + image: charon:local + restart: unless-stopped + env_file: + - ./.env + volumes: + # Config is read-only from the host so rotating a profile + # (mainnet → testnet → fork) doesn't require an image rebuild. + - ../../config:/app/config:ro + networks: + - charon_net + # `:9091` stays on the internal compose network. Alloy talks to it + # by DNS name, so the metrics endpoint never hits the public + # internet — one less public-facing port on the Hetzner box. 
+ expose: + - "9091" + # Keep the container from hogging a 4 GB box if the bot spins; + # these limits match typical steady-state plus headroom. + deploy: + resources: + limits: + cpus: "1.5" + memory: 1G + + alloy: + image: grafana/alloy:v1.4.3 + restart: unless-stopped + # `run` subcommand and the default server port (12345) are enough + # for a scrape + remote_write topology; we don't expose the UI. + command: + - "run" + - "--server.http.listen-addr=0.0.0.0:12345" + - "--storage.path=/var/lib/alloy/data" + - "/etc/alloy/config.alloy" + env_file: + - ./.env + volumes: + - ./alloy-config.alloy:/etc/alloy/config.alloy:ro + - alloy_data:/var/lib/alloy/data + networks: + - charon_net + depends_on: + - charon + deploy: + resources: + limits: + cpus: "0.5" + memory: 256M + +networks: + charon_net: {} + +volumes: + alloy_data: {} From 91fe374504daf5a88d034785b4de570ece60e47b Mon Sep 17 00:00:00 2001 From: obchain Date: Thu, 23 Apr 2026 16:20:40 +0530 Subject: [PATCH 02/11] fix(deploy): digest-pin base images for reproducible builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both FROM lines used mutable tags (rust:1-slim, debian:bookworm-slim). A Docker Hub re-tag between rebuilds could silently swap toolchain, libc, or OpenSSL and ship a different binary under the same compose invocation — supply-chain risk on a key-holding host. Pin both to the sha256 digests current as of 2026-04-22. Inline comments flag digest bumps as dedicated commits so the base-image swap is auditable in git history. Closes #286 --- Dockerfile | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index f82a471..e2a3908 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,11 @@ # `rust:1-slim` pairs with a slim Debian runtime below and keeps the # final image < 150 MB — well under Hetzner CX22's 40 GB disk budget # even after repeated rebuilds. 
-FROM rust:1-slim AS builder +# +# Pinned by digest so a Docker Hub re-tag cannot silently swap the +# toolchain, libc, or OpenSSL under a rebuild. Bump in a dedicated +# commit when refreshing the base — never as a drive-by. +FROM rust:1-slim@sha256:c03ea1587a8e4474ae1a3f4a377cbb35ad53d2eb5c27f0bdf1ca8986025e322f AS builder # Build-time TLS + pkg-config — alloy transitively links OpenSSL for # WS over TLS, and reqwest pulls pkg-config during build scripts. @@ -34,8 +38,9 @@ RUN --mount=type=cache,target=/build/target \ # `debian:bookworm-slim` because we need CA certificates and libssl3 # for outbound TLS (WS RPC, Chainlink HTTP). Distroless is smaller but # drops the shell, which makes `docker compose exec` diagnostics harder -# on a 4 GB Hetzner box. -FROM debian:bookworm-slim AS runtime +# on a 4 GB Hetzner box. Digest-pinned for the same reason as the +# builder — predictable libssl3 ABI across rebuilds. +FROM debian:bookworm-slim@sha256:f9c6a2fd2ddbc23e336b6257a5245e31f996953ef06cd13a59fa0a1df2d5c252 AS runtime RUN apt-get update \ && apt-get install --no-install-recommends -y \ From ed2194a25b29da1a8febf11ea0ddd99aeff13a09 Mon Sep 17 00:00:00 2001 From: obchain Date: Thu, 23 Apr 2026 16:22:07 +0530 Subject: [PATCH 03/11] fix(deploy): drop COPY config, require bind mount at runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Dockerfile previously copied config/ into the runtime image, which duplicated what the compose bind mount overlays at startup. Two hazards: TOML values (contract addresses, RPC endpoints, any accidental secret) became visible in docker history even after the mount overrode them, and `docker run` without a mount silently launched against stale baked-in config with no visible error. Drop the COPY, add config/ to .dockerignore so future drift cannot silently re-bake the directory, and document that compose bind-mount is the only supported config-delivery path. 
Running the image bare now fails fast — the intended behaviour. Closes #287 --- .dockerignore | 5 +++++ Dockerfile | 8 +++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index 385edb4..dc898a1 100644 --- a/.dockerignore +++ b/.dockerignore @@ -12,6 +12,11 @@ target/ *.log docs/ NOTES.md +# `config/` belongs on the host and is bind-mounted at runtime (see +# Dockerfile + deploy/compose/docker-compose.yml). Excluding it from +# the build context keeps TOML values out of the image layer even if +# a future Dockerfile re-adds a COPY by mistake — see #287. +config/ README.md CLAUDE.md Dockerfile diff --git a/Dockerfile b/Dockerfile index e2a3908..d725ef2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -51,7 +51,13 @@ RUN apt-get update \ WORKDIR /app COPY --from=builder /charon /usr/local/bin/charon -COPY config ./config + +# `config/` is deliberately not copied into the image: compose always +# bind-mounts `../../config:/app/config:ro` at runtime, and a +# `docker run` without a mount would otherwise launch silently against +# stale TOML (contract addresses, RPC endpoints) or leak secrets +# baked into a layer. Running the image without a config mount fails +# at startup, which is the intended behaviour — see #287. USER charon From a55eaa40083784714ceb7c622d2f68e4b3625cd2 Mon Sep 17 00:00:00 2001 From: obchain Date: Thu, 23 Apr 2026 16:22:49 +0530 Subject: [PATCH 04/11] fix(deploy): HEALTHCHECK probes /metrics + compose waits on health Add a HEALTHCHECK that curls the Prometheus exporter on :9091. Because the listener binds only after the async startup chain finishes (WS connect, chain_id verify, first block drained), a 200 on /metrics is a reliable proxy for end-to-end readiness. Install curl in the runtime layer to make the probe self-contained. Gate the alloy sidecar on `condition: service_healthy` so the scraper does not spray connection-refused errors into the logs while charon is still establishing its BSC RPC link. 
The `start-period=30s` absorbs slow cold starts on Hetzner. Closes #290 --- Dockerfile | 8 ++++++++ deploy/compose/docker-compose.yml | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d725ef2..cee1d50 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,6 +45,7 @@ FROM debian:bookworm-slim@sha256:f9c6a2fd2ddbc23e336b6257a5245e31f996953ef06cd13 RUN apt-get update \ && apt-get install --no-install-recommends -y \ ca-certificates \ + curl \ libssl3 \ && rm -rf /var/lib/apt/lists/* \ && useradd --system --uid 10001 --home /app --shell /usr/sbin/nologin charon @@ -63,5 +64,12 @@ USER charon EXPOSE 9091 +# Probe the Prometheus exporter — the final step in the bot's startup +# sequence, so a 200 on /metrics implies RPC connect + chain-id check +# + listener bind all succeeded. `start-period` covers the WS +# handshake + first block drain on a cold start. +HEALTHCHECK --interval=10s --timeout=5s --start-period=30s --retries=3 \ + CMD curl -sf http://localhost:9091/metrics > /dev/null || exit 1 + ENTRYPOINT ["charon"] CMD ["--config", "config/default.toml", "listen"] diff --git a/deploy/compose/docker-compose.yml b/deploy/compose/docker-compose.yml index cd5a922..c8963a5 100644 --- a/deploy/compose/docker-compose.yml +++ b/deploy/compose/docker-compose.yml @@ -65,7 +65,8 @@ services: networks: - charon_net depends_on: - - charon + charon: + condition: service_healthy deploy: resources: limits: From 5f2a14642cd38b830ced324f2275dd5464f0dce6 Mon Sep 17 00:00:00 2001 From: obchain Date: Thu, 23 Apr 2026 16:23:11 +0530 Subject: [PATCH 05/11] fix(deploy): bind alloy admin HTTP to loopback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Alloy's admin server previously listened on 0.0.0.0:12345 inside the compose network. 
No host port was published, but any container on charon_net — including a future log shipper, node exporter, or a compromised charon — could reach the rendered River config, /-/reload, and the component-graph endpoints. A /-/reload pointed at an attacker-controlled remote_write would exfiltrate every series. Bind the admin server to 127.0.0.1 inside the container so those endpoints stay reachable only from alloy itself. Ad-hoc access goes through `docker compose exec alloy`. Closes #289 --- deploy/compose/docker-compose.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/deploy/compose/docker-compose.yml b/deploy/compose/docker-compose.yml index c8963a5..75d8fad 100644 --- a/deploy/compose/docker-compose.yml +++ b/deploy/compose/docker-compose.yml @@ -52,9 +52,15 @@ services: restart: unless-stopped # `run` subcommand and the default server port (12345) are enough # for a scrape + remote_write topology; we don't expose the UI. + # + # Admin HTTP binds to loopback inside the container so no other + # service on charon_net can reach the config, /-/reload, or + # component-graph endpoints. If UI access is ever needed, run + # `docker compose exec alloy wget http://127.0.0.1:12345/...` or + # tunnel the port on demand. command: - "run" - - "--server.http.listen-addr=0.0.0.0:12345" + - "--server.http.listen-addr=127.0.0.1:12345" - "--storage.path=/var/lib/alloy/data" - "/etc/alloy/config.alloy" env_file: From 21fd5fc13acf702cb181f4ead0ba36215d0e5f6a Mon Sep 17 00:00:00 2001 From: obchain Date: Thu, 23 Apr 2026 16:24:04 +0530 Subject: [PATCH 06/11] fix(deploy): cap json-file log rotation on charon + alloy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Neither service specified a logging driver, so Docker defaulted to json-file with no rotation. 
Charon emits ~1 log per 3s BSC block, alloy logs every scrape + every remote_write retry — unbounded growth fills the 40 GB CX22 disk in a matter of weeks and faster under a Grafana Cloud outage. Cap each service at 5 × 50 MB. 250 MB per service retains ~a week of history for incident forensics while leaving plenty of headroom on the small host. Closes #291 --- deploy/compose/docker-compose.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/deploy/compose/docker-compose.yml b/deploy/compose/docker-compose.yml index 75d8fad..e892789 100644 --- a/deploy/compose/docker-compose.yml +++ b/deploy/compose/docker-compose.yml @@ -46,6 +46,15 @@ services: limits: cpus: "1.5" memory: 1G + # Cap per-service log storage — json-file driver has no default + # rotation and charon emits ~1 info/block on a 3s BSC cadence. + # 5 × 50 MB keeps ~a week of history on a 40 GB CX22 without + # filling the disk (see #291). + logging: + driver: json-file + options: + max-size: "50m" + max-file: "5" alloy: image: grafana/alloy:v1.4.3 @@ -78,6 +87,13 @@ services: limits: cpus: "0.5" memory: 256M + # Match charon's rotation policy so a Grafana Cloud outage + # (every scrape logs a remote_write retry) cannot fill the disk. + logging: + driver: json-file + options: + max-size: "50m" + max-file: "5" networks: charon_net: {} From 9124cd242cd64b9e89453abf406ae06be360eb77 Mon Sep 17 00:00:00 2001 From: obchain Date: Thu, 23 Apr 2026 16:24:25 +0530 Subject: [PATCH 07/11] fix(deploy): bound Alloy WAL so remote_write failure cannot fill disk The prometheus.remote_write block shipped without a wal stanza, so Alloy defaulted to unbounded WAL retention at /var/lib/alloy/data. A sustained Grafana Cloud outage, an expired token, or a network partition would buffer indefinitely and race log rotation for the 40 GB CX22 disk. Set truncate_frequency=15m, min_keepalive_time=30m, and max_keepalive_time=2h. 
Two hours of replay covers typical cloud blips while keeping the WAL disk footprint well inside the host budget. truncate_frequency sits under min_keepalive_time because Alloy's underlying Prometheus WAL rejects equal values at startup. Closes #293 --- deploy/compose/alloy-config.alloy | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/deploy/compose/alloy-config.alloy b/deploy/compose/alloy-config.alloy index db58034..22c88be 100644 --- a/deploy/compose/alloy-config.alloy +++ b/deploy/compose/alloy-config.alloy @@ -32,4 +32,19 @@ prometheus.remote_write "grafana_cloud" { password = sys.env("GRAFANA_CLOUD_PROM_PASSWORD") } } + + // Cap the on-disk WAL so a Grafana Cloud outage or a revoked + // token cannot buffer indefinitely and exhaust the 40 GB CX22 + // disk. 2h of retention covers short network hiccups; samples + // older than max_keepalive_time are dropped whether or not they + // were delivered. See #293. + // + // Alloy validates truncate_frequency < min_keepalive_time at + // startup (the underlying Prometheus WAL rejects equal values), + // so keep truncate on a 15m cadence under a 30m floor. + wal { + truncate_frequency = "15m" + min_keepalive_time = "30m" + max_keepalive_time = "2h" + } } From 084f00e37e919d98d906bcdb42b6fd2ec845ada8 Mon Sep 17 00:00:00 2001 From: obchain Date: Thu, 23 Apr 2026 16:24:59 +0530 Subject: [PATCH 08/11] docs(deploy): warn on BOT_SIGNER_KEY visibility via docker inspect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 3-command quickstart routes operators to paste the hot-wallet private key into .env, which compose then injects as a container env var — readable in plaintext by any OS user in the host docker group via `docker inspect charon`. That group is effectively root on Linux, and the original doc set had no security callout. 
Inline an explicit warning in .env.example next to the variable: restrict the docker group to a single deploy user, size the hot wallet as compromisable (gas + working balance only, sweep profit to cold on every callback), and earmark the migration to an external signer as the production hardening path. Raw plaintext env injection is the v0.1 concession, not the end state. Closes #288 --- deploy/compose/.env.example | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/deploy/compose/.env.example b/deploy/compose/.env.example index 473604f..bcc3473 100644 --- a/deploy/compose/.env.example +++ b/deploy/compose/.env.example @@ -16,6 +16,20 @@ BNB_TESTNET_HTTP_URL=https://bsc-testnet-rpc.publicnode.com # ── Bot signer ─────────────────────────────────────────────────────── # Leave empty to run read-only (scanner + metrics only). Set to the # hot-wallet private key to enable tx signing + simulation. +# +# SECURITY — the value passed here lives in the container environment +# and is readable in plaintext via `docker inspect charon` by any OS +# user in the host's `docker` group (equivalent to root on Linux). +# Before populating this variable on a Hetzner (or any) host: +# 1. Restrict the `docker` OS group to a single deploy user only — +# no shared logins, no CI agents, no monitoring sidecars. +# 2. Treat the hot wallet as compromisable: fund it only with gas +# and the smallest flash-loan working balance, never a cold +# float. Sweep profit to the cold wallet inside every callback. +# 3. Plan the migration path to an external signer (KMS, HSM, or +# Safe cosigner) before production hardening; plaintext env +# injection is a v0.1 concession, not the end state. +# Tracked in #288. 
BOT_SIGNER_KEY= # ── Grafana Cloud remote_write (used by the alloy sidecar) ────────── From 8fe1c6d9896a408ee76665c390837c42fd51f332 Mon Sep 17 00:00:00 2001 From: obchain Date: Thu, 23 Apr 2026 18:55:33 +0530 Subject: [PATCH 09/11] fix(deploy): correct dockerfile cache comment to match layer reality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit manifest-first layering does not give the advertised fast rebuild — the cache mount on /build/target is the actual mechanism. note Cargo.lock churn still triggers full dep recompile. --- Dockerfile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index cee1d50..674c842 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,10 +20,12 @@ RUN apt-get update \ WORKDIR /build -# Manifests first so the dep layer caches separately from source. -# Dummy `src/main.rs` is cheaper than copying the full workspace when -# only Cargo.* changes — iteration speed on `docker build` during -# compose tuning. +# Copy the full workspace. The BuildKit cache mount on `/build/target` +# below is what preserves incremental compilation across rebuilds — +# `COPY crates` invalidates this layer on any source change, but the +# RUN layer reuses the cached `target/` so cargo only recompiles +# crates whose source actually changed. `Cargo.lock` churn still +# forces a full dep recompile (5-15 min on CX22 2 vCPU). COPY Cargo.toml Cargo.lock ./ COPY crates ./crates From c417cf6750f951fc51c00107784a47f19469379a Mon Sep 17 00:00:00 2001 From: obchain Date: Thu, 23 Apr 2026 18:55:40 +0530 Subject: [PATCH 10/11] fix(deploy): pin grafana/alloy image by digest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit docker hub tags are mutable — pin by manifest-list sha256 so a republish cannot swap the sidecar silently. matches the base-image pinning already in dockerfile. 
--- deploy/compose/docker-compose.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/deploy/compose/docker-compose.yml b/deploy/compose/docker-compose.yml index e892789..e4d493a 100644 --- a/deploy/compose/docker-compose.yml +++ b/deploy/compose/docker-compose.yml @@ -57,7 +57,11 @@ services: max-file: "5" alloy: - image: grafana/alloy:v1.4.3 + # Digest-pinned: Docker Hub tags are mutable (maintainers can + # republish under the same tag) and this stack already pins every + # other image by digest. Bump in a dedicated commit when refreshing + # the sidecar — never as a drive-by. + image: grafana/alloy:v1.4.3@sha256:06bdcbb51fc22b16c2e96b31cbdf9cb0972e3bdedfbac2cc55ed36743f7fb9aa restart: unless-stopped # `run` subcommand and the default server port (12345) are enough # for a scrape + remote_write topology; we don't expose the UI. From 4f5ef7db5b7c3786d73337794d125550d7a5aa5b Mon Sep 17 00:00:00 2001 From: obchain Date: Thu, 23 Apr 2026 18:55:47 +0530 Subject: [PATCH 11/11] ci(deploy): build docker image on dockerfile or crates changes previously no ci gate verified dockerfile correctness; broken copy paths, missing system packages, or cargo --locked failures were only caught at deploy time. 
--- .github/workflows/docker.yml | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/docker.yml diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..0d69b64 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,37 @@ +name: docker-build + +on: + push: + paths: + - 'Dockerfile' + - 'Cargo.toml' + - 'Cargo.lock' + - 'crates/**' + - '.github/workflows/docker.yml' + pull_request: + paths: + - 'Dockerfile' + - 'Cargo.toml' + - 'Cargo.lock' + - 'crates/**' + - '.github/workflows/docker.yml' + +permissions: + contents: read + +jobs: + build: + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v4 + - uses: docker/setup-buildx-action@v3 + - name: Build image + uses: docker/build-push-action@v6 + with: + context: . + file: Dockerfile + push: false + tags: charon:ci + cache-from: type=gha + cache-to: type=gha,mode=max