From b50c998fa232e3f06893f3a544bf6b2cac99f643 Mon Sep 17 00:00:00 2001 From: obchain Date: Thu, 23 Apr 2026 15:32:20 +0530 Subject: [PATCH] fix(metrics): domain-scaled histogram buckets Register explicit bucket boundaries for charon_pipeline_block_duration_seconds and charon_executor_profit_usd_cents via PrometheusBuilder::set_buckets_for_metric. Without matchers, the exporter renders both histograms as Prometheus summaries, producing NaN from histogram_quantile and empty heatmaps in the companion Grafana dashboard. Block-duration buckets target BSC's 3s block cadence (healthy / warning / alert / overrun). Profit buckets cover the $0.05 dust to $10k+ windfall range observed on Venus liquidations. Closes #275 Closes #218 Closes #217 --- crates/charon-metrics/src/lib.rs | 44 +++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/crates/charon-metrics/src/lib.rs b/crates/charon-metrics/src/lib.rs index d182be5..39e6de8 100644 --- a/crates/charon-metrics/src/lib.rs +++ b/crates/charon-metrics/src/lib.rs @@ -19,9 +19,31 @@ use std::net::SocketAddr; use anyhow::{Context, Result}; use metrics::{counter, describe_counter, describe_gauge, describe_histogram, gauge, histogram}; -use metrics_exporter_prometheus::PrometheusBuilder; +use metrics_exporter_prometheus::{Matcher, PrometheusBuilder}; use tracing::info; +// Bucket boundaries for `charon_pipeline_block_duration_seconds`. +// BSC produces a block every ~3s; resolution is packed around that +// threshold so p50/p95 quantiles stay meaningful. Without explicit +// buckets the exporter renders this histogram as a summary instead. +const BLOCK_DURATION_SECONDS_BUCKETS: &[f64] = + &[0.05, 0.1, 0.25, 0.5, 1.0, 2.0, 3.0, 5.0, 10.0]; + +// Bucket boundaries for `charon_executor_profit_usd_cents`. +// Realistic Venus liquidation profit spans ~$0.05 dust to ~$10k +// windfalls; buckets are in cents (5 → 1_000_000) so histogram_quantile +// returns finite values across that range.
+const PROFIT_USD_CENTS_BUCKETS: &[f64] = &[ + 5.0, + 50.0, + 500.0, + 2_500.0, + 10_000.0, + 50_000.0, + 250_000.0, + 1_000_000.0, +]; + /// Single-source-of-truth metric names. Kept as constants so call /// sites, dashboard JSON, and alert rules refer to the same strings. pub mod names { @@ -76,6 +98,26 @@ pub mod drop_stage { pub async fn init(bind: SocketAddr) -> Result<()> { PrometheusBuilder::new() .with_http_listener(bind) + .set_buckets_for_metric( + Matcher::Full(names::PIPELINE_BLOCK_DURATION_SECONDS.to_string()), + BLOCK_DURATION_SECONDS_BUCKETS, + ) + .with_context(|| { + format!( + "failed to register buckets for {}", + names::PIPELINE_BLOCK_DURATION_SECONDS + ) + })? + .set_buckets_for_metric( + Matcher::Full(names::EXECUTOR_PROFIT_USD_CENTS.to_string()), + PROFIT_USD_CENTS_BUCKETS, + ) + .with_context(|| { + format!( + "failed to register buckets for {}", + names::EXECUTOR_PROFIT_USD_CENTS + ) + })? .install() .with_context(|| format!("failed to install Prometheus exporter on {bind}"))?;