From 2097d3da99a6f182e6049361c5c9c8525da2738e Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Tue, 19 Aug 2025 15:02:07 -0400 Subject: [PATCH 01/43] Add debug log --- bottlecap/src/traces/trace_agent.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index b920c2513..01c70ad63 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -166,6 +166,7 @@ impl TraceAgent { tokio::spawn(async move { while let Some(stats_payload) = stats_rx.recv().await { let mut aggregator = stats_aggregator.lock().await; + debug!("Trace Agent | adding stats payload to aggregator: {stats_payload:?}"); aggregator.add(stats_payload); } }); From 6c6532a830f34e420f41c152939a0f34ac77d0b2 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Tue, 19 Aug 2025 15:43:39 -0400 Subject: [PATCH 02/43] Send dummy stats --- bottlecap/src/traces/trace_agent.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 01c70ad63..f38c5bf1b 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -171,8 +171,26 @@ impl TraceAgent { } }); + stats_tx.clone().send(pb::ClientStatsPayload { + hostname: "hostname".to_string(), + env: "dev".to_string(), + version: "version".to_string(), + lang: "rust".to_string(), + tracer_version: "tracer.version".to_string(), + runtime_id: "runtime_id".to_string(), + sequence: 1, + agent_aggregation: "aggregation".to_string(), + service: "service".to_string(), + container_id: "container_id".to_string(), + tags: vec![], + git_commit_sha: "git_commit_sha".to_string(), + image_tag: "image_tag".to_string(), + stats: vec![], + }).await?; + let router = self.make_router(stats_tx); + let port = u16::try_from(TRACE_AGENT_PORT).expect("TRACE_AGENT_PORT is too large"); let socket = SocketAddr::from(([127, 0, 0, 1], port)); let listener = 
tokio::net::TcpListener::bind(&socket).await?; @@ -287,6 +305,7 @@ impl TraceAgent { } async fn stats(State(state): State, request: Request) -> Response { + debug!("Trace Agent | stats()"); match state .stats_processor .process_stats(request, state.stats_tx) From 338273ade0206e645c7b996ae309cd281ef6e7b5 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Tue, 2 Sep 2025 17:07:07 -0400 Subject: [PATCH 03/43] ... --- bottlecap/src/traces/stats_aggregator.rs | 3 +++ bottlecap/src/traces/trace_agent.rs | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bottlecap/src/traces/stats_aggregator.rs b/bottlecap/src/traces/stats_aggregator.rs index fa23c0b63..c10106ac3 100644 --- a/bottlecap/src/traces/stats_aggregator.rs +++ b/bottlecap/src/traces/stats_aggregator.rs @@ -17,6 +17,8 @@ use std::collections::VecDeque; /// const MAX_CONTENT_SIZE_BYTES: usize = 3 * 1024 * 1024; // ~3MB +use tracing::debug; + #[allow(clippy::module_name_repetitions)] pub struct StatsAggregator { queue: VecDeque, @@ -48,6 +50,7 @@ impl StatsAggregator { /// Takes in an individual trace stats payload. 
pub fn add(&mut self, payload: ClientStatsPayload) { + debug!("StatsAggregator | adding stats payload to aggregator: {payload:?}"); self.queue.push_back(payload); } diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index f38c5bf1b..bfa412de3 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -166,7 +166,6 @@ impl TraceAgent { tokio::spawn(async move { while let Some(stats_payload) = stats_rx.recv().await { let mut aggregator = stats_aggregator.lock().await; - debug!("Trace Agent | adding stats payload to aggregator: {stats_payload:?}"); aggregator.add(stats_payload); } }); From 0af0f8f91803739918ab2cd981428551be902dbd Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Wed, 3 Sep 2025 14:36:05 -0400 Subject: [PATCH 04/43] Add more fields --- bottlecap/src/traces/trace_agent.rs | 55 +++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index bfa412de3..d07e90d81 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -152,8 +152,9 @@ impl TraceAgent { } } + #[allow(clippy::cast_possible_truncation)] pub async fn start(&self) -> Result<(), Box> { - let now = Instant::now(); + let now: Instant = Instant::now(); // Set up a channel to send processed stats to our stats aggregator. 
let (stats_tx, mut stats_rx): ( @@ -171,20 +172,52 @@ impl TraceAgent { }); stats_tx.clone().send(pb::ClientStatsPayload { - hostname: "hostname".to_string(), + hostname: String::new(), + // TODO (Yiming): support setting this env: "dev".to_string(), + // TODO (Yiming): support setting this version: "version".to_string(), lang: "rust".to_string(), - tracer_version: "tracer.version".to_string(), - runtime_id: "runtime_id".to_string(), - sequence: 1, - agent_aggregation: "aggregation".to_string(), - service: "service".to_string(), - container_id: "container_id".to_string(), + tracer_version: String::new(), + runtime_id: String::new(), + sequence: 0, + agent_aggregation: String::new(), + // TODO (Yiming): support setting this + service: "yiming_service".to_string(), + container_id: String::new(), tags: vec![], - git_commit_sha: "git_commit_sha".to_string(), - image_tag: "image_tag".to_string(), - stats: vec![], + git_commit_sha: String::new(), + image_tag: String::new(), + stats: vec![ + pb::ClientStatsBucket { + start: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("Time went backwards") + .as_nanos() as u64, + duration: 1_000_000_000, + stats: vec![ + pb::ClientGroupedStats { + service: "yiming_service".to_string(), + name: "yiming_name".to_string(), + resource: "yiming_resource".to_string(), + http_status_code: 200, + r#type: "yiming_type".to_string(), + db_type: String::new(), + hits: 1, + errors: 0, + duration: 1_000_000_000, + ok_summary: vec![], + error_summary: vec![], + synthetics: false, + top_level_hits: 0, + span_kind: String::new(), + peer_tags: vec![], + is_trace_root: 1, + }, + ], + agent_time_shift: 0, + }, + ], }).await?; let router = self.make_router(stats_tx); From cb2e790847825c0bc7ea9bf5006daa647175af7c Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Mon, 8 Sep 2025 15:28:32 -0400 Subject: [PATCH 05/43] Can see metrics in metrics explorer --- bottlecap/src/bin/bottlecap/main.rs | 29 +++- 
.../src/lifecycle/invocation/processor.rs | 32 +++- bottlecap/src/traces/mod.rs | 2 + bottlecap/src/traces/my_stats_processor.rs | 88 +++++++++++ bottlecap/src/traces/stats_agent.rs | 54 +++++++ bottlecap/src/traces/trace_agent.rs | 140 +++++++++--------- 6 files changed, 272 insertions(+), 73 deletions(-) create mode 100644 bottlecap/src/traces/my_stats_processor.rs create mode 100644 bottlecap/src/traces/stats_agent.rs diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 0e87473f0..ae21da57f 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -59,6 +59,7 @@ use bottlecap::{ trace_aggregator::{self, SendDataBuilderInfo}, trace_flusher::{self, ServerlessTraceFlusher, TraceFlusher}, trace_processor::{self, SendingTraceProcessor}, + stats_agent::{StatsEvent, StatsAgent}, }, }; use datadog_fips::reqwest_adapter::create_reqwest_client_builder; @@ -92,11 +93,12 @@ use std::{ sync::Arc, time::{Duration, Instant}, }; -use tokio::{sync::Mutex as TokioMutex, sync::RwLock, sync::mpsc::Sender, task::JoinHandle}; +use tokio::{sync::Mutex as TokioMutex, sync::RwLock, sync::mpsc::{self, Sender, Receiver}, task::JoinHandle}; use tokio_util::sync::CancellationToken; use tracing::{debug, error}; use tracing_subscriber::EnvFilter; use ustr::Ustr; +use datadog_trace_protobuf::pb; #[allow(clippy::struct_field_names)] struct PendingFlushHandles { @@ -517,6 +519,7 @@ async fn extension_loop_active( ))); let propagator = Arc::new(DatadogCompositePropagator::new(Arc::clone(config))); + let (stats_tx, stats_rx) = mpsc::channel::(1000); // Lifecycle Invocation Processor let invocation_processor = Arc::new(TokioMutex::new(InvocationProcessor::new( Arc::clone(&tags_provider), @@ -524,6 +527,7 @@ async fn extension_loop_active( Arc::clone(&aws_config), metrics_aggr_handle.clone(), Arc::clone(&propagator), + stats_tx, ))); // AppSec processor (if enabled) let appsec_processor = match AppSecProcessor::new(config) { 
@@ -545,6 +549,7 @@ async fn extension_loop_active( stats_flusher, proxy_flusher, trace_agent_shutdown_token, + stats_aggregator_tx, ) = start_trace_agent( config, &api_key_factory, @@ -554,6 +559,8 @@ async fn extension_loop_active( Arc::clone(&trace_aggregator), ); + start_stats_agent(stats_rx, stats_aggregator_tx, config, &tags_provider); + let api_runtime_proxy_shutdown_signal = start_api_runtime_proxy( config, aws_config, @@ -895,7 +902,7 @@ async fn handle_event_bus_event( } TelemetryRecord::PlatformStart { request_id, .. } => { let mut p = invocation_processor.lock().await; - p.on_platform_start(request_id, event.time); + p.on_platform_start(request_id, event.time).await; drop(p); } TelemetryRecord::PlatformRuntimeDone { @@ -959,7 +966,7 @@ async fn handle_next_invocation( invoked_function_arn.clone() ); let mut p = invocation_processor.lock().await; - p.on_invoke_event(request_id.into()); + p.on_invoke_event(request_id.into()).await; drop(p); } Ok(NextEventResponse::Shutdown { @@ -1018,6 +1025,18 @@ fn start_logs_agent( (logs_agent_channel, logs_flusher) } +fn start_stats_agent( + stats_rx: Receiver, + stats_aggregator_tx: Sender, + config: &Arc, + tags_provider: &Arc, +) { + let mut stats_agent = StatsAgent::new(stats_rx, stats_aggregator_tx, Arc::clone(config), Arc::clone(tags_provider)); + tokio::spawn(async move { + stats_agent.spin().await; + }); +} + fn start_metrics_flushers( api_key_factory: Arc, metrics_aggr_handle: &MetricsAggregatorHandle, @@ -1098,6 +1117,7 @@ fn start_trace_agent( Arc, Arc, tokio_util::sync::CancellationToken, + Sender, ) { // Stats let stats_aggregator = Arc::new(TokioMutex::new(StatsAggregator::default())); @@ -1138,7 +1158,7 @@ fn start_trace_agent( Arc::clone(config), )); - let trace_agent = trace_agent::TraceAgent::new( + let (trace_agent, stats_aggregator_tx) = trace_agent::TraceAgent::new( Arc::clone(config), trace_aggregator, trace_processor.clone(), @@ -1166,6 +1186,7 @@ fn start_trace_agent( stats_flusher, 
proxy_flusher, shutdown_token, + stats_aggregator_tx, ) } diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index a1d0d94b6..afbe5fa10 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -9,7 +9,7 @@ use datadog_trace_protobuf::pb::Span; use datadog_trace_utils::tracer_header_tags; use serde_json::Value; use tokio::sync::watch; -use tracing::{debug, warn}; +use tracing::{debug, error, warn}; use crate::{ config::{self, aws::AwsConfig}, @@ -41,6 +41,9 @@ use crate::{ use crate::lifecycle::invocation::triggers::get_default_service_name; +use crate::traces::stats_agent::StatsEvent; +use tokio::sync::mpsc::Sender; + pub const MS_TO_NS: f64 = 1_000_000.0; pub const S_TO_MS: u64 = 1_000; pub const S_TO_NS: f64 = 1_000_000_000.0; @@ -81,6 +84,7 @@ pub struct Processor { /// /// These tags are used to capture runtime and initialization. dynamic_tags: HashMap, + stats_agent_tx: Sender, } impl Processor { @@ -91,6 +95,7 @@ impl Processor { aws_config: Arc, metrics_aggregator: dogstatsd::aggregator_service::AggregatorHandle, propagator: Arc, + stats_agent_tx: Sender, ) -> Self { let resource = tags_provider .get_canonical_resource_name() @@ -114,12 +119,13 @@ impl Processor { service, resource, dynamic_tags: HashMap::new(), + stats_agent_tx, } } /// Given a `request_id`, creates the context and adds the enhanced metric offsets to the context buffer. /// - pub fn on_invoke_event(&mut self, request_id: String) { + pub async fn on_invoke_event(&mut self, request_id: String) { let invocation_span = create_empty_span(String::from("aws.lambda"), &self.resource, &self.service); // Important! 
Call set_init_tags() before adding the invocation to the context buffer @@ -170,6 +176,17 @@ impl Processor { self.inferrer.infer_span(&payload_value, &self.aws_config); self.process_on_universal_instrumentation_start(request_id, headers, payload_value); } + + // Send stats event + let stats_event = StatsEvent; + match self.stats_agent_tx.send(stats_event).await { + Ok(()) => { + debug!("Successfully buffered stats event to be aggregated."); + } + Err(err) => { + error!("Error sending stats event to the stats aggregator: {err}"); + } + } } /// On the first invocation, determine if it's a cold start or proactive init. @@ -269,7 +286,7 @@ impl Processor { /// Given a `request_id` and the time of the platform start, add the start time to the context buffer. /// - pub fn on_platform_start(&mut self, request_id: String, time: DateTime) { + pub async fn on_platform_start(&mut self, request_id: String, time: DateTime) { let start_time: i64 = SystemTime::from(time) .duration_since(UNIX_EPOCH) .expect("time went backwards") @@ -277,6 +294,15 @@ impl Processor { .try_into() .unwrap_or_default(); self.context_buffer.add_start_time(&request_id, start_time); + let stats_event = StatsEvent; + match self.stats_agent_tx.send(stats_event).await { + Ok(()) => { + debug!("Successfully buffered stats event to be aggregated."); + } + Err(err) => { + error!("Error sending stats event to the stats aggregator: {err}"); + } + } } #[allow(clippy::too_many_arguments)] diff --git a/bottlecap/src/traces/mod.rs b/bottlecap/src/traces/mod.rs index f3363d67d..c28918804 100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -7,8 +7,10 @@ pub mod proxy_aggregator; pub mod proxy_flusher; pub mod span_pointers; pub mod stats_aggregator; +pub mod stats_agent; pub mod stats_flusher; pub mod stats_processor; +pub mod my_stats_processor; pub mod trace_agent; pub mod trace_aggregator; pub mod trace_flusher; diff --git a/bottlecap/src/traces/my_stats_processor.rs 
b/bottlecap/src/traces/my_stats_processor.rs new file mode 100644 index 000000000..f2bae51a7 --- /dev/null +++ b/bottlecap/src/traces/my_stats_processor.rs @@ -0,0 +1,88 @@ +use crate::traces::stats_agent::StatsEvent; +use datadog_trace_protobuf::pb; +use tracing::{debug, error}; +use tokio::sync::mpsc::Sender; + +use crate::config::Config; +use std::sync::Arc; +use crate::tags::provider::Provider as TagProvider; + +#[derive(Clone)] +pub struct MyStatsProcessor { + stats_aggregator_tx: Sender, + config: Arc, + resource: String, +} + +impl MyStatsProcessor { + #[must_use] + pub fn new(stats_aggregator_tx: Sender, config: Arc, tags_provider: Arc) -> Self { + let resource = tags_provider + .get_canonical_resource_name() + .unwrap_or(String::from("aws.lambda")); + Self { stats_aggregator_tx, config, resource } + } + + #[allow(clippy::cast_possible_truncation)] + pub async fn process( + &self, + _event: StatsEvent, + ) { + debug!("In my stats processor: Processing stats event."); + let stats = pb::ClientStatsPayload { + // hostname: String::new(), + hostname: "yiming7-hostname".to_string(), + env: self.config.env.clone().unwrap_or_default(), + version: self.config.version.clone().unwrap_or_default(), + lang: "rust".to_string(), + tracer_version: String::new(), + runtime_id: String::new(), + sequence: 0, + agent_aggregation: String::new(), + // service: self.config.service.clone().unwrap_or(String::new()), + service: self.config.service.clone().unwrap_or_default(), + container_id: String::new(), + tags: vec![], + git_commit_sha: String::new(), + image_tag: String::new(), + stats: vec![ + pb::ClientStatsBucket { + start: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("Invalid time") + .as_nanos() as u64, + duration: 1_000_000_000, + stats: vec![ + pb::ClientGroupedStats { + service: self.config.service.clone().unwrap_or_default(), + name: "yiming_name".to_string(), + resource: self.resource.clone(), + http_status_code: 200, + r#type: 
String::new(), + db_type: String::new(), + hits: 1, + errors: 0, + duration: 1_000_000_000, + ok_summary: vec![], + error_summary: vec![], + synthetics: false, + top_level_hits: 0, + span_kind: String::new(), + peer_tags: vec![], + is_trace_root: 1, + }, + ], + agent_time_shift: 0, + }, + ], + }; + match self.stats_aggregator_tx.send(stats).await { + Ok(()) => { + debug!("In my stats processor: Successfully buffered stats to be aggregated."); + } + Err(err) => { + error!("In my stats processor: Error sending stats to the stats aggregator: {err}"); + } + } + } +} \ No newline at end of file diff --git a/bottlecap/src/traces/stats_agent.rs b/bottlecap/src/traces/stats_agent.rs new file mode 100644 index 000000000..559648b9b --- /dev/null +++ b/bottlecap/src/traces/stats_agent.rs @@ -0,0 +1,54 @@ +use tokio::sync::mpsc::{self, Receiver, Sender}; +use tracing::debug; + +use datadog_trace_protobuf::pb; + +use super::my_stats_processor::MyStatsProcessor; + +use crate::config::Config; +use std::sync::Arc; +use crate::tags::provider::Provider as TagProvider; + +#[derive(Clone, Copy)] +pub struct StatsEvent; + +#[allow(clippy::module_name_repetitions)] +pub struct StatsAgent { + rx: mpsc::Receiver, + processor: MyStatsProcessor, +} + +impl StatsAgent { + #[must_use] + pub fn new( + rx: Receiver, + stats_aggregator_tx: Sender, + config: Arc, + tags_provider: Arc, + ) -> StatsAgent { + + StatsAgent { + rx, + processor: MyStatsProcessor::new(stats_aggregator_tx, config, tags_provider), + } + } + + pub async fn spin(&mut self) { + while let Some(event) = self.rx.recv().await { + debug!("In stats agent: Received stats event."); + self.processor.process(event).await; + } + } + + // pub async fn sync_consume(&mut self) { + // if let Some(events) = self.rx.recv().await { + // self.processor.process().await; + // } + // } + + // #[must_use] + // pub fn get_sender_copy(&self) -> Sender { + // self.tx.clone() + // } + +} diff --git a/bottlecap/src/traces/trace_agent.rs 
b/bottlecap/src/traces/trace_agent.rs index d07e90d81..6d72f972f 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -102,6 +102,8 @@ pub struct TraceAgent { appsec_processor: Option>>, shutdown_token: CancellationToken, tx: Sender, + stats_tx: Sender, + stats_rx: Arc>>>, } #[derive(Clone, Copy)] @@ -123,7 +125,7 @@ impl TraceAgent { invocation_processor: Arc>, appsec_processor: Option>>, tags_provider: Arc, - ) -> TraceAgent { + ) -> (TraceAgent, Sender) { // Set up a channel to send processed traces to our trace aggregator. tx is passed through each // endpoint_handler to the trace processor, which uses it to send de-serialized // processed trace payloads to our trace aggregator. @@ -138,7 +140,13 @@ impl TraceAgent { } }); - TraceAgent { + // Set up a channel to send processed stats to our stats aggregator. + let (stats_tx, stats_rx): ( + Sender, + Receiver, + ) = mpsc::channel(STATS_PAYLOAD_CHANNEL_BUFFER_SIZE); + + let agent = TraceAgent { config: config.clone(), trace_processor, stats_aggregator, @@ -149,78 +157,78 @@ impl TraceAgent { tags_provider, tx: trace_tx, shutdown_token: CancellationToken::new(), - } + stats_tx: stats_tx.clone(), + stats_rx: Arc::new(Mutex::new(Some(stats_rx))), + }; + + (agent, stats_tx) } #[allow(clippy::cast_possible_truncation)] pub async fn start(&self) -> Result<(), Box> { let now: Instant = Instant::now(); - // Set up a channel to send processed stats to our stats aggregator. - let (stats_tx, mut stats_rx): ( - Sender, - Receiver, - ) = mpsc::channel(STATS_PAYLOAD_CHANNEL_BUFFER_SIZE); - // Start the stats aggregator, which receives and buffers stats payloads to be consumed by the stats flusher. 
- let stats_aggregator = self.stats_aggregator.clone(); - tokio::spawn(async move { - while let Some(stats_payload) = stats_rx.recv().await { - let mut aggregator = stats_aggregator.lock().await; - aggregator.add(stats_payload); - } - }); + if let Some(mut stats_rx) = self.stats_rx.lock().await.take() { + let stats_aggregator = self.stats_aggregator.clone(); + tokio::spawn(async move { + while let Some(stats_payload) = stats_rx.recv().await { + let mut aggregator = stats_aggregator.lock().await; + aggregator.add(stats_payload); + } + }); + } - stats_tx.clone().send(pb::ClientStatsPayload { - hostname: String::new(), - // TODO (Yiming): support setting this - env: "dev".to_string(), - // TODO (Yiming): support setting this - version: "version".to_string(), - lang: "rust".to_string(), - tracer_version: String::new(), - runtime_id: String::new(), - sequence: 0, - agent_aggregation: String::new(), - // TODO (Yiming): support setting this - service: "yiming_service".to_string(), - container_id: String::new(), - tags: vec![], - git_commit_sha: String::new(), - image_tag: String::new(), - stats: vec![ - pb::ClientStatsBucket { - start: std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .expect("Time went backwards") - .as_nanos() as u64, - duration: 1_000_000_000, - stats: vec![ - pb::ClientGroupedStats { - service: "yiming_service".to_string(), - name: "yiming_name".to_string(), - resource: "yiming_resource".to_string(), - http_status_code: 200, - r#type: "yiming_type".to_string(), - db_type: String::new(), - hits: 1, - errors: 0, - duration: 1_000_000_000, - ok_summary: vec![], - error_summary: vec![], - synthetics: false, - top_level_hits: 0, - span_kind: String::new(), - peer_tags: vec![], - is_trace_root: 1, - }, - ], - agent_time_shift: 0, - }, - ], - }).await?; - - let router = self.make_router(stats_tx); + // stats_tx.clone().send(pb::ClientStatsPayload { + // hostname: String::new(), + // // TODO (Yiming): support setting this + // env: 
"dev".to_string(), + // // TODO (Yiming): support setting this + // version: "version".to_string(), + // lang: "rust".to_string(), + // tracer_version: String::new(), + // runtime_id: String::new(), + // sequence: 0, + // agent_aggregation: String::new(), + // // TODO (Yiming): support setting this + // service: "yiming_service".to_string(), + // container_id: String::new(), + // tags: vec![], + // git_commit_sha: String::new(), + // image_tag: String::new(), + // stats: vec![ + // pb::ClientStatsBucket { + // start: std::time::SystemTime::now() + // .duration_since(std::time::UNIX_EPOCH) + // .expect("Time went backwards") + // .as_nanos() as u64, + // duration: 1_000_000_000, + // stats: vec![ + // pb::ClientGroupedStats { + // service: "yiming_service".to_string(), + // name: "yiming_name".to_string(), + // resource: "yiming_resource".to_string(), + // http_status_code: 200, + // r#type: "yiming_type".to_string(), + // db_type: String::new(), + // hits: 1, + // errors: 0, + // duration: 1_000_000_000, + // ok_summary: vec![], + // error_summary: vec![], + // synthetics: false, + // top_level_hits: 0, + // span_kind: String::new(), + // peer_tags: vec![], + // is_trace_root: 1, + // }, + // ], + // agent_time_shift: 0, + // }, + // ], + // }).await?; + + let router = self.make_router(self.stats_tx.clone()); let port = u16::try_from(TRACE_AGENT_PORT).expect("TRACE_AGENT_PORT is too large"); From 102cbbe08c9cd3ad491e349077defa1f59c48927 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Mon, 8 Sep 2025 16:27:53 -0400 Subject: [PATCH 06/43] Add stats concentrator, which pushes data to aggregator --- bottlecap/src/bin/bottlecap/main.rs | 11 +++++--- bottlecap/src/traces/mod.rs | 1 + bottlecap/src/traces/my_stats_processor.rs | 19 ++++++-------- bottlecap/src/traces/stats_agent.rs | 8 +++--- bottlecap/src/traces/stats_concentrator.rs | 30 ++++++++++++++++++++++ 5 files changed, 52 insertions(+), 17 deletions(-) create mode 100644 
bottlecap/src/traces/stats_concentrator.rs diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index ae21da57f..33c6168f8 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -99,6 +99,7 @@ use tracing::{debug, error}; use tracing_subscriber::EnvFilter; use ustr::Ustr; use datadog_trace_protobuf::pb; +use tokio::sync::Mutex; #[allow(clippy::struct_field_names)] struct PendingFlushHandles { @@ -550,6 +551,7 @@ async fn extension_loop_active( proxy_flusher, trace_agent_shutdown_token, stats_aggregator_tx, + stats_aggregator, ) = start_trace_agent( config, &api_key_factory, @@ -559,7 +561,7 @@ async fn extension_loop_active( Arc::clone(&trace_aggregator), ); - start_stats_agent(stats_rx, stats_aggregator_tx, config, &tags_provider); + start_stats_agent(stats_rx, stats_aggregator_tx, config, &tags_provider, stats_aggregator.clone()); let api_runtime_proxy_shutdown_signal = start_api_runtime_proxy( config, @@ -1030,8 +1032,9 @@ fn start_stats_agent( stats_aggregator_tx: Sender, config: &Arc, tags_provider: &Arc, + stats_aggregator: Arc>, ) { - let mut stats_agent = StatsAgent::new(stats_rx, stats_aggregator_tx, Arc::clone(config), Arc::clone(tags_provider)); + let mut stats_agent = StatsAgent::new(stats_rx, stats_aggregator_tx, Arc::clone(config), Arc::clone(tags_provider), stats_aggregator); tokio::spawn(async move { stats_agent.spin().await; }); @@ -1118,6 +1121,7 @@ fn start_trace_agent( Arc, tokio_util::sync::CancellationToken, Sender, + Arc>, ) { // Stats let stats_aggregator = Arc::new(TokioMutex::new(StatsAggregator::default())); @@ -1162,7 +1166,7 @@ fn start_trace_agent( Arc::clone(config), trace_aggregator, trace_processor.clone(), - stats_aggregator, + stats_aggregator.clone(), stats_processor, proxy_aggregator, invocation_processor, @@ -1187,6 +1191,7 @@ fn start_trace_agent( proxy_flusher, shutdown_token, stats_aggregator_tx, + stats_aggregator, ) } diff --git 
a/bottlecap/src/traces/mod.rs b/bottlecap/src/traces/mod.rs index c28918804..cbd5ed704 100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -15,6 +15,7 @@ pub mod trace_agent; pub mod trace_aggregator; pub mod trace_flusher; pub mod trace_processor; +pub mod stats_concentrator; // URL for a call to the Lambda runtime API. The value may be replaced if `AWS_LAMBDA_RUNTIME_API` is set. const LAMBDA_RUNTIME_URL_PREFIX: &str = "http://127.0.0.1:9001"; diff --git a/bottlecap/src/traces/my_stats_processor.rs b/bottlecap/src/traces/my_stats_processor.rs index f2bae51a7..14ce8ae41 100644 --- a/bottlecap/src/traces/my_stats_processor.rs +++ b/bottlecap/src/traces/my_stats_processor.rs @@ -6,21 +6,25 @@ use tokio::sync::mpsc::Sender; use crate::config::Config; use std::sync::Arc; use crate::tags::provider::Provider as TagProvider; +use crate::traces::stats_concentrator::StatsConcentrator; +use tokio::sync::Mutex; +use crate::traces::stats_aggregator::StatsAggregator; -#[derive(Clone)] pub struct MyStatsProcessor { stats_aggregator_tx: Sender, config: Arc, resource: String, + concentrator: Arc>, } impl MyStatsProcessor { #[must_use] - pub fn new(stats_aggregator_tx: Sender, config: Arc, tags_provider: Arc) -> Self { + pub fn new(stats_aggregator_tx: Sender, config: Arc, tags_provider: Arc, stats_aggregator: Arc>) -> Self { let resource = tags_provider .get_canonical_resource_name() .unwrap_or(String::from("aws.lambda")); - Self { stats_aggregator_tx, config, resource } + let concentrator = StatsConcentrator::new(stats_aggregator); + Self { stats_aggregator_tx, config, resource, concentrator: Arc::new(Mutex::new(concentrator)) } } #[allow(clippy::cast_possible_truncation)] @@ -76,13 +80,6 @@ impl MyStatsProcessor { }, ], }; - match self.stats_aggregator_tx.send(stats).await { - Ok(()) => { - debug!("In my stats processor: Successfully buffered stats to be aggregated."); - } - Err(err) => { - error!("In my stats processor: Error sending stats to the 
stats aggregator: {err}"); - } - } + self.concentrator.lock().await.add(stats).await; } } \ No newline at end of file diff --git a/bottlecap/src/traces/stats_agent.rs b/bottlecap/src/traces/stats_agent.rs index 559648b9b..6a0fc4588 100644 --- a/bottlecap/src/traces/stats_agent.rs +++ b/bottlecap/src/traces/stats_agent.rs @@ -8,10 +8,12 @@ use super::my_stats_processor::MyStatsProcessor; use crate::config::Config; use std::sync::Arc; use crate::tags::provider::Provider as TagProvider; - +use crate::traces::stats_aggregator::StatsAggregator; +use tokio::sync::Mutex; #[derive(Clone, Copy)] pub struct StatsEvent; + #[allow(clippy::module_name_repetitions)] pub struct StatsAgent { rx: mpsc::Receiver, @@ -25,11 +27,11 @@ impl StatsAgent { stats_aggregator_tx: Sender, config: Arc, tags_provider: Arc, + stats_aggregator: Arc>, ) -> StatsAgent { - StatsAgent { rx, - processor: MyStatsProcessor::new(stats_aggregator_tx, config, tags_provider), + processor: MyStatsProcessor::new(stats_aggregator_tx, config, tags_provider, stats_aggregator), } } diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs new file mode 100644 index 000000000..9e3efa112 --- /dev/null +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -0,0 +1,30 @@ +/** + * TODO: + * + */ + +use datadog_trace_protobuf::pb; +use std::sync::Arc; +use tokio::sync::Mutex; +use crate::traces::stats_aggregator::StatsAggregator; + +pub struct StatsConcentrator { + // pub storage: Vec, + stats_aggregator: Arc>, +} + +impl StatsConcentrator { + pub fn new(stats_aggregator: Arc>) -> Self { + Self { stats_aggregator } + } + + pub async fn add(&mut self, stats: pb::ClientStatsPayload) { + self.stats_aggregator.lock().await.add(stats); + } + + // pub fn get_batch(&mut self) -> Vec { + // let ret = self.storage.clone(); + // self.storage.clear(); + // ret + // } +} \ No newline at end of file From 82d692b60f389400d5079b0601c3cf97054ca2c6 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: 
Mon, 8 Sep 2025 16:43:39 -0400 Subject: [PATCH 07/43] Avoid returning unused trace_tx --- bottlecap/src/bin/bottlecap/main.rs | 10 ++----- bottlecap/src/traces/my_stats_processor.rs | 6 ++-- bottlecap/src/traces/stats_agent.rs | 3 +- bottlecap/src/traces/trace_agent.rs | 33 ++++++++++------------ 4 files changed, 21 insertions(+), 31 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 33c6168f8..47321113e 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -550,7 +550,6 @@ async fn extension_loop_active( stats_flusher, proxy_flusher, trace_agent_shutdown_token, - stats_aggregator_tx, stats_aggregator, ) = start_trace_agent( config, @@ -561,7 +560,7 @@ async fn extension_loop_active( Arc::clone(&trace_aggregator), ); - start_stats_agent(stats_rx, stats_aggregator_tx, config, &tags_provider, stats_aggregator.clone()); + start_stats_agent(stats_rx, config, &tags_provider, stats_aggregator.clone()); let api_runtime_proxy_shutdown_signal = start_api_runtime_proxy( config, @@ -1029,12 +1028,11 @@ fn start_logs_agent( fn start_stats_agent( stats_rx: Receiver, - stats_aggregator_tx: Sender, config: &Arc, tags_provider: &Arc, stats_aggregator: Arc>, ) { - let mut stats_agent = StatsAgent::new(stats_rx, stats_aggregator_tx, Arc::clone(config), Arc::clone(tags_provider), stats_aggregator); + let mut stats_agent = StatsAgent::new(stats_rx, Arc::clone(config), Arc::clone(tags_provider), stats_aggregator); tokio::spawn(async move { stats_agent.spin().await; }); @@ -1120,7 +1118,6 @@ fn start_trace_agent( Arc, Arc, tokio_util::sync::CancellationToken, - Sender, Arc>, ) { // Stats @@ -1162,7 +1159,7 @@ fn start_trace_agent( Arc::clone(config), )); - let (trace_agent, stats_aggregator_tx) = trace_agent::TraceAgent::new( + let trace_agent = trace_agent::TraceAgent::new( Arc::clone(config), trace_aggregator, trace_processor.clone(), @@ -1190,7 +1187,6 @@ fn start_trace_agent( 
stats_flusher, proxy_flusher, shutdown_token, - stats_aggregator_tx, stats_aggregator, ) } diff --git a/bottlecap/src/traces/my_stats_processor.rs b/bottlecap/src/traces/my_stats_processor.rs index 14ce8ae41..cd6609128 100644 --- a/bottlecap/src/traces/my_stats_processor.rs +++ b/bottlecap/src/traces/my_stats_processor.rs @@ -1,7 +1,6 @@ use crate::traces::stats_agent::StatsEvent; use datadog_trace_protobuf::pb; use tracing::{debug, error}; -use tokio::sync::mpsc::Sender; use crate::config::Config; use std::sync::Arc; @@ -11,7 +10,6 @@ use tokio::sync::Mutex; use crate::traces::stats_aggregator::StatsAggregator; pub struct MyStatsProcessor { - stats_aggregator_tx: Sender, config: Arc, resource: String, concentrator: Arc>, @@ -19,12 +17,12 @@ pub struct MyStatsProcessor { impl MyStatsProcessor { #[must_use] - pub fn new(stats_aggregator_tx: Sender, config: Arc, tags_provider: Arc, stats_aggregator: Arc>) -> Self { + pub fn new(config: Arc, tags_provider: Arc, stats_aggregator: Arc>) -> Self { let resource = tags_provider .get_canonical_resource_name() .unwrap_or(String::from("aws.lambda")); let concentrator = StatsConcentrator::new(stats_aggregator); - Self { stats_aggregator_tx, config, resource, concentrator: Arc::new(Mutex::new(concentrator)) } + Self { config, resource, concentrator: Arc::new(Mutex::new(concentrator)) } } #[allow(clippy::cast_possible_truncation)] diff --git a/bottlecap/src/traces/stats_agent.rs b/bottlecap/src/traces/stats_agent.rs index 6a0fc4588..95911c623 100644 --- a/bottlecap/src/traces/stats_agent.rs +++ b/bottlecap/src/traces/stats_agent.rs @@ -24,14 +24,13 @@ impl StatsAgent { #[must_use] pub fn new( rx: Receiver, - stats_aggregator_tx: Sender, config: Arc, tags_provider: Arc, stats_aggregator: Arc>, ) -> StatsAgent { StatsAgent { rx, - processor: MyStatsProcessor::new(stats_aggregator_tx, config, tags_provider, stats_aggregator), + processor: MyStatsProcessor::new(config, tags_provider, stats_aggregator), } } diff --git 
a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 6d72f972f..b7d0638e2 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -103,7 +103,7 @@ pub struct TraceAgent { shutdown_token: CancellationToken, tx: Sender, stats_tx: Sender, - stats_rx: Arc>>>, + stats_rx: Arc>>, } #[derive(Clone, Copy)] @@ -125,7 +125,7 @@ impl TraceAgent { invocation_processor: Arc>, appsec_processor: Option>>, tags_provider: Arc, - ) -> (TraceAgent, Sender) { + ) -> TraceAgent { // Set up a channel to send processed traces to our trace aggregator. tx is passed through each // endpoint_handler to the trace processor, which uses it to send de-serialized // processed trace payloads to our trace aggregator. @@ -146,7 +146,7 @@ impl TraceAgent { Receiver, ) = mpsc::channel(STATS_PAYLOAD_CHANNEL_BUFFER_SIZE); - let agent = TraceAgent { + TraceAgent { config: config.clone(), trace_processor, stats_aggregator, @@ -155,13 +155,11 @@ impl TraceAgent { invocation_processor, appsec_processor, tags_provider, - tx: trace_tx, shutdown_token: CancellationToken::new(), + tx: trace_tx, stats_tx: stats_tx.clone(), - stats_rx: Arc::new(Mutex::new(Some(stats_rx))), - }; - - (agent, stats_tx) + stats_rx: Arc::new(Mutex::new(stats_rx)), + } } #[allow(clippy::cast_possible_truncation)] @@ -169,15 +167,15 @@ impl TraceAgent { let now: Instant = Instant::now(); // Start the stats aggregator, which receives and buffers stats payloads to be consumed by the stats flusher. 
- if let Some(mut stats_rx) = self.stats_rx.lock().await.take() { - let stats_aggregator = self.stats_aggregator.clone(); - tokio::spawn(async move { - while let Some(stats_payload) = stats_rx.recv().await { - let mut aggregator = stats_aggregator.lock().await; - aggregator.add(stats_payload); - } - }); - } + let stats_aggregator = self.stats_aggregator.clone(); + let stats_rx = self.stats_rx.clone(); + tokio::spawn(async move { + let mut stats_rx = stats_rx.lock().await; + while let Some(stats_payload) = stats_rx.recv().await { + let mut aggregator = stats_aggregator.lock().await; + aggregator.add(stats_payload); + } + }); // stats_tx.clone().send(pb::ClientStatsPayload { // hostname: String::new(), @@ -230,7 +228,6 @@ impl TraceAgent { let router = self.make_router(self.stats_tx.clone()); - let port = u16::try_from(TRACE_AGENT_PORT).expect("TRACE_AGENT_PORT is too large"); let socket = SocketAddr::from(([127, 0, 0, 1], port)); let listener = tokio::net::TcpListener::bind(&socket).await?; From 0589583dc33cbf9b4821f580dff562ff51e3dbc8 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Mon, 8 Sep 2025 16:46:30 -0400 Subject: [PATCH 08/43] Move start_stats_agent() inside start_stats_agent() --- bottlecap/src/bin/bottlecap/main.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 47321113e..904301ccd 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -550,7 +550,6 @@ async fn extension_loop_active( stats_flusher, proxy_flusher, trace_agent_shutdown_token, - stats_aggregator, ) = start_trace_agent( config, &api_key_factory, @@ -558,10 +557,9 @@ async fn extension_loop_active( Arc::clone(&invocation_processor), appsec_processor.clone(), Arc::clone(&trace_aggregator), + stats_rx, ); - start_stats_agent(stats_rx, config, &tags_provider, stats_aggregator.clone()); - let api_runtime_proxy_shutdown_signal = start_api_runtime_proxy( 
config, aws_config, @@ -1111,6 +1109,7 @@ fn start_trace_agent( invocation_processor: Arc>, appsec_processor: Option>>, trace_aggregator: Arc>, + stats_rx: Receiver, ) -> ( Sender, Arc, @@ -1118,7 +1117,6 @@ fn start_trace_agent( Arc, Arc, tokio_util::sync::CancellationToken, - Arc>, ) { // Stats let stats_aggregator = Arc::new(TokioMutex::new(StatsAggregator::default())); @@ -1180,6 +1178,8 @@ fn start_trace_agent( } }); + start_stats_agent(stats_rx, config, &tags_provider, stats_aggregator); + ( trace_agent_channel, trace_flusher, @@ -1187,7 +1187,6 @@ fn start_trace_agent( stats_flusher, proxy_flusher, shutdown_token, - stats_aggregator, ) } From 1c7bb7cee8e4bfab39ef320283d1f1c26e3ad2db Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Mon, 8 Sep 2025 17:16:16 -0400 Subject: [PATCH 09/43] Make stats aggregator pull from stats concentrator --- bottlecap/src/bin/bottlecap/main.rs | 33 +++++++++--------- bottlecap/src/traces/my_stats_processor.rs | 8 ++--- bottlecap/src/traces/stats_agent.rs | 7 ++-- bottlecap/src/traces/stats_aggregator.rs | 39 ++++++++++++++++------ bottlecap/src/traces/stats_concentrator.rs | 27 ++++++++------- bottlecap/src/traces/stats_flusher.rs | 4 +-- bottlecap/src/traces/trace_agent.rs | 15 +++++++++ 7 files changed, 83 insertions(+), 50 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 904301ccd..da116bc9b 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -60,6 +60,7 @@ use bottlecap::{ trace_flusher::{self, ServerlessTraceFlusher, TraceFlusher}, trace_processor::{self, SendingTraceProcessor}, stats_agent::{StatsEvent, StatsAgent}, + stats_concentrator::StatsConcentrator, }, }; use datadog_fips::reqwest_adapter::create_reqwest_client_builder; @@ -98,8 +99,6 @@ use tokio_util::sync::CancellationToken; use tracing::{debug, error}; use tracing_subscriber::EnvFilter; use ustr::Ustr; -use datadog_trace_protobuf::pb; -use tokio::sync::Mutex; 
#[allow(clippy::struct_field_names)] struct PendingFlushHandles { @@ -1024,17 +1023,17 @@ fn start_logs_agent( (logs_agent_channel, logs_flusher) } -fn start_stats_agent( - stats_rx: Receiver, - config: &Arc, - tags_provider: &Arc, - stats_aggregator: Arc>, -) { - let mut stats_agent = StatsAgent::new(stats_rx, Arc::clone(config), Arc::clone(tags_provider), stats_aggregator); - tokio::spawn(async move { - stats_agent.spin().await; - }); -} +// fn start_stats_agent( +// stats_rx: Receiver, +// config: &Arc, +// tags_provider: &Arc, +// stats_aggregator: Arc>, +// ) { +// let mut stats_agent = StatsAgent::new(stats_rx, Arc::clone(config), Arc::clone(tags_provider), stats_aggregator); +// tokio::spawn(async move { +// stats_agent.spin().await; +// }); +// } fn start_metrics_flushers( api_key_factory: Arc, @@ -1119,7 +1118,8 @@ fn start_trace_agent( tokio_util::sync::CancellationToken, ) { // Stats - let stats_aggregator = Arc::new(TokioMutex::new(StatsAggregator::default())); + let stats_concentrator = Arc::new(TokioMutex::new(StatsConcentrator::new())); + let stats_aggregator = Arc::new(TokioMutex::new(StatsAggregator::new_with_concentrator(stats_concentrator.clone()))); let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher::new( api_key_factory.clone(), stats_aggregator.clone(), @@ -1167,6 +1167,9 @@ fn start_trace_agent( invocation_processor, appsec_processor, Arc::clone(tags_provider), + // TODO: rename this + stats_rx, + stats_concentrator.clone(), ); let trace_agent_channel = trace_agent.get_sender_copy(); let shutdown_token = trace_agent.shutdown_token(); @@ -1178,7 +1181,7 @@ fn start_trace_agent( } }); - start_stats_agent(stats_rx, config, &tags_provider, stats_aggregator); + // start_stats_agent(stats_rx, config, &tags_provider, stats_aggregator); ( trace_agent_channel, diff --git a/bottlecap/src/traces/my_stats_processor.rs b/bottlecap/src/traces/my_stats_processor.rs index cd6609128..057feb55e 100644 --- 
a/bottlecap/src/traces/my_stats_processor.rs +++ b/bottlecap/src/traces/my_stats_processor.rs @@ -7,7 +7,6 @@ use std::sync::Arc; use crate::tags::provider::Provider as TagProvider; use crate::traces::stats_concentrator::StatsConcentrator; use tokio::sync::Mutex; -use crate::traces::stats_aggregator::StatsAggregator; pub struct MyStatsProcessor { config: Arc, @@ -17,12 +16,11 @@ pub struct MyStatsProcessor { impl MyStatsProcessor { #[must_use] - pub fn new(config: Arc, tags_provider: Arc, stats_aggregator: Arc>) -> Self { + pub fn new(config: Arc, tags_provider: Arc, stats_concentrator: Arc>) -> Self { let resource = tags_provider .get_canonical_resource_name() .unwrap_or(String::from("aws.lambda")); - let concentrator = StatsConcentrator::new(stats_aggregator); - Self { config, resource, concentrator: Arc::new(Mutex::new(concentrator)) } + Self { config, resource, concentrator: stats_concentrator } } #[allow(clippy::cast_possible_truncation)] @@ -78,6 +76,6 @@ impl MyStatsProcessor { }, ], }; - self.concentrator.lock().await.add(stats).await; + self.concentrator.lock().await.add(stats); } } \ No newline at end of file diff --git a/bottlecap/src/traces/stats_agent.rs b/bottlecap/src/traces/stats_agent.rs index 95911c623..47c31a986 100644 --- a/bottlecap/src/traces/stats_agent.rs +++ b/bottlecap/src/traces/stats_agent.rs @@ -4,11 +4,11 @@ use tracing::debug; use datadog_trace_protobuf::pb; use super::my_stats_processor::MyStatsProcessor; +use super::stats_concentrator::StatsConcentrator; use crate::config::Config; use std::sync::Arc; use crate::tags::provider::Provider as TagProvider; -use crate::traces::stats_aggregator::StatsAggregator; use tokio::sync::Mutex; #[derive(Clone, Copy)] pub struct StatsEvent; @@ -26,11 +26,12 @@ impl StatsAgent { rx: Receiver, config: Arc, tags_provider: Arc, - stats_aggregator: Arc>, + stats_concentrator: Arc>, ) -> StatsAgent { + let processor = MyStatsProcessor::new(config, tags_provider, stats_concentrator); StatsAgent { rx, - 
processor: MyStatsProcessor::new(config, tags_provider, stats_aggregator), + processor, } } diff --git a/bottlecap/src/traces/stats_aggregator.rs b/bottlecap/src/traces/stats_aggregator.rs index c10106ac3..e1d4fbc9a 100644 --- a/bottlecap/src/traces/stats_aggregator.rs +++ b/bottlecap/src/traces/stats_aggregator.rs @@ -1,5 +1,8 @@ use datadog_trace_protobuf::pb::ClientStatsPayload; use std::collections::VecDeque; +use std::sync::Arc; +use tokio::sync::Mutex; +use crate::traces::stats_concentrator::StatsConcentrator; #[allow(clippy::empty_line_after_doc_comments)] /// Maximum number of entries in a stat payload. @@ -24,30 +27,36 @@ pub struct StatsAggregator { queue: VecDeque, max_content_size_bytes: usize, buffer: Vec, + stats_concentrator: Arc>, } -impl Default for StatsAggregator { - fn default() -> Self { - StatsAggregator { - queue: VecDeque::new(), - max_content_size_bytes: MAX_CONTENT_SIZE_BYTES, - buffer: Vec::new(), - } - } -} +// impl Default for StatsAggregator { +// fn default() -> Self { +// StatsAggregator { +// queue: VecDeque::new(), +// max_content_size_bytes: MAX_CONTENT_SIZE_BYTES, +// buffer: Vec::new(), +// } +// } +// } /// Takes in individual trace stats payloads and aggregates them into batches to be flushed to Datadog. impl StatsAggregator { #[allow(dead_code)] #[allow(clippy::must_use_candidate)] - pub fn new(max_content_size_bytes: usize) -> Self { + pub fn new(max_content_size_bytes: usize, stats_concentrator: Arc>) -> Self { StatsAggregator { queue: VecDeque::new(), max_content_size_bytes, buffer: Vec::new(), + stats_concentrator, } } + pub fn new_with_concentrator(stats_concentrator: Arc>) -> Self { + Self::new(MAX_CONTENT_SIZE_BYTES, stats_concentrator) + } + /// Takes in an individual trace stats payload. 
pub fn add(&mut self, payload: ClientStatsPayload) { debug!("StatsAggregator | adding stats payload to aggregator: {payload:?}"); @@ -55,7 +64,15 @@ impl StatsAggregator { } /// Returns a batch of trace stats payloads, subject to the max content size. - pub fn get_batch(&mut self) -> Vec { + pub async fn get_batch(&mut self) -> Vec { + // Pull stats data from stats concentrator + let mut stats_concentrator = self.stats_concentrator.lock().await; + let mut stats = stats_concentrator.get_batch(); + while !stats.is_empty() { + self.queue.extend(stats); + stats = stats_concentrator.get_batch(); + } + let mut batch_size = 0; // Fill the batch diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 9e3efa112..319231b44 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -4,27 +4,26 @@ */ use datadog_trace_protobuf::pb; -use std::sync::Arc; -use tokio::sync::Mutex; -use crate::traces::stats_aggregator::StatsAggregator; +#[derive(Default)] pub struct StatsConcentrator { - // pub storage: Vec, - stats_aggregator: Arc>, + pub storage: Vec, } impl StatsConcentrator { - pub fn new(stats_aggregator: Arc>) -> Self { - Self { stats_aggregator } + #[must_use] + pub fn new() -> Self { + Self { storage: Vec::new() } } - pub async fn add(&mut self, stats: pb::ClientStatsPayload) { - self.stats_aggregator.lock().await.add(stats); + pub fn add(&mut self, stats: pb::ClientStatsPayload) { + self.storage.push(stats); } - // pub fn get_batch(&mut self) -> Vec { - // let ret = self.storage.clone(); - // self.storage.clear(); - // ret - // } + #[must_use] + pub fn get_batch(&mut self) -> Vec { + let ret = self.storage.clone(); + self.storage.clear(); + ret + } } \ No newline at end of file diff --git a/bottlecap/src/traces/stats_flusher.rs b/bottlecap/src/traces/stats_flusher.rs index 71a86fe9d..b90167359 100644 --- a/bottlecap/src/traces/stats_flusher.rs +++ 
b/bottlecap/src/traces/stats_flusher.rs @@ -119,11 +119,11 @@ impl StatsFlusher for ServerlessStatsFlusher { async fn flush(&self) { let mut guard = self.aggregator.lock().await; - let mut stats = guard.get_batch(); + let mut stats = guard.get_batch().await; while !stats.is_empty() { self.send(stats).await; - stats = guard.get_batch(); + stats = guard.get_batch().await; } } } diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index b7d0638e2..42c2af028 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -41,6 +41,10 @@ use datadog_trace_protobuf::pb; use datadog_trace_utils::trace_utils::{self}; use ddcommon::hyper_migration; +use crate::traces::stats_agent::StatsAgent; +use crate::traces::stats_agent::StatsEvent; +use crate::traces::stats_concentrator::StatsConcentrator; + const TRACE_AGENT_PORT: usize = 8126; // Agent endpoints @@ -104,6 +108,7 @@ pub struct TraceAgent { tx: Sender, stats_tx: Sender, stats_rx: Arc>>, + stats_agent: Arc>, } #[derive(Clone, Copy)] @@ -125,6 +130,8 @@ impl TraceAgent { invocation_processor: Arc>, appsec_processor: Option>>, tags_provider: Arc, + my_stats_rx: Receiver, + stats_concentrator: Arc>, ) -> TraceAgent { // Set up a channel to send processed traces to our trace aggregator. 
tx is passed through each // endpoint_handler to the trace processor, which uses it to send de-serialized @@ -146,6 +153,8 @@ impl TraceAgent { Receiver, ) = mpsc::channel(STATS_PAYLOAD_CHANNEL_BUFFER_SIZE); + let stats_agent = StatsAgent::new(my_stats_rx, config.clone(), tags_provider.clone(), stats_concentrator.clone()); + TraceAgent { config: config.clone(), trace_processor, @@ -159,6 +168,7 @@ impl TraceAgent { tx: trace_tx, stats_tx: stats_tx.clone(), stats_rx: Arc::new(Mutex::new(stats_rx)), + stats_agent: Arc::new(Mutex::new(stats_agent)), } } @@ -177,6 +187,11 @@ impl TraceAgent { } }); + let stats_agent = self.stats_agent.clone(); + tokio::spawn(async move { + stats_agent.lock().await.spin().await; + }); + // stats_tx.clone().send(pb::ClientStatsPayload { // hostname: String::new(), // // TODO (Yiming): support setting this From 03e88cc49795263876d70a4ff0989a21fa049c45 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Tue, 9 Sep 2025 11:01:31 -0400 Subject: [PATCH 10/43] Add logging --- bottlecap/src/traces/stats_aggregator.rs | 1 + bottlecap/src/traces/stats_concentrator.rs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/bottlecap/src/traces/stats_aggregator.rs b/bottlecap/src/traces/stats_aggregator.rs index e1d4fbc9a..0abd139f8 100644 --- a/bottlecap/src/traces/stats_aggregator.rs +++ b/bottlecap/src/traces/stats_aggregator.rs @@ -65,6 +65,7 @@ impl StatsAggregator { /// Returns a batch of trace stats payloads, subject to the max content size. 
pub async fn get_batch(&mut self) -> Vec { + debug!("StatsAggregator | getting batch of stats payloads"); // Pull stats data from stats concentrator let mut stats_concentrator = self.stats_concentrator.lock().await; let mut stats = stats_concentrator.get_batch(); diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 319231b44..96da1130b 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -4,6 +4,7 @@ */ use datadog_trace_protobuf::pb; +use tracing::debug; #[derive(Default)] pub struct StatsConcentrator { @@ -17,6 +18,7 @@ impl StatsConcentrator { } pub fn add(&mut self, stats: pb::ClientStatsPayload) { + debug!("StatsConcentrator | adding stats payload to concentrator: {stats:?}"); self.storage.push(stats); } From e06b1e7b19ac8b64ab323cc7ebd4d52df4682869 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Tue, 9 Sep 2025 15:35:28 -0400 Subject: [PATCH 11/43] Fix double counting --- bottlecap/src/lifecycle/invocation/processor.rs | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index afbe5fa10..3b75278f2 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -174,7 +174,7 @@ impl Processor { if let Some((headers, payload_value)) = self.context_buffer.pair_invoke_event(&request_id) { // Infer span self.inferrer.infer_span(&payload_value, &self.aws_config); - self.process_on_universal_instrumentation_start(request_id, headers, payload_value); + self.process_on_universal_instrumentation_start(request_id.clone(), headers, payload_value); } // Send stats event @@ -294,15 +294,6 @@ impl Processor { .try_into() .unwrap_or_default(); self.context_buffer.add_start_time(&request_id, start_time); - let stats_event = StatsEvent; - match self.stats_agent_tx.send(stats_event).await { - 
Ok(()) => { - debug!("Successfully buffered stats event to be aggregated."); - } - Err(err) => { - error!("Error sending stats event to the stats aggregator: {err}"); - } - } } #[allow(clippy::too_many_arguments)] From 92ba7b23a518364b89561b5feab628416519cd1f Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Tue, 9 Sep 2025 16:00:48 -0400 Subject: [PATCH 12/43] Move ClientStatsPayload construction to StatsConcentrator --- bottlecap/src/bin/bottlecap/main.rs | 6 +- .../src/lifecycle/invocation/processor.rs | 10 ++- bottlecap/src/traces/my_stats_processor.rs | 67 ++--------------- bottlecap/src/traces/stats_agent.rs | 17 ++--- bottlecap/src/traces/stats_concentrator.rs | 73 ++++++++++++++++--- bottlecap/src/traces/trace_agent.rs | 2 +- 6 files changed, 88 insertions(+), 87 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index da116bc9b..27f2f575f 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -59,7 +59,7 @@ use bottlecap::{ trace_aggregator::{self, SendDataBuilderInfo}, trace_flusher::{self, ServerlessTraceFlusher, TraceFlusher}, trace_processor::{self, SendingTraceProcessor}, - stats_agent::{StatsEvent, StatsAgent}, + stats_agent::StatsEvent, stats_concentrator::StatsConcentrator, }, }; @@ -900,7 +900,7 @@ async fn handle_event_bus_event( } TelemetryRecord::PlatformStart { request_id, .. 
} => { let mut p = invocation_processor.lock().await; - p.on_platform_start(request_id, event.time).await; + p.on_platform_start(request_id, event.time); drop(p); } TelemetryRecord::PlatformRuntimeDone { @@ -1118,7 +1118,7 @@ fn start_trace_agent( tokio_util::sync::CancellationToken, ) { // Stats - let stats_concentrator = Arc::new(TokioMutex::new(StatsConcentrator::new())); + let stats_concentrator = Arc::new(TokioMutex::new(StatsConcentrator::new(Arc::clone(config), Arc::clone(tags_provider)))); let stats_aggregator = Arc::new(TokioMutex::new(StatsAggregator::new_with_concentrator(stats_concentrator.clone()))); let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher::new( api_key_factory.clone(), diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 3b75278f2..01f89be6f 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -47,6 +47,7 @@ use tokio::sync::mpsc::Sender; pub const MS_TO_NS: f64 = 1_000_000.0; pub const S_TO_MS: u64 = 1_000; pub const S_TO_NS: f64 = 1_000_000_000.0; +pub const S_TO_NS_I64: i64 = 1_000_000_000; pub const PROACTIVE_INITIALIZATION_THRESHOLD_MS: u64 = 10_000; pub const DATADOG_INVOCATION_ERROR_MESSAGE_KEY: &str = "x-datadog-invocation-error-msg"; @@ -133,7 +134,7 @@ impl Processor { self.context_buffer .start_context(&request_id, invocation_span); - let timestamp = std::time::UNIX_EPOCH + let timestamp_secs = std::time::UNIX_EPOCH .elapsed() .expect("can't poll clock, unrecoverable") .as_secs() @@ -167,7 +168,7 @@ impl Processor { } // Increment the invocation metric - self.enhanced_metrics.increment_invocation_metric(timestamp); + self.enhanced_metrics.increment_invocation_metric(timestamp_secs); self.enhanced_metrics.set_invoked_received(); // If `UniversalInstrumentationStart` event happened first, process it @@ -178,7 +179,8 @@ impl Processor { } // Send stats event - let stats_event = StatsEvent; 
+ let timestamp_ns = timestamp_secs * S_TO_NS_I64; + let stats_event = StatsEvent { time: timestamp_ns.try_into().unwrap_or_default(), dummy: 0 }; match self.stats_agent_tx.send(stats_event).await { Ok(()) => { debug!("Successfully buffered stats event to be aggregated."); @@ -286,7 +288,7 @@ impl Processor { /// Given a `request_id` and the time of the platform start, add the start time to the context buffer. /// - pub async fn on_platform_start(&mut self, request_id: String, time: DateTime) { + pub fn on_platform_start(&mut self, request_id: String, time: DateTime) { let start_time: i64 = SystemTime::from(time) .duration_since(UNIX_EPOCH) .expect("time went backwards") diff --git a/bottlecap/src/traces/my_stats_processor.rs b/bottlecap/src/traces/my_stats_processor.rs index 057feb55e..f876af235 100644 --- a/bottlecap/src/traces/my_stats_processor.rs +++ b/bottlecap/src/traces/my_stats_processor.rs @@ -1,81 +1,28 @@ use crate::traces::stats_agent::StatsEvent; -use datadog_trace_protobuf::pb; -use tracing::{debug, error}; +use tracing::{debug}; -use crate::config::Config; use std::sync::Arc; -use crate::tags::provider::Provider as TagProvider; use crate::traces::stats_concentrator::StatsConcentrator; use tokio::sync::Mutex; pub struct MyStatsProcessor { - config: Arc, - resource: String, concentrator: Arc>, } impl MyStatsProcessor { #[must_use] - pub fn new(config: Arc, tags_provider: Arc, stats_concentrator: Arc>) -> Self { - let resource = tags_provider - .get_canonical_resource_name() - .unwrap_or(String::from("aws.lambda")); - Self { config, resource, concentrator: stats_concentrator } + pub fn new(stats_concentrator: Arc>) -> Self { + Self { concentrator: stats_concentrator } } #[allow(clippy::cast_possible_truncation)] pub async fn process( &self, - _event: StatsEvent, + // TODO: get time, duration and hit/error from stats event + event: StatsEvent, ) { debug!("In my stats processor: Processing stats event."); - let stats = pb::ClientStatsPayload { - // 
hostname: String::new(), - hostname: "yiming7-hostname".to_string(), - env: self.config.env.clone().unwrap_or_default(), - version: self.config.version.clone().unwrap_or_default(), - lang: "rust".to_string(), - tracer_version: String::new(), - runtime_id: String::new(), - sequence: 0, - agent_aggregation: String::new(), - // service: self.config.service.clone().unwrap_or(String::new()), - service: self.config.service.clone().unwrap_or_default(), - container_id: String::new(), - tags: vec![], - git_commit_sha: String::new(), - image_tag: String::new(), - stats: vec![ - pb::ClientStatsBucket { - start: std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .expect("Invalid time") - .as_nanos() as u64, - duration: 1_000_000_000, - stats: vec![ - pb::ClientGroupedStats { - service: self.config.service.clone().unwrap_or_default(), - name: "yiming_name".to_string(), - resource: self.resource.clone(), - http_status_code: 200, - r#type: String::new(), - db_type: String::new(), - hits: 1, - errors: 0, - duration: 1_000_000_000, - ok_summary: vec![], - error_summary: vec![], - synthetics: false, - top_level_hits: 0, - span_kind: String::new(), - peer_tags: vec![], - is_trace_root: 1, - }, - ], - agent_time_shift: 0, - }, - ], - }; - self.concentrator.lock().await.add(stats); + + self.concentrator.lock().await.add(event); } } \ No newline at end of file diff --git a/bottlecap/src/traces/stats_agent.rs b/bottlecap/src/traces/stats_agent.rs index 47c31a986..4d8306c34 100644 --- a/bottlecap/src/traces/stats_agent.rs +++ b/bottlecap/src/traces/stats_agent.rs @@ -1,17 +1,16 @@ -use tokio::sync::mpsc::{self, Receiver, Sender}; +use tokio::sync::mpsc::{self, Receiver}; use tracing::debug; -use datadog_trace_protobuf::pb; - use super::my_stats_processor::MyStatsProcessor; use super::stats_concentrator::StatsConcentrator; -use crate::config::Config; use std::sync::Arc; -use crate::tags::provider::Provider as TagProvider; use tokio::sync::Mutex; -#[derive(Clone, Copy)] 
-pub struct StatsEvent; +#[derive(Clone, Copy, Default)] +pub struct StatsEvent { + pub time: u64, + pub dummy: u64, +} #[allow(clippy::module_name_repetitions)] @@ -24,11 +23,9 @@ impl StatsAgent { #[must_use] pub fn new( rx: Receiver, - config: Arc, - tags_provider: Arc, stats_concentrator: Arc>, ) -> StatsAgent { - let processor = MyStatsProcessor::new(config, tags_provider, stats_concentrator); + let processor = MyStatsProcessor::new(stats_concentrator); StatsAgent { rx, processor, diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 96da1130b..31ae0470a 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -4,28 +4,83 @@ */ use datadog_trace_protobuf::pb; -use tracing::debug; +use crate::traces::stats_agent::StatsEvent; +use crate::config::Config; +use std::sync::Arc; +use crate::tags::provider::Provider as TagProvider; + -#[derive(Default)] pub struct StatsConcentrator { - pub storage: Vec, + pub storage: Vec, + pub config: Arc, + pub resource: String, } impl StatsConcentrator { #[must_use] - pub fn new() -> Self { - Self { storage: Vec::new() } + pub fn new(config: Arc, tags_provider: Arc) -> Self { + let resource = tags_provider + .get_canonical_resource_name() + .unwrap_or(String::from("aws.lambda")); + Self { storage: Vec::new(), config, resource } } - pub fn add(&mut self, stats: pb::ClientStatsPayload) { - debug!("StatsConcentrator | adding stats payload to concentrator: {stats:?}"); - self.storage.push(stats); + pub fn add(&mut self, stats_event: StatsEvent) { + // debug!("StatsConcentrator | adding stats payload to concentrator: {stats:?}"); + self.storage.push(stats_event); } #[must_use] pub fn get_batch(&mut self) -> Vec { - let ret = self.storage.clone(); + let ret = self.storage.iter().map(|stats_event| self.construct_stats_payload(stats_event)).collect(); self.storage.clear(); ret } + + fn construct_stats_payload(&self, stats_event: 
&StatsEvent) -> pb::ClientStatsPayload { + pb::ClientStatsPayload { + // hostname: String::new(), + hostname: "yiming7-hostname".to_string(), + env: self.config.env.clone().unwrap_or_default(), + version: self.config.version.clone().unwrap_or_default(), + lang: "rust".to_string(), + tracer_version: String::new(), + runtime_id: String::new(), + sequence: 0, + agent_aggregation: String::new(), + // service: self.config.service.clone().unwrap_or(String::new()), + service: self.config.service.clone().unwrap_or_default(), + container_id: String::new(), + tags: vec![], + git_commit_sha: String::new(), + image_tag: String::new(), + stats: vec![ + pb::ClientStatsBucket { + start: stats_event.time, + duration: 1_000_000_000, + stats: vec![ + pb::ClientGroupedStats { + service: self.config.service.clone().unwrap_or_default(), + name: "yiming_name".to_string(), + resource: self.resource.clone(), + http_status_code: 200, + r#type: String::new(), + db_type: String::new(), + hits: 1, + errors: 0, + duration: 1_000_000_000, + ok_summary: vec![], + error_summary: vec![], + synthetics: false, + top_level_hits: 0, + span_kind: String::new(), + peer_tags: vec![], + is_trace_root: 1, + }, + ], + agent_time_shift: 0, + }, + ], + } + } } \ No newline at end of file diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 42c2af028..51a633cd4 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -153,7 +153,7 @@ impl TraceAgent { Receiver, ) = mpsc::channel(STATS_PAYLOAD_CHANNEL_BUFFER_SIZE); - let stats_agent = StatsAgent::new(my_stats_rx, config.clone(), tags_provider.clone(), stats_concentrator.clone()); + let stats_agent = StatsAgent::new(my_stats_rx, stats_concentrator.clone()); TraceAgent { config: config.clone(), From 853e95546b155e1835404a7e57e665a8adf6069c Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Tue, 9 Sep 2025 16:24:35 -0400 Subject: [PATCH 13/43] Create buckets --- 
.../src/lifecycle/invocation/processor.rs | 12 +++---- bottlecap/src/traces/stats_concentrator.rs | 35 +++++++++++++------ 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 01f89be6f..38b732318 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -47,7 +47,7 @@ use tokio::sync::mpsc::Sender; pub const MS_TO_NS: f64 = 1_000_000.0; pub const S_TO_MS: u64 = 1_000; pub const S_TO_NS: f64 = 1_000_000_000.0; -pub const S_TO_NS_I64: i64 = 1_000_000_000; +pub const S_TO_NS_U64: u64 = 1_000_000_000; pub const PROACTIVE_INITIALIZATION_THRESHOLD_MS: u64 = 10_000; pub const DATADOG_INVOCATION_ERROR_MESSAGE_KEY: &str = "x-datadog-invocation-error-msg"; @@ -137,9 +137,7 @@ impl Processor { let timestamp_secs = std::time::UNIX_EPOCH .elapsed() .expect("can't poll clock, unrecoverable") - .as_secs() - .try_into() - .unwrap_or_default(); + .as_secs(); if self.config.lambda_proc_enhanced_metrics { // Collect offsets for network and cpu metrics @@ -168,7 +166,7 @@ impl Processor { } // Increment the invocation metric - self.enhanced_metrics.increment_invocation_metric(timestamp_secs); + self.enhanced_metrics.increment_invocation_metric(timestamp_secs.try_into().unwrap_or_default()); self.enhanced_metrics.set_invoked_received(); // If `UniversalInstrumentationStart` event happened first, process it @@ -179,8 +177,8 @@ impl Processor { } // Send stats event - let timestamp_ns = timestamp_secs * S_TO_NS_I64; - let stats_event = StatsEvent { time: timestamp_ns.try_into().unwrap_or_default(), dummy: 0 }; + let timestamp_ns = timestamp_secs * S_TO_NS_U64; + let stats_event = StatsEvent { time: timestamp_ns, dummy: 0 }; match self.stats_agent_tx.send(stats_event).await { Ok(()) => { debug!("Successfully buffered stats event to be aggregated."); diff --git a/bottlecap/src/traces/stats_concentrator.rs 
b/bottlecap/src/traces/stats_concentrator.rs index 31ae0470a..9c442c799 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -8,36 +8,49 @@ use crate::traces::stats_agent::StatsEvent; use crate::config::Config; use std::sync::Arc; use crate::tags::provider::Provider as TagProvider; +use std::collections::HashMap; +use crate::lifecycle::invocation::processor::S_TO_NS_U64; +struct Bucket { + pub hits: u64, +} pub struct StatsConcentrator { - pub storage: Vec, - pub config: Arc, - pub resource: String, + config: Arc, + resource: String, + buckets: HashMap, } +const BUCKET_DURATION_NS: u64 = 10 * S_TO_NS_U64; + impl StatsConcentrator { #[must_use] pub fn new(config: Arc, tags_provider: Arc) -> Self { let resource = tags_provider .get_canonical_resource_name() .unwrap_or(String::from("aws.lambda")); - Self { storage: Vec::new(), config, resource } + Self { buckets: HashMap::new(), config, resource } } pub fn add(&mut self, stats_event: StatsEvent) { // debug!("StatsConcentrator | adding stats payload to concentrator: {stats:?}"); - self.storage.push(stats_event); + let bucket_timestamp = Self::get_bucket_timestamp(stats_event.time); + let bucket = self.buckets.entry(bucket_timestamp).or_insert(Bucket { hits: 0 }); + bucket.hits += 1; + } + + fn get_bucket_timestamp(timestamp: u64) -> u64 { + timestamp - timestamp % BUCKET_DURATION_NS } #[must_use] pub fn get_batch(&mut self) -> Vec { - let ret = self.storage.iter().map(|stats_event| self.construct_stats_payload(stats_event)).collect(); - self.storage.clear(); + let ret = self.buckets.iter().map(|(timestamp, bucket)| self.construct_stats_payload(*timestamp, bucket)).collect(); + self.buckets.clear(); ret } - fn construct_stats_payload(&self, stats_event: &StatsEvent) -> pb::ClientStatsPayload { + fn construct_stats_payload(&self, timestamp: u64, bucket: &Bucket) -> pb::ClientStatsPayload { pb::ClientStatsPayload { // hostname: String::new(), hostname: 
"yiming7-hostname".to_string(), @@ -56,7 +69,8 @@ impl StatsConcentrator { image_tag: String::new(), stats: vec![ pb::ClientStatsBucket { - start: stats_event.time, + start: timestamp, + // TODO: consider changing this to 0 duration: 1_000_000_000, stats: vec![ pb::ClientGroupedStats { @@ -66,8 +80,9 @@ impl StatsConcentrator { http_status_code: 200, r#type: String::new(), db_type: String::new(), - hits: 1, + hits: bucket.hits, errors: 0, + // TODO: consider changing this to 0 duration: 1_000_000_000, ok_summary: vec![], error_summary: vec![], From b383773296b55b9ccd3eaf937fd5b356393c3921 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Wed, 10 Sep 2025 11:57:16 -0400 Subject: [PATCH 14/43] Do not flush the latest two buckets --- bottlecap/src/bin/bottlecap/main.rs | 9 +++++++- bottlecap/src/traces/stats_aggregator.rs | 10 ++++----- bottlecap/src/traces/stats_concentrator.rs | 25 +++++++++++++++++++--- bottlecap/src/traces/stats_flusher.rs | 8 +++---- 4 files changed, 39 insertions(+), 13 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 27f2f575f..ae9dce873 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -634,6 +634,7 @@ async fn extension_loop_active( &proxy_flusher, &mut race_flush_interval, &metrics_aggr_handle.clone(), + false, ) .await; } @@ -649,6 +650,7 @@ async fn extension_loop_active( &proxy_flusher, &mut race_flush_interval, &metrics_aggr_handle.clone(), + false, ) .await; let next_response = next_event(client, &r.extension_id).await; @@ -726,6 +728,7 @@ async fn extension_loop_active( &proxy_flusher, &mut race_flush_interval, &metrics_aggr_handle, + false, ) .await; last_continuous_flush_error = false; @@ -766,6 +769,7 @@ async fn extension_loop_active( &proxy_flusher, &mut race_flush_interval, &metrics_aggr_handle, + false, ) .await; } @@ -823,6 +827,7 @@ async fn extension_loop_active( &proxy_flusher, &mut race_flush_interval, &metrics_aggr_handle, + 
true, ) .await; return Ok(()); @@ -830,6 +835,7 @@ async fn extension_loop_active( } } +#[allow(clippy::too_many_arguments)] async fn blocking_flush_all( logs_flusher: &LogsFlusher, metrics_flushers: &mut [MetricsFlusher], @@ -838,6 +844,7 @@ async fn blocking_flush_all( proxy_flusher: &ProxyFlusher, race_flush_interval: &mut tokio::time::Interval, metrics_aggr_handle: &MetricsAggregatorHandle, + force_flush: bool, ) { let flush_response = metrics_aggr_handle .flush() @@ -857,7 +864,7 @@ async fn blocking_flush_all( logs_flusher.flush(None), futures::future::join_all(metrics_futures), trace_flusher.flush(None), - stats_flusher.flush(), + stats_flusher.flush(force_flush), proxy_flusher.flush(None), ); race_flush_interval.reset(); diff --git a/bottlecap/src/traces/stats_aggregator.rs b/bottlecap/src/traces/stats_aggregator.rs index 0abd139f8..8e4188437 100644 --- a/bottlecap/src/traces/stats_aggregator.rs +++ b/bottlecap/src/traces/stats_aggregator.rs @@ -64,14 +64,14 @@ impl StatsAggregator { } /// Returns a batch of trace stats payloads, subject to the max content size. 
- pub async fn get_batch(&mut self) -> Vec { + pub async fn get_batch(&mut self, force_flush: bool) -> Vec { debug!("StatsAggregator | getting batch of stats payloads"); // Pull stats data from stats concentrator let mut stats_concentrator = self.stats_concentrator.lock().await; - let mut stats = stats_concentrator.get_batch(); + let mut stats = stats_concentrator.get_batch(force_flush); while !stats.is_empty() { self.queue.extend(stats); - stats = stats_concentrator.get_batch(); + stats = stats_concentrator.get_batch(force_flush); } let mut batch_size = 0; @@ -179,12 +179,12 @@ mod tests { aggregator.add(payload.clone()); // The batch should only contain the first 2 payloads - let first_batch = aggregator.get_batch(); + let first_batch = aggregator.get_batch(false); assert_eq!(first_batch, vec![payload.clone(), payload.clone()]); assert_eq!(aggregator.queue.len(), 1); // The second batch should only contain the last log - let second_batch = aggregator.get_batch(); + let second_batch = aggregator.get_batch(false); assert_eq!(second_batch, vec![payload]); assert_eq!(aggregator.queue.len(), 0); } diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 9c442c799..487041d48 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -10,6 +10,7 @@ use std::sync::Arc; use crate::tags::provider::Provider as TagProvider; use std::collections::HashMap; use crate::lifecycle::invocation::processor::S_TO_NS_U64; +use std::time::{SystemTime, UNIX_EPOCH}; struct Bucket { pub hits: u64, @@ -22,6 +23,7 @@ pub struct StatsConcentrator { } const BUCKET_DURATION_NS: u64 = 10 * S_TO_NS_U64; +const NO_FLUSH_BUCKET_COUNT: u64 = 2; impl StatsConcentrator { #[must_use] @@ -44,12 +46,29 @@ impl StatsConcentrator { } #[must_use] - pub fn get_batch(&mut self) -> Vec { - let ret = self.buckets.iter().map(|(timestamp, bucket)| self.construct_stats_payload(*timestamp, bucket)).collect(); - 
self.buckets.clear(); + pub fn get_batch(&mut self, force_flush: bool) -> Vec { + let current_timestamp: u64 = SystemTime::now().duration_since(UNIX_EPOCH).expect("Failed to get current timestamp").as_nanos().try_into().expect("Failed to convert timestamp to u64"); + let mut ret = Vec::new(); + let mut to_remove = Vec::new(); + + for (&timestamp, bucket) in &self.buckets { + if force_flush || Self::should_flush_bucket(current_timestamp, timestamp) { + ret.push(self.construct_stats_payload(timestamp, bucket)); + to_remove.push(timestamp); + } + } + + for timestamp in to_remove { + self.buckets.remove(&timestamp); + } + ret } + fn should_flush_bucket(current_timestamp: u64, bucket_timestamp: u64) -> bool { + current_timestamp - bucket_timestamp >= BUCKET_DURATION_NS * NO_FLUSH_BUCKET_COUNT + } + fn construct_stats_payload(&self, timestamp: u64, bucket: &Bucket) -> pb::ClientStatsPayload { pb::ClientStatsPayload { // hostname: String::new(), diff --git a/bottlecap/src/traces/stats_flusher.rs b/bottlecap/src/traces/stats_flusher.rs index b90167359..363418cf4 100644 --- a/bottlecap/src/traces/stats_flusher.rs +++ b/bottlecap/src/traces/stats_flusher.rs @@ -28,7 +28,7 @@ pub trait StatsFlusher { /// Flushes stats to the Datadog trace stats intake.
async fn send(&self, traces: Vec); - async fn flush(&self); + async fn flush(&self, force_flush: bool); } #[allow(clippy::module_name_repetitions)] @@ -116,14 +116,14 @@ impl StatsFlusher for ServerlessStatsFlusher { } }; } - async fn flush(&self) { + async fn flush(&self, force_flush: bool) { let mut guard = self.aggregator.lock().await; - let mut stats = guard.get_batch().await; + let mut stats = guard.get_batch(force_flush).await; while !stats.is_empty() { self.send(stats).await; - stats = guard.get_batch().await; + stats = guard.get_batch(force_flush).await; } } } From c69adb6b3c40407ffaabfcc56d920231ed1c7426 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Wed, 10 Sep 2025 12:22:26 -0400 Subject: [PATCH 15/43] Do not use hard coded keys such as yiming_name --- bottlecap/src/traces/stats_concentrator.rs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 487041d48..d81dee12a 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -35,7 +35,6 @@ impl StatsConcentrator { } pub fn add(&mut self, stats_event: StatsEvent) { - // debug!("StatsConcentrator | adding stats payload to concentrator: {stats:?}"); let bucket_timestamp = Self::get_bucket_timestamp(stats_event.time); let bucket = self.buckets.entry(bucket_timestamp).or_insert(Bucket { hits: 0 }); bucket.hits += 1; @@ -71,8 +70,7 @@ impl StatsConcentrator { fn construct_stats_payload(&self, timestamp: u64, bucket: &Bucket) -> pb::ClientStatsPayload { pb::ClientStatsPayload { - // hostname: String::new(), - hostname: "yiming7-hostname".to_string(), + hostname: String::new(), env: self.config.env.clone().unwrap_or_default(), version: self.config.version.clone().unwrap_or_default(), lang: "rust".to_string(), @@ -80,7 +78,6 @@ impl StatsConcentrator { runtime_id: String::new(), sequence: 0, agent_aggregation: String::new(), - // service: 
self.config.service.clone().unwrap_or(String::new()), service: self.config.service.clone().unwrap_or_default(), container_id: String::new(), tags: vec![], @@ -89,20 +86,18 @@ impl StatsConcentrator { stats: vec![ pb::ClientStatsBucket { start: timestamp, - // TODO: consider changing this to 0 - duration: 1_000_000_000, + duration: 0, stats: vec![ pb::ClientGroupedStats { service: self.config.service.clone().unwrap_or_default(), - name: "yiming_name".to_string(), + name: "aws.lambda".to_string(), resource: self.resource.clone(), http_status_code: 200, r#type: String::new(), db_type: String::new(), hits: bucket.hits, errors: 0, - // TODO: consider changing this to 0 - duration: 1_000_000_000, + duration: 0, ok_summary: vec![], error_summary: vec![], synthetics: false, From 13c2aef38c83c7b27b417bfb627b00d804d6df32 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Wed, 10 Sep 2025 12:22:44 -0400 Subject: [PATCH 16/43] Change _dd.compute_stats from 1 to 0 --- bottlecap/src/tags/lambda/tags.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bottlecap/src/tags/lambda/tags.rs b/bottlecap/src/tags/lambda/tags.rs index c20f468a5..800ad9d10 100644 --- a/bottlecap/src/tags/lambda/tags.rs +++ b/bottlecap/src/tags/lambda/tags.rs @@ -39,7 +39,7 @@ const SERVICE_KEY: &str = "service"; // ComputeStatsKey is the tag key indicating whether trace stats should be computed const COMPUTE_STATS_KEY: &str = "_dd.compute_stats"; // ComputeStatsValue is the tag value indicating trace stats should be computed -const COMPUTE_STATS_VALUE: &str = "1"; +const COMPUTE_STATS_VALUE: &str = "0"; // FunctionTagsKey is the tag key for a function's tags to be set on the top level tracepayload const FUNCTION_TAGS_KEY: &str = "_dd.tags.function"; // TODO(astuyve) decide what to do with the version From 609294c93d5d1f60e767148ec7c94df98f372149 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Wed, 10 Sep 2025 13:42:04 -0400 Subject: [PATCH 17/43] Remove MyStatsProcessor --- 
bottlecap/src/traces/mod.rs | 1 - bottlecap/src/traces/my_stats_processor.rs | 28 ---------------------- bottlecap/src/traces/stats_agent.rs | 21 ++++------------ 3 files changed, 4 insertions(+), 46 deletions(-) delete mode 100644 bottlecap/src/traces/my_stats_processor.rs diff --git a/bottlecap/src/traces/mod.rs b/bottlecap/src/traces/mod.rs index cbd5ed704..4f18815e4 100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -10,7 +10,6 @@ pub mod stats_aggregator; pub mod stats_agent; pub mod stats_flusher; pub mod stats_processor; -pub mod my_stats_processor; pub mod trace_agent; pub mod trace_aggregator; pub mod trace_flusher; diff --git a/bottlecap/src/traces/my_stats_processor.rs b/bottlecap/src/traces/my_stats_processor.rs deleted file mode 100644 index f876af235..000000000 --- a/bottlecap/src/traces/my_stats_processor.rs +++ /dev/null @@ -1,28 +0,0 @@ -use crate::traces::stats_agent::StatsEvent; -use tracing::{debug}; - -use std::sync::Arc; -use crate::traces::stats_concentrator::StatsConcentrator; -use tokio::sync::Mutex; - -pub struct MyStatsProcessor { - concentrator: Arc>, -} - -impl MyStatsProcessor { - #[must_use] - pub fn new(stats_concentrator: Arc>) -> Self { - Self { concentrator: stats_concentrator } - } - - #[allow(clippy::cast_possible_truncation)] - pub async fn process( - &self, - // TODO: get time, duration and hit/error from stats event - event: StatsEvent, - ) { - debug!("In my stats processor: Processing stats event."); - - self.concentrator.lock().await.add(event); - } -} \ No newline at end of file diff --git a/bottlecap/src/traces/stats_agent.rs b/bottlecap/src/traces/stats_agent.rs index 4d8306c34..31686fea4 100644 --- a/bottlecap/src/traces/stats_agent.rs +++ b/bottlecap/src/traces/stats_agent.rs @@ -1,7 +1,6 @@ use tokio::sync::mpsc::{self, Receiver}; use tracing::debug; -use super::my_stats_processor::MyStatsProcessor; use super::stats_concentrator::StatsConcentrator; use std::sync::Arc; @@ -16,38 +15,26 @@ pub 
struct StatsEvent { #[allow(clippy::module_name_repetitions)] pub struct StatsAgent { rx: mpsc::Receiver, - processor: MyStatsProcessor, + concentrator: Arc>, } impl StatsAgent { #[must_use] pub fn new( rx: Receiver, - stats_concentrator: Arc>, + concentrator: Arc>, ) -> StatsAgent { - let processor = MyStatsProcessor::new(stats_concentrator); StatsAgent { rx, - processor, + concentrator, } } pub async fn spin(&mut self) { while let Some(event) = self.rx.recv().await { debug!("In stats agent: Received stats event."); - self.processor.process(event).await; + self.concentrator.lock().await.add(event); } } - // pub async fn sync_consume(&mut self) { - // if let Some(events) = self.rx.recv().await { - // self.processor.process().await; - // } - // } - - // #[must_use] - // pub fn get_sender_copy(&self) -> Sender { - // self.tx.clone() - // } - } From 39766d69dc4b933109b32257a9b200c6c566d57a Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Wed, 10 Sep 2025 13:51:41 -0400 Subject: [PATCH 18/43] Remove unused code --- bottlecap/src/bin/bottlecap/main.rs | 14 ------- bottlecap/src/traces/stats_agent.rs | 1 - bottlecap/src/traces/stats_aggregator.rs | 10 ----- bottlecap/src/traces/trace_agent.rs | 50 ------------------------ 4 files changed, 75 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index ae9dce873..b0972aff7 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -1030,18 +1030,6 @@ fn start_logs_agent( (logs_agent_channel, logs_flusher) } -// fn start_stats_agent( -// stats_rx: Receiver, -// config: &Arc, -// tags_provider: &Arc, -// stats_aggregator: Arc>, -// ) { -// let mut stats_agent = StatsAgent::new(stats_rx, Arc::clone(config), Arc::clone(tags_provider), stats_aggregator); -// tokio::spawn(async move { -// stats_agent.spin().await; -// }); -// } - fn start_metrics_flushers( api_key_factory: Arc, metrics_aggr_handle: &MetricsAggregatorHandle, @@ -1188,8 +1176,6 @@ fn 
start_trace_agent( } }); - // start_stats_agent(stats_rx, config, &tags_provider, stats_aggregator); - ( trace_agent_channel, trace_flusher, diff --git a/bottlecap/src/traces/stats_agent.rs b/bottlecap/src/traces/stats_agent.rs index 31686fea4..c59a1dbe4 100644 --- a/bottlecap/src/traces/stats_agent.rs +++ b/bottlecap/src/traces/stats_agent.rs @@ -36,5 +36,4 @@ impl StatsAgent { self.concentrator.lock().await.add(event); } } - } diff --git a/bottlecap/src/traces/stats_aggregator.rs b/bottlecap/src/traces/stats_aggregator.rs index 8e4188437..5690eecbd 100644 --- a/bottlecap/src/traces/stats_aggregator.rs +++ b/bottlecap/src/traces/stats_aggregator.rs @@ -30,16 +30,6 @@ pub struct StatsAggregator { stats_concentrator: Arc>, } -// impl Default for StatsAggregator { -// fn default() -> Self { -// StatsAggregator { -// queue: VecDeque::new(), -// max_content_size_bytes: MAX_CONTENT_SIZE_BYTES, -// buffer: Vec::new(), -// } -// } -// } - /// Takes in individual trace stats payloads and aggregates them into batches to be flushed to Datadog. 
impl StatsAggregator { #[allow(dead_code)] diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 51a633cd4..814a8c35f 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -192,55 +192,6 @@ impl TraceAgent { stats_agent.lock().await.spin().await; }); - // stats_tx.clone().send(pb::ClientStatsPayload { - // hostname: String::new(), - // // TODO (Yiming): support setting this - // env: "dev".to_string(), - // // TODO (Yiming): support setting this - // version: "version".to_string(), - // lang: "rust".to_string(), - // tracer_version: String::new(), - // runtime_id: String::new(), - // sequence: 0, - // agent_aggregation: String::new(), - // // TODO (Yiming): support setting this - // service: "yiming_service".to_string(), - // container_id: String::new(), - // tags: vec![], - // git_commit_sha: String::new(), - // image_tag: String::new(), - // stats: vec![ - // pb::ClientStatsBucket { - // start: std::time::SystemTime::now() - // .duration_since(std::time::UNIX_EPOCH) - // .expect("Time went backwards") - // .as_nanos() as u64, - // duration: 1_000_000_000, - // stats: vec![ - // pb::ClientGroupedStats { - // service: "yiming_service".to_string(), - // name: "yiming_name".to_string(), - // resource: "yiming_resource".to_string(), - // http_status_code: 200, - // r#type: "yiming_type".to_string(), - // db_type: String::new(), - // hits: 1, - // errors: 0, - // duration: 1_000_000_000, - // ok_summary: vec![], - // error_summary: vec![], - // synthetics: false, - // top_level_hits: 0, - // span_kind: String::new(), - // peer_tags: vec![], - // is_trace_root: 1, - // }, - // ], - // agent_time_shift: 0, - // }, - // ], - // }).await?; - let router = self.make_router(self.stats_tx.clone()); let port = u16::try_from(TRACE_AGENT_PORT).expect("TRACE_AGENT_PORT is too large"); @@ -357,7 +308,6 @@ impl TraceAgent { } async fn stats(State(state): State, request: Request) -> Response { - 
debug!("Trace Agent | stats()"); match state .stats_processor .process_stats(request, state.stats_tx) From 503c07dbf84dcf7b507b270f637bb068f4bf0b28 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Wed, 10 Sep 2025 13:52:11 -0400 Subject: [PATCH 19/43] Format --- bottlecap/src/bin/bottlecap/main.rs | 20 +++-- .../src/lifecycle/invocation/processor.rs | 14 +++- bottlecap/src/traces/mod.rs | 4 +- bottlecap/src/traces/stats_agent.rs | 6 +- bottlecap/src/traces/stats_aggregator.rs | 7 +- bottlecap/src/traces/stats_concentrator.rs | 81 ++++++++++--------- 6 files changed, 78 insertions(+), 54 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index b0972aff7..d145eeca3 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -53,14 +53,14 @@ use bottlecap::{ propagation::DatadogCompositePropagator, proxy_aggregator, proxy_flusher::Flusher as ProxyFlusher, + stats_agent::StatsEvent, stats_aggregator::StatsAggregator, + stats_concentrator::StatsConcentrator, stats_flusher::{self, StatsFlusher}, stats_processor, trace_agent, trace_aggregator::{self, SendDataBuilderInfo}, trace_flusher::{self, ServerlessTraceFlusher, TraceFlusher}, trace_processor::{self, SendingTraceProcessor}, - stats_agent::StatsEvent, - stats_concentrator::StatsConcentrator, }, }; use datadog_fips::reqwest_adapter::create_reqwest_client_builder; @@ -94,7 +94,12 @@ use std::{ sync::Arc, time::{Duration, Instant}, }; -use tokio::{sync::Mutex as TokioMutex, sync::RwLock, sync::mpsc::{self, Sender, Receiver}, task::JoinHandle}; +use tokio::{ + sync::Mutex as TokioMutex, + sync::RwLock, + sync::mpsc::{self, Receiver, Sender}, + task::JoinHandle, +}; use tokio_util::sync::CancellationToken; use tracing::{debug, error}; use tracing_subscriber::EnvFilter; @@ -1113,8 +1118,13 @@ fn start_trace_agent( tokio_util::sync::CancellationToken, ) { // Stats - let stats_concentrator = 
Arc::new(TokioMutex::new(StatsConcentrator::new(Arc::clone(config), Arc::clone(tags_provider)))); - let stats_aggregator = Arc::new(TokioMutex::new(StatsAggregator::new_with_concentrator(stats_concentrator.clone()))); + let stats_concentrator = Arc::new(TokioMutex::new(StatsConcentrator::new( + Arc::clone(config), + Arc::clone(tags_provider), + ))); + let stats_aggregator = Arc::new(TokioMutex::new(StatsAggregator::new_with_concentrator( + stats_concentrator.clone(), + ))); let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher::new( api_key_factory.clone(), stats_aggregator.clone(), diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 38b732318..2191fb551 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -166,19 +166,27 @@ impl Processor { } // Increment the invocation metric - self.enhanced_metrics.increment_invocation_metric(timestamp_secs.try_into().unwrap_or_default()); + self.enhanced_metrics + .increment_invocation_metric(timestamp_secs.try_into().unwrap_or_default()); self.enhanced_metrics.set_invoked_received(); // If `UniversalInstrumentationStart` event happened first, process it if let Some((headers, payload_value)) = self.context_buffer.pair_invoke_event(&request_id) { // Infer span self.inferrer.infer_span(&payload_value, &self.aws_config); - self.process_on_universal_instrumentation_start(request_id.clone(), headers, payload_value); + self.process_on_universal_instrumentation_start( + request_id.clone(), + headers, + payload_value, + ); } // Send stats event let timestamp_ns = timestamp_secs * S_TO_NS_U64; - let stats_event = StatsEvent { time: timestamp_ns, dummy: 0 }; + let stats_event = StatsEvent { + time: timestamp_ns, + dummy: 0, + }; match self.stats_agent_tx.send(stats_event).await { Ok(()) => { debug!("Successfully buffered stats event to be aggregated."); diff --git a/bottlecap/src/traces/mod.rs 
b/bottlecap/src/traces/mod.rs index 4f18815e4..653569161 100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -6,15 +6,15 @@ pub mod propagation; pub mod proxy_aggregator; pub mod proxy_flusher; pub mod span_pointers; -pub mod stats_aggregator; pub mod stats_agent; +pub mod stats_aggregator; +pub mod stats_concentrator; pub mod stats_flusher; pub mod stats_processor; pub mod trace_agent; pub mod trace_aggregator; pub mod trace_flusher; pub mod trace_processor; -pub mod stats_concentrator; // URL for a call to the Lambda runtime API. The value may be replaced if `AWS_LAMBDA_RUNTIME_API` is set. const LAMBDA_RUNTIME_URL_PREFIX: &str = "http://127.0.0.1:9001"; diff --git a/bottlecap/src/traces/stats_agent.rs b/bottlecap/src/traces/stats_agent.rs index c59a1dbe4..14fa0c824 100644 --- a/bottlecap/src/traces/stats_agent.rs +++ b/bottlecap/src/traces/stats_agent.rs @@ -11,7 +11,6 @@ pub struct StatsEvent { pub dummy: u64, } - #[allow(clippy::module_name_repetitions)] pub struct StatsAgent { rx: mpsc::Receiver, @@ -24,10 +23,7 @@ impl StatsAgent { rx: Receiver, concentrator: Arc>, ) -> StatsAgent { - StatsAgent { - rx, - concentrator, - } + StatsAgent { rx, concentrator } } pub async fn spin(&mut self) { diff --git a/bottlecap/src/traces/stats_aggregator.rs b/bottlecap/src/traces/stats_aggregator.rs index 5690eecbd..1be0c07af 100644 --- a/bottlecap/src/traces/stats_aggregator.rs +++ b/bottlecap/src/traces/stats_aggregator.rs @@ -1,8 +1,8 @@ +use crate::traces::stats_concentrator::StatsConcentrator; use datadog_trace_protobuf::pb::ClientStatsPayload; use std::collections::VecDeque; use std::sync::Arc; use tokio::sync::Mutex; -use crate::traces::stats_concentrator::StatsConcentrator; #[allow(clippy::empty_line_after_doc_comments)] /// Maximum number of entries in a stat payload. 
@@ -34,7 +34,10 @@ pub struct StatsAggregator { impl StatsAggregator { #[allow(dead_code)] #[allow(clippy::must_use_candidate)] - pub fn new(max_content_size_bytes: usize, stats_concentrator: Arc>) -> Self { + pub fn new( + max_content_size_bytes: usize, + stats_concentrator: Arc>, + ) -> Self { StatsAggregator { queue: VecDeque::new(), max_content_size_bytes, diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index d81dee12a..f0a97dbe4 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -1,15 +1,14 @@ +use crate::config::Config; +use crate::lifecycle::invocation::processor::S_TO_NS_U64; +use crate::tags::provider::Provider as TagProvider; +use crate::traces::stats_agent::StatsEvent; /** * TODO: - * + * */ - use datadog_trace_protobuf::pb; -use crate::traces::stats_agent::StatsEvent; -use crate::config::Config; -use std::sync::Arc; -use crate::tags::provider::Provider as TagProvider; use std::collections::HashMap; -use crate::lifecycle::invocation::processor::S_TO_NS_U64; +use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; struct Bucket { @@ -31,12 +30,19 @@ impl StatsConcentrator { let resource = tags_provider .get_canonical_resource_name() .unwrap_or(String::from("aws.lambda")); - Self { buckets: HashMap::new(), config, resource } + Self { + buckets: HashMap::new(), + config, + resource, + } } pub fn add(&mut self, stats_event: StatsEvent) { let bucket_timestamp = Self::get_bucket_timestamp(stats_event.time); - let bucket = self.buckets.entry(bucket_timestamp).or_insert(Bucket { hits: 0 }); + let bucket = self + .buckets + .entry(bucket_timestamp) + .or_insert(Bucket { hits: 0 }); bucket.hits += 1; } @@ -46,7 +52,12 @@ impl StatsConcentrator { #[must_use] pub fn get_batch(&mut self, force_flush: bool) -> Vec { - let current_timestamp: u64 = SystemTime::now().duration_since(UNIX_EPOCH).expect("Failed to get current 
timestamp").as_nanos().try_into().expect("Failed to convert timestamp to u64"); + let current_timestamp: u64 = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Failed to get current timestamp") + .as_nanos() + .try_into() + .expect("Failed to convert timestamp to u64"); let mut ret = Vec::new(); let mut to_remove = Vec::new(); @@ -83,33 +94,29 @@ impl StatsConcentrator { tags: vec![], git_commit_sha: String::new(), image_tag: String::new(), - stats: vec![ - pb::ClientStatsBucket { - start: timestamp, + stats: vec![pb::ClientStatsBucket { + start: timestamp, + duration: 0, + stats: vec![pb::ClientGroupedStats { + service: self.config.service.clone().unwrap_or_default(), + name: "aws.lambda".to_string(), + resource: self.resource.clone(), + http_status_code: 200, + r#type: String::new(), + db_type: String::new(), + hits: bucket.hits, + errors: 0, duration: 0, - stats: vec![ - pb::ClientGroupedStats { - service: self.config.service.clone().unwrap_or_default(), - name: "aws.lambda".to_string(), - resource: self.resource.clone(), - http_status_code: 200, - r#type: String::new(), - db_type: String::new(), - hits: bucket.hits, - errors: 0, - duration: 0, - ok_summary: vec![], - error_summary: vec![], - synthetics: false, - top_level_hits: 0, - span_kind: String::new(), - peer_tags: vec![], - is_trace_root: 1, - }, - ], - agent_time_shift: 0, - }, - ], + ok_summary: vec![], + error_summary: vec![], + synthetics: false, + top_level_hits: 0, + span_kind: String::new(), + peer_tags: vec![], + is_trace_root: 1, + }], + agent_time_shift: 0, + }], } } -} \ No newline at end of file +} From ce2879cf2a76236e038a386a59a9386213b005c4 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Wed, 10 Sep 2025 14:54:23 -0400 Subject: [PATCH 20/43] Rename variables and remove unnecessary code --- bottlecap/src/bin/bottlecap/main.rs | 1 - .../src/lifecycle/invocation/processor.rs | 8 +++--- bottlecap/src/traces/stats_agent.rs | 2 -- bottlecap/src/traces/stats_aggregator.rs | 28 
++++++------------- bottlecap/src/traces/stats_concentrator.rs | 8 ++++-- bottlecap/src/traces/trace_agent.rs | 25 +++++++---------- 6 files changed, 28 insertions(+), 44 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index d145eeca3..78a3c2f97 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -1172,7 +1172,6 @@ fn start_trace_agent( invocation_processor, appsec_processor, Arc::clone(tags_provider), - // TODO: rename this stats_rx, stats_concentrator.clone(), ); diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 2191fb551..812d621bb 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -85,7 +85,7 @@ pub struct Processor { /// /// These tags are used to capture runtime and initialization. dynamic_tags: HashMap, - stats_agent_tx: Sender, + stats_tx: Sender, } impl Processor { @@ -96,7 +96,7 @@ impl Processor { aws_config: Arc, metrics_aggregator: dogstatsd::aggregator_service::AggregatorHandle, propagator: Arc, - stats_agent_tx: Sender, + stats_tx: Sender, ) -> Self { let resource = tags_provider .get_canonical_resource_name() @@ -120,7 +120,7 @@ impl Processor { service, resource, dynamic_tags: HashMap::new(), - stats_agent_tx, + stats_tx, } } @@ -187,7 +187,7 @@ impl Processor { time: timestamp_ns, dummy: 0, }; - match self.stats_agent_tx.send(stats_event).await { + match self.stats_tx.send(stats_event).await { Ok(()) => { debug!("Successfully buffered stats event to be aggregated."); } diff --git a/bottlecap/src/traces/stats_agent.rs b/bottlecap/src/traces/stats_agent.rs index 14fa0c824..824304fa5 100644 --- a/bottlecap/src/traces/stats_agent.rs +++ b/bottlecap/src/traces/stats_agent.rs @@ -1,5 +1,4 @@ use tokio::sync::mpsc::{self, Receiver}; -use tracing::debug; use super::stats_concentrator::StatsConcentrator; @@ -28,7 +27,6 @@ impl 
StatsAgent { pub async fn spin(&mut self) { while let Some(event) = self.rx.recv().await { - debug!("In stats agent: Received stats event."); self.concentrator.lock().await.add(event); } } diff --git a/bottlecap/src/traces/stats_aggregator.rs b/bottlecap/src/traces/stats_aggregator.rs index 1be0c07af..140515a86 100644 --- a/bottlecap/src/traces/stats_aggregator.rs +++ b/bottlecap/src/traces/stats_aggregator.rs @@ -20,52 +20,42 @@ use tokio::sync::Mutex; /// const MAX_CONTENT_SIZE_BYTES: usize = 3 * 1024 * 1024; // ~3MB -use tracing::debug; - #[allow(clippy::module_name_repetitions)] pub struct StatsAggregator { queue: VecDeque, max_content_size_bytes: usize, buffer: Vec, - stats_concentrator: Arc>, + concentrator: Arc>, } /// Takes in individual trace stats payloads and aggregates them into batches to be flushed to Datadog. impl StatsAggregator { #[allow(dead_code)] #[allow(clippy::must_use_candidate)] - pub fn new( - max_content_size_bytes: usize, - stats_concentrator: Arc>, - ) -> Self { + pub fn new(max_content_size_bytes: usize, concentrator: Arc>) -> Self { StatsAggregator { queue: VecDeque::new(), max_content_size_bytes, buffer: Vec::new(), - stats_concentrator, + concentrator, } } - pub fn new_with_concentrator(stats_concentrator: Arc>) -> Self { - Self::new(MAX_CONTENT_SIZE_BYTES, stats_concentrator) + pub fn new_with_concentrator(concentrator: Arc>) -> Self { + Self::new(MAX_CONTENT_SIZE_BYTES, concentrator) } /// Takes in an individual trace stats payload. pub fn add(&mut self, payload: ClientStatsPayload) { - debug!("StatsAggregator | adding stats payload to aggregator: {payload:?}"); self.queue.push_back(payload); } /// Returns a batch of trace stats payloads, subject to the max content size. 
pub async fn get_batch(&mut self, force_flush: bool) -> Vec { - debug!("StatsAggregator | getting batch of stats payloads"); - // Pull stats data from stats concentrator - let mut stats_concentrator = self.stats_concentrator.lock().await; - let mut stats = stats_concentrator.get_batch(force_flush); - while !stats.is_empty() { - self.queue.extend(stats); - stats = stats_concentrator.get_batch(force_flush); - } + // Pull stats data from concentrator + let mut concentrator = self.concentrator.lock().await; + let stats = concentrator.get_stats(force_flush); + self.queue.extend(stats); let mut batch_size = 0; diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index f0a97dbe4..9af841005 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -11,8 +11,9 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; +#[derive(Default)] struct Bucket { - pub hits: u64, + pub hits: u32, } pub struct StatsConcentrator { @@ -22,6 +23,7 @@ pub struct StatsConcentrator { } const BUCKET_DURATION_NS: u64 = 10 * S_TO_NS_U64; +// TODO: comment const NO_FLUSH_BUCKET_COUNT: u64 = 2; impl StatsConcentrator { @@ -51,7 +53,7 @@ impl StatsConcentrator { } #[must_use] - pub fn get_batch(&mut self, force_flush: bool) -> Vec { + pub fn get_stats(&mut self, force_flush: bool) -> Vec { let current_timestamp: u64 = SystemTime::now() .duration_since(UNIX_EPOCH) .expect("Failed to get current timestamp") @@ -104,7 +106,7 @@ impl StatsConcentrator { http_status_code: 200, r#type: String::new(), db_type: String::new(), - hits: bucket.hits, + hits: bucket.hits.into(), errors: 0, duration: 0, ok_summary: vec![], diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 814a8c35f..c021217bb 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -106,8 +106,6 @@ pub struct TraceAgent { 
appsec_processor: Option>>, shutdown_token: CancellationToken, tx: Sender, - stats_tx: Sender, - stats_rx: Arc>>, stats_agent: Arc>, } @@ -130,7 +128,7 @@ impl TraceAgent { invocation_processor: Arc>, appsec_processor: Option>>, tags_provider: Arc, - my_stats_rx: Receiver, + stats_rx: Receiver, stats_concentrator: Arc>, ) -> TraceAgent { // Set up a channel to send processed traces to our trace aggregator. tx is passed through each @@ -147,13 +145,7 @@ impl TraceAgent { } }); - // Set up a channel to send processed stats to our stats aggregator. - let (stats_tx, stats_rx): ( - Sender, - Receiver, - ) = mpsc::channel(STATS_PAYLOAD_CHANNEL_BUFFER_SIZE); - - let stats_agent = StatsAgent::new(my_stats_rx, stats_concentrator.clone()); + let stats_agent = StatsAgent::new(stats_rx, stats_concentrator.clone()); TraceAgent { config: config.clone(), @@ -166,8 +158,6 @@ impl TraceAgent { tags_provider, shutdown_token: CancellationToken::new(), tx: trace_tx, - stats_tx: stats_tx.clone(), - stats_rx: Arc::new(Mutex::new(stats_rx)), stats_agent: Arc::new(Mutex::new(stats_agent)), } } @@ -176,23 +166,28 @@ impl TraceAgent { pub async fn start(&self) -> Result<(), Box> { let now: Instant = Instant::now(); + // Set up a channel to send processed stats to our stats aggregator. + let (stats_tx, mut stats_rx): ( + Sender, + Receiver, + ) = mpsc::channel(STATS_PAYLOAD_CHANNEL_BUFFER_SIZE); + // Start the stats aggregator, which receives and buffers stats payloads to be consumed by the stats flusher. 
		let stats_aggregator = self.stats_aggregator.clone(); - let stats_rx = self.stats_rx.clone(); tokio::spawn(async move { - let mut stats_rx = stats_rx.lock().await; while let Some(stats_payload) = stats_rx.recv().await { let mut aggregator = stats_aggregator.lock().await; aggregator.add(stats_payload); } }); + // Start the stats agent, which receives stats events and sends them to the stats concentrator let stats_agent = self.stats_agent.clone(); tokio::spawn(async move { stats_agent.lock().await.spin().await; }); - let router = self.make_router(self.stats_tx.clone()); + let router = self.make_router(stats_tx); let port = u16::try_from(TRACE_AGENT_PORT).expect("TRACE_AGENT_PORT is too large"); let socket = SocketAddr::from(([127, 0, 0, 1], port)); From 70f5e077b5a2588c66024189472e2601255b5529 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Wed, 10 Sep 2025 14:57:26 -0400 Subject: [PATCH 21/43] Fix code style --- bottlecap/src/traces/stats_concentrator.rs | 6 +++--- bottlecap/src/traces/trace_agent.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 9af841005..b4785b1c3 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -60,12 +60,12 @@ impl StatsConcentrator { .as_nanos() .try_into() .expect("Failed to convert timestamp to u64"); - let mut ret = Vec::new(); + let mut stats = Vec::new(); let mut to_remove = Vec::new(); for (&timestamp, bucket) in &self.buckets { - if force_flush || Self::should_flush_bucket(current_timestamp, timestamp) { - ret.push(self.construct_stats_payload(timestamp, bucket)); + stats.push(self.construct_stats_payload(timestamp, bucket)); to_remove.push(timestamp); } } @@ -74,7 +74,7 @@ self.buckets.remove(&timestamp); } - ret + stats } fn should_flush_bucket(current_timestamp: u64, bucket_timestamp: u64) -> bool { diff --git a/bottlecap/src/traces/trace_agent.rs 
b/bottlecap/src/traces/trace_agent.rs index c021217bb..bf7e7da21 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -164,7 +164,7 @@ impl TraceAgent { #[allow(clippy::cast_possible_truncation)] pub async fn start(&self) -> Result<(), Box> { - let now: Instant = Instant::now(); + let now = Instant::now(); // Set up a channel to send processed stats to our stats aggregator. let (stats_tx, mut stats_rx): ( From 029a556368209ed5d6c4e591c98ab1fb38c1ea72 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Wed, 10 Sep 2025 14:59:42 -0400 Subject: [PATCH 22/43] Code style --- bottlecap/src/traces/trace_agent.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index bf7e7da21..e4b1a29c6 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -156,8 +156,8 @@ impl TraceAgent { invocation_processor, appsec_processor, tags_provider, - shutdown_token: CancellationToken::new(), tx: trace_tx, + shutdown_token: CancellationToken::new(), stats_agent: Arc::new(Mutex::new(stats_agent)), } } From abf937255397e189f0bf37c2ef82d50c092ff20b Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Wed, 10 Sep 2025 15:28:19 -0400 Subject: [PATCH 23/43] Add comments --- bottlecap/src/bin/bottlecap/main.rs | 11 ++++++----- .../src/lifecycle/invocation/processor.rs | 11 ++++------- bottlecap/src/traces/stats_agent.rs | 2 +- bottlecap/src/traces/stats_aggregator.rs | 2 +- bottlecap/src/traces/stats_concentrator.rs | 18 ++++++++++++------ bottlecap/src/traces/stats_flusher.rs | 1 + 6 files changed, 25 insertions(+), 20 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 78a3c2f97..febd3d577 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -524,6 +524,7 @@ async fn extension_loop_active( ))); let propagator = 
Arc::new(DatadogCompositePropagator::new(Arc::clone(config))); + // Received by stats agent, which sends the stats to the stats concentrator let (stats_tx, stats_rx) = mpsc::channel::(1000); // Lifecycle Invocation Processor let invocation_processor = Arc::new(TokioMutex::new(InvocationProcessor::new( @@ -733,7 +734,7 @@ async fn extension_loop_active( &proxy_flusher, &mut race_flush_interval, &metrics_aggr_handle, - false, + false, // force_flush_trace_stats ) .await; last_continuous_flush_error = false; @@ -774,7 +775,7 @@ async fn extension_loop_active( &proxy_flusher, &mut race_flush_interval, &metrics_aggr_handle, - false, + false, // force_flush_trace_stats ) .await; } @@ -832,7 +833,7 @@ async fn extension_loop_active( &proxy_flusher, &mut race_flush_interval, &metrics_aggr_handle, - true, + true, // force_flush_trace_stats ) .await; return Ok(()); @@ -849,7 +850,7 @@ async fn blocking_flush_all( proxy_flusher: &ProxyFlusher, race_flush_interval: &mut tokio::time::Interval, metrics_aggr_handle: &MetricsAggregatorHandle, - force_flush: bool, + force_flush_trace_stats: bool, ) { let flush_response = metrics_aggr_handle .flush() @@ -869,7 +870,7 @@ async fn blocking_flush_all( logs_flusher.flush(None), futures::future::join_all(metrics_futures), trace_flusher.flush(None), - stats_flusher.flush(force_flush), + stats_flusher.flush(force_flush_trace_stats), proxy_flusher.flush(None), ); race_flush_interval.reset(); diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 812d621bb..7ba097e73 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -181,18 +181,15 @@ impl Processor { ); } - // Send stats event + // Send stats event to the stats concentrator let timestamp_ns = timestamp_secs * S_TO_NS_U64; - let stats_event = StatsEvent { - time: timestamp_ns, - dummy: 0, - }; + let stats_event = StatsEvent { time: timestamp_ns }; match 
self.stats_tx.send(stats_event).await { Ok(()) => { - debug!("Successfully buffered stats event to be aggregated."); + debug!("Buffered stats event to be concentrated."); } Err(err) => { - error!("Error sending stats event to the stats aggregator: {err}"); + error!("Error sending stats event to the concentrator: {err}"); } } } diff --git a/bottlecap/src/traces/stats_agent.rs b/bottlecap/src/traces/stats_agent.rs index 824304fa5..001241965 100644 --- a/bottlecap/src/traces/stats_agent.rs +++ b/bottlecap/src/traces/stats_agent.rs @@ -4,10 +4,10 @@ use super::stats_concentrator::StatsConcentrator; use std::sync::Arc; use tokio::sync::Mutex; + #[derive(Clone, Copy, Default)] pub struct StatsEvent { pub time: u64, - pub dummy: u64, } #[allow(clippy::module_name_repetitions)] diff --git a/bottlecap/src/traces/stats_aggregator.rs b/bottlecap/src/traces/stats_aggregator.rs index 140515a86..2e37b1cb2 100644 --- a/bottlecap/src/traces/stats_aggregator.rs +++ b/bottlecap/src/traces/stats_aggregator.rs @@ -32,7 +32,7 @@ pub struct StatsAggregator { impl StatsAggregator { #[allow(dead_code)] #[allow(clippy::must_use_candidate)] - pub fn new(max_content_size_bytes: usize, concentrator: Arc>) -> Self { + fn new(max_content_size_bytes: usize, concentrator: Arc>) -> Self { StatsAggregator { queue: VecDeque::new(), max_content_size_bytes, diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index b4785b1c3..7119979ae 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -2,10 +2,6 @@ use crate::config::Config; use crate::lifecycle::invocation::processor::S_TO_NS_U64; use crate::tags::provider::Provider as TagProvider; use crate::traces::stats_agent::StatsEvent; -/** - * TODO: - * - */ use datadog_trace_protobuf::pb; use std::collections::HashMap; use std::sync::Arc; @@ -22,10 +18,17 @@ pub struct StatsConcentrator { buckets: HashMap, } -const BUCKET_DURATION_NS: u64 = 10 * 
S_TO_NS_U64; -// TODO: comment +// The duration of a bucket in nanoseconds. +const BUCKET_DURATION_NS: u64 = 10 * S_TO_NS_U64; // 10 seconds + +// The number of latest buckets to not flush when force_flush is false. +// For example, if we have buckets with timestamps 10, 20, 40, the current timestamp is 45, +// and NO_FLUSH_BUCKET_COUNT is 3, then we will flush bucket 10 but not bucket 20 or 40. +// Note that the bucket 30 is included in the 3 latest buckets even if it has no data. +// This is to avoid flushing stats that are still being collected to save some cost. const NO_FLUSH_BUCKET_COUNT: u64 = 2; +// Aggregates stats into buckets, which are then pulled by the stats aggregator. impl StatsConcentrator { #[must_use] pub fn new(config: Arc, tags_provider: Arc) -> Self { @@ -52,6 +55,8 @@ impl StatsConcentrator { timestamp - timestamp % BUCKET_DURATION_NS } + // force_flush: If true, flush all stats. If false, flush stats except for the few latest + // buckets, which may still be getting data. #[must_use] pub fn get_stats(&mut self, force_flush: bool) -> Vec { let current_timestamp: u64 = SystemTime::now() @@ -77,6 +82,7 @@ impl StatsConcentrator { stats } + // Whether a bucket should be flushed based on the current timestamp and the bucket timestamp. 
fn should_flush_bucket(current_timestamp: u64, bucket_timestamp: u64) -> bool { current_timestamp - bucket_timestamp >= BUCKET_DURATION_NS * NO_FLUSH_BUCKET_COUNT } diff --git a/bottlecap/src/traces/stats_flusher.rs b/bottlecap/src/traces/stats_flusher.rs index 363418cf4..95ba905bb 100644 --- a/bottlecap/src/traces/stats_flusher.rs +++ b/bottlecap/src/traces/stats_flusher.rs @@ -116,6 +116,7 @@ impl StatsFlusher for ServerlessStatsFlusher { } }; } + async fn flush(&self, force_flush: bool) { let mut guard = self.aggregator.lock().await; From 815e740277ad7afe9a4e3264e6eb8769222d7627 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Wed, 10 Sep 2025 15:31:06 -0400 Subject: [PATCH 24/43] Add tests for should_flush_bucket() --- bottlecap/src/traces/stats_concentrator.rs | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 7119979ae..c7fe76376 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -128,3 +128,29 @@ impl StatsConcentrator { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_should_flush_bucket_false_when_not_enough_time_passed() { + let bucket_timestamp = 1_000_000_000; + let current_timestamp = bucket_timestamp + BUCKET_DURATION_NS * (NO_FLUSH_BUCKET_COUNT - 1); + assert!( + !StatsConcentrator::should_flush_bucket(current_timestamp, bucket_timestamp), + "Should not flush when current_timestamp is less than threshold ahead" + ); + } + + #[test] + fn test_should_flush_bucket_true_when_much_later() { + let bucket_timestamp = 1_000_000_000; + let current_timestamp = bucket_timestamp + BUCKET_DURATION_NS * (NO_FLUSH_BUCKET_COUNT + 5); + assert!( + StatsConcentrator::should_flush_bucket(current_timestamp, bucket_timestamp), + "Should flush when current_timestamp is much greater than threshold" + ); + } +} + From c5770e32900ca26c3764f777509aa2f5c888ee9d Mon Sep 17 00:00:00 
2001 From: Yiming Luo Date: Wed, 10 Sep 2025 16:18:50 -0400 Subject: [PATCH 25/43] Format --- bottlecap/src/traces/stats_concentrator.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index c7fe76376..2ac7d240f 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -153,4 +153,3 @@ mod tests { ); } } - From aa7073cc4ab0075b7338e6c122eb039cb49c50fb Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Fri, 12 Sep 2025 16:00:30 -0400 Subject: [PATCH 26/43] Move the trigger to trace agent, without grouping by key --- bottlecap/src/bin/bottlecap/main.rs | 11 ++---- .../src/lifecycle/invocation/processor.rs | 22 ++---------- bottlecap/src/traces/mod.rs | 1 + bottlecap/src/traces/stats_agent.rs | 14 +++++--- bottlecap/src/traces/stats_concentrator.rs | 2 ++ bottlecap/src/traces/trace_agent.rs | 35 ++++++++++++++----- bottlecap/src/traces/trace_stats_processor.rs | 35 +++++++++++++++++++ 7 files changed, 78 insertions(+), 42 deletions(-) create mode 100644 bottlecap/src/traces/trace_stats_processor.rs diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index febd3d577..a807b990b 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -53,7 +53,6 @@ use bottlecap::{ propagation::DatadogCompositePropagator, proxy_aggregator, proxy_flusher::Flusher as ProxyFlusher, - stats_agent::StatsEvent, stats_aggregator::StatsAggregator, stats_concentrator::StatsConcentrator, stats_flusher::{self, StatsFlusher}, @@ -97,7 +96,7 @@ use std::{ use tokio::{ sync::Mutex as TokioMutex, sync::RwLock, - sync::mpsc::{self, Receiver, Sender}, + sync::mpsc::Sender, task::JoinHandle, }; use tokio_util::sync::CancellationToken; @@ -524,8 +523,6 @@ async fn extension_loop_active( ))); let propagator = Arc::new(DatadogCompositePropagator::new(Arc::clone(config))); - // Received by stats agent, 
which sends the stats to the stats concentrator - let (stats_tx, stats_rx) = mpsc::channel::(1000); // Lifecycle Invocation Processor let invocation_processor = Arc::new(TokioMutex::new(InvocationProcessor::new( Arc::clone(&tags_provider), @@ -533,7 +530,6 @@ async fn extension_loop_active( Arc::clone(&aws_config), metrics_aggr_handle.clone(), Arc::clone(&propagator), - stats_tx, ))); // AppSec processor (if enabled) let appsec_processor = match AppSecProcessor::new(config) { @@ -562,7 +558,6 @@ async fn extension_loop_active( Arc::clone(&invocation_processor), appsec_processor.clone(), Arc::clone(&trace_aggregator), - stats_rx, ); let api_runtime_proxy_shutdown_signal = start_api_runtime_proxy( @@ -977,7 +972,7 @@ async fn handle_next_invocation( invoked_function_arn.clone() ); let mut p = invocation_processor.lock().await; - p.on_invoke_event(request_id.into()).await; + p.on_invoke_event(request_id.into()); drop(p); } Ok(NextEventResponse::Shutdown { @@ -1109,7 +1104,6 @@ fn start_trace_agent( invocation_processor: Arc>, appsec_processor: Option>>, trace_aggregator: Arc>, - stats_rx: Receiver, ) -> ( Sender, Arc, @@ -1173,7 +1167,6 @@ fn start_trace_agent( invocation_processor, appsec_processor, Arc::clone(tags_provider), - stats_rx, stats_concentrator.clone(), ); let trace_agent_channel = trace_agent.get_sender_copy(); diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 7ba097e73..0a40a8559 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -9,7 +9,7 @@ use datadog_trace_protobuf::pb::Span; use datadog_trace_utils::tracer_header_tags; use serde_json::Value; use tokio::sync::watch; -use tracing::{debug, error, warn}; +use tracing::{debug, warn}; use crate::{ config::{self, aws::AwsConfig}, @@ -41,9 +41,6 @@ use crate::{ use crate::lifecycle::invocation::triggers::get_default_service_name; -use crate::traces::stats_agent::StatsEvent; 
-use tokio::sync::mpsc::Sender; - pub const MS_TO_NS: f64 = 1_000_000.0; pub const S_TO_MS: u64 = 1_000; pub const S_TO_NS: f64 = 1_000_000_000.0; @@ -85,7 +82,6 @@ pub struct Processor { /// /// These tags are used to capture runtime and initialization. dynamic_tags: HashMap, - stats_tx: Sender, } impl Processor { @@ -96,7 +92,6 @@ impl Processor { aws_config: Arc, metrics_aggregator: dogstatsd::aggregator_service::AggregatorHandle, propagator: Arc, - stats_tx: Sender, ) -> Self { let resource = tags_provider .get_canonical_resource_name() @@ -120,13 +115,12 @@ impl Processor { service, resource, dynamic_tags: HashMap::new(), - stats_tx, } } /// Given a `request_id`, creates the context and adds the enhanced metric offsets to the context buffer. /// - pub async fn on_invoke_event(&mut self, request_id: String) { + pub fn on_invoke_event(&mut self, request_id: String) { let invocation_span = create_empty_span(String::from("aws.lambda"), &self.resource, &self.service); // Important! Call set_init_tags() before adding the invocation to the context buffer @@ -180,18 +174,6 @@ impl Processor { payload_value, ); } - - // Send stats event to the stats concentrator - let timestamp_ns = timestamp_secs * S_TO_NS_U64; - let stats_event = StatsEvent { time: timestamp_ns }; - match self.stats_tx.send(stats_event).await { - Ok(()) => { - debug!("Buffered stats event to be concentrated."); - } - Err(err) => { - error!("Error sending stats event to the concentrator: {err}"); - } - } } /// On the first invocation, determine if it's a cold start or proactive init. diff --git a/bottlecap/src/traces/mod.rs b/bottlecap/src/traces/mod.rs index 653569161..3e790d5da 100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -15,6 +15,7 @@ pub mod trace_agent; pub mod trace_aggregator; pub mod trace_flusher; pub mod trace_processor; +pub mod trace_stats_processor; // URL for a call to the Lambda runtime API. The value may be replaced if `AWS_LAMBDA_RUNTIME_API` is set. 
const LAMBDA_RUNTIME_URL_PREFIX: &str = "http://127.0.0.1:9001"; diff --git a/bottlecap/src/traces/stats_agent.rs b/bottlecap/src/traces/stats_agent.rs index 001241965..fa093c084 100644 --- a/bottlecap/src/traces/stats_agent.rs +++ b/bottlecap/src/traces/stats_agent.rs @@ -1,4 +1,4 @@ -use tokio::sync::mpsc::{self, Receiver}; +use tokio::sync::mpsc::{self, Receiver, Sender}; use super::stats_concentrator::StatsConcentrator; @@ -12,17 +12,18 @@ pub struct StatsEvent { #[allow(clippy::module_name_repetitions)] pub struct StatsAgent { - rx: mpsc::Receiver, + tx: Sender, + rx: Receiver, concentrator: Arc>, } impl StatsAgent { #[must_use] pub fn new( - rx: Receiver, concentrator: Arc>, ) -> StatsAgent { - StatsAgent { rx, concentrator } + let (tx, rx) = mpsc::channel::(1000); + StatsAgent { tx, rx, concentrator } } pub async fn spin(&mut self) { @@ -30,4 +31,9 @@ impl StatsAgent { self.concentrator.lock().await.add(event); } } + + #[must_use] + pub fn get_sender_copy(&self) -> Sender { + self.tx.clone() + } } diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 2ac7d240f..aa8b6576b 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -6,6 +6,7 @@ use datadog_trace_protobuf::pb; use std::collections::HashMap; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; +use tracing::debug; #[derive(Default)] struct Bucket { @@ -43,6 +44,7 @@ impl StatsConcentrator { } pub fn add(&mut self, stats_event: StatsEvent) { + debug!("Adding stats to the stats concentrator"); let bucket_timestamp = Self::get_bucket_timestamp(stats_event.time); let bucket = self .buckets diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index e4b1a29c6..258345559 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -42,8 +42,8 @@ use datadog_trace_utils::trace_utils::{self}; use ddcommon::hyper_migration; use 
crate::traces::stats_agent::StatsAgent; -use crate::traces::stats_agent::StatsEvent; use crate::traces::stats_concentrator::StatsConcentrator; +use crate::traces::trace_stats_processor::SendingTraceStatsProcessor; const TRACE_AGENT_PORT: usize = 8126; @@ -79,6 +79,7 @@ const LAMBDA_LOAD_SPAN: &str = "aws.lambda.load"; pub struct TraceState { pub config: Arc, pub trace_sender: Arc, + pub stats_sender: Arc, pub invocation_processor: Arc>, pub tags_provider: Arc, } @@ -128,7 +129,6 @@ impl TraceAgent { invocation_processor: Arc>, appsec_processor: Option>>, tags_provider: Arc, - stats_rx: Receiver, stats_concentrator: Arc>, ) -> TraceAgent { // Set up a channel to send processed traces to our trace aggregator. tx is passed through each @@ -145,7 +145,7 @@ impl TraceAgent { } }); - let stats_agent = StatsAgent::new(stats_rx, stats_concentrator.clone()); + let stats_agent = StatsAgent::new(stats_concentrator.clone()); TraceAgent { config: config.clone(), @@ -187,7 +187,7 @@ impl TraceAgent { stats_agent.lock().await.spin().await; }); - let router = self.make_router(stats_tx); + let router = self.make_router(stats_tx).await; let port = u16::try_from(TRACE_AGENT_PORT).expect("TRACE_AGENT_PORT is too large"); let socket = SocketAddr::from(([127, 0, 0, 1], port)); @@ -207,7 +207,8 @@ impl TraceAgent { Ok(()) } - fn make_router(&self, stats_tx: Sender) -> Router { + async fn make_router(&self, stats_tx: Sender) -> Router { + let stats_agent_tx = self.stats_agent.lock().await.get_sender_copy(); let trace_state = TraceState { config: Arc::clone(&self.config), trace_sender: Arc::new(SendingTraceProcessor { @@ -215,6 +216,7 @@ impl TraceAgent { processor: Arc::clone(&self.trace_processor), trace_tx: self.tx.clone(), }), + stats_sender: Arc::new(SendingTraceStatsProcessor::new(stats_agent_tx)), invocation_processor: Arc::clone(&self.invocation_processor), tags_provider: Arc::clone(&self.tags_provider), }; @@ -279,10 +281,12 @@ impl TraceAgent { } async fn v04_traces(State(state): 
State, request: Request) -> Response { + debug!("Received v04 traces to process"); Self::handle_traces( state.config, request, state.trace_sender, + state.stats_sender, state.invocation_processor, state.tags_provider, ApiVersion::V04, @@ -291,10 +295,12 @@ impl TraceAgent { } async fn v05_traces(State(state): State, request: Request) -> Response { + debug!("Received v05 traces to process"); Self::handle_traces( state.config, request, state.trace_sender, + state.stats_sender, state.invocation_processor, state.tags_provider, ApiVersion::V05, @@ -434,6 +440,7 @@ impl TraceAgent { config: Arc, request: Request, trace_sender: Arc, + stats_sender: Arc, invocation_processor: Arc>, tags_provider: Arc, version: ApiVersion, @@ -526,7 +533,16 @@ impl TraceAgent { } } - match trace_sender + // TODO (Yiming): maybe we don't need to send stats for some traces + debug!("Sending stats to the stats aggregator. Traces: {traces:?}"); + if let Err(err) = stats_sender.send(&traces).await { + return error_response( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Error sending stats to the stats aggregator: {err}"), + ); + } + + if let Err(err) = trace_sender .send_processed_traces( config, tags_provider, @@ -537,12 +553,13 @@ impl TraceAgent { ) .await { - Ok(()) => success_response("Successfully buffered traces to be aggregated."), - Err(err) => error_response( + return error_response( StatusCode::INTERNAL_SERVER_ERROR, format!("Error sending traces to the trace aggregator: {err}"), - ), + ); } + + success_response("Successfully buffered traces to be aggregated.") } #[allow(clippy::too_many_arguments)] diff --git a/bottlecap/src/traces/trace_stats_processor.rs b/bottlecap/src/traces/trace_stats_processor.rs new file mode 100644 index 000000000..b75e65b06 --- /dev/null +++ b/bottlecap/src/traces/trace_stats_processor.rs @@ -0,0 +1,35 @@ +use tokio::sync::mpsc::Sender; +use tokio::sync::mpsc::error::SendError; +use tracing::debug; + +use super::stats_agent::StatsEvent; + +use 
datadog_trace_protobuf::pb; + +pub struct SendingTraceStatsProcessor { + stats_tx: Sender, +} + +impl SendingTraceStatsProcessor { + #[must_use] + pub fn new(stats_tx: Sender) -> Self { + Self { stats_tx } + } + + pub async fn send( + &self, + traces: &[Vec], + ) -> Result<(), SendError> { + debug!("Sending stats to the stats concentrator"); + for trace in traces { + for span in trace { + let stats = StatsEvent { + time: span.start.try_into().unwrap_or_default(), + }; + debug!("Sending single stats to the stats concentrator."); + self.stats_tx.send(stats).await?; + } + } + Ok(()) + } +} \ No newline at end of file From 01441c3ca20c37b81a57d3b648dc0e00961da20b Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Fri, 12 Sep 2025 16:41:02 -0400 Subject: [PATCH 27/43] Support aggregation keys --- bottlecap/src/traces/stats_agent.rs | 7 ++- bottlecap/src/traces/stats_concentrator.rs | 48 ++++++++++++++----- bottlecap/src/traces/trace_stats_processor.rs | 10 ++++ 3 files changed, 51 insertions(+), 14 deletions(-) diff --git a/bottlecap/src/traces/stats_agent.rs b/bottlecap/src/traces/stats_agent.rs index fa093c084..404838bc8 100644 --- a/bottlecap/src/traces/stats_agent.rs +++ b/bottlecap/src/traces/stats_agent.rs @@ -5,9 +5,14 @@ use super::stats_concentrator::StatsConcentrator; use std::sync::Arc; use tokio::sync::Mutex; -#[derive(Clone, Copy, Default)] +use super::stats_concentrator::AggregationKey; +use super::stats_concentrator::Stats; + +#[derive(Clone)] pub struct StatsEvent { pub time: u64, + pub aggregation_key: AggregationKey, + pub stats: Stats, } #[allow(clippy::module_name_repetitions)] diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index aa8b6576b..8a67ce110 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -8,14 +8,28 @@ use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; use tracing::debug; +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub 
struct AggregationKey { + // TODO: add more fields + pub name: String, + pub resource: String, +} + +// Aggregated stats for a time interval across all the aggregation keys. #[derive(Default)] struct Bucket { + data: HashMap, +} + +#[derive(Clone, Debug, Default, Copy)] +pub struct Stats { + // TODO: add more fields pub hits: u32, } pub struct StatsConcentrator { config: Arc, - resource: String, + _resource: String, buckets: HashMap, } @@ -33,13 +47,14 @@ const NO_FLUSH_BUCKET_COUNT: u64 = 2; impl StatsConcentrator { #[must_use] pub fn new(config: Arc, tags_provider: Arc) -> Self { + // TODO: delete resource let resource = tags_provider .get_canonical_resource_name() .unwrap_or(String::from("aws.lambda")); Self { buckets: HashMap::new(), config, - resource, + _resource: resource, } } @@ -49,8 +64,11 @@ impl StatsConcentrator { let bucket = self .buckets .entry(bucket_timestamp) - .or_insert(Bucket { hits: 0 }); - bucket.hits += 1; + .or_default(); + + let stats = bucket.data.entry(stats_event.aggregation_key).or_default(); + + stats.hits += stats_event.stats.hits; } fn get_bucket_timestamp(timestamp: u64) -> u64 { @@ -67,21 +85,25 @@ impl StatsConcentrator { .as_nanos() .try_into() .expect("Failed to convert timestamp to u64"); - let mut stats = Vec::new(); + let mut ret = Vec::new(); let mut to_remove = Vec::new(); for (&timestamp, bucket) in &self.buckets { - if force_flush || Self::should_flush_bucket(current_timestamp, timestamp) { - stats.push(self.construct_stats_payload(timestamp, bucket)); - to_remove.push(timestamp); + if !force_flush && !Self::should_flush_bucket(current_timestamp, timestamp) { + continue; + } + + for (aggregation_key, stats) in &bucket.data { + ret.push(self.construct_stats_payload(timestamp, aggregation_key, *stats)); } + to_remove.push(timestamp); } for timestamp in to_remove { self.buckets.remove(&timestamp); } - stats + ret } // Whether a bucket should be flushed based on the current timestamp and the bucket timestamp. 
@@ -89,7 +111,7 @@ impl StatsConcentrator { current_timestamp - bucket_timestamp >= BUCKET_DURATION_NS * NO_FLUSH_BUCKET_COUNT } - fn construct_stats_payload(&self, timestamp: u64, bucket: &Bucket) -> pb::ClientStatsPayload { + fn construct_stats_payload(&self, timestamp: u64, aggregation_key: &AggregationKey, stats: Stats) -> pb::ClientStatsPayload { pb::ClientStatsPayload { hostname: String::new(), env: self.config.env.clone().unwrap_or_default(), @@ -109,12 +131,12 @@ impl StatsConcentrator { duration: 0, stats: vec![pb::ClientGroupedStats { service: self.config.service.clone().unwrap_or_default(), - name: "aws.lambda".to_string(), - resource: self.resource.clone(), + name: aggregation_key.name.clone(), + resource: aggregation_key.resource.clone(), http_status_code: 200, r#type: String::new(), db_type: String::new(), - hits: bucket.hits.into(), + hits: stats.hits.into(), errors: 0, duration: 0, ok_summary: vec![], diff --git a/bottlecap/src/traces/trace_stats_processor.rs b/bottlecap/src/traces/trace_stats_processor.rs index b75e65b06..2d0ca4cc1 100644 --- a/bottlecap/src/traces/trace_stats_processor.rs +++ b/bottlecap/src/traces/trace_stats_processor.rs @@ -3,6 +3,8 @@ use tokio::sync::mpsc::error::SendError; use tracing::debug; use super::stats_agent::StatsEvent; +use super::stats_concentrator::AggregationKey; +use super::stats_concentrator::Stats; use datadog_trace_protobuf::pb; @@ -25,6 +27,14 @@ impl SendingTraceStatsProcessor { for span in trace { let stats = StatsEvent { time: span.start.try_into().unwrap_or_default(), + aggregation_key: AggregationKey { + name: span.name.clone(), + resource: span.resource.clone(), + }, + stats: Stats { + // TODO: handle error == 1 + hits: 1, + }, }; debug!("Sending single stats to the stats concentrator."); self.stats_tx.send(stats).await?; From 847ec5dc190b0caeedda56e71c8c979717e0d112 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Fri, 12 Sep 2025 16:42:29 -0400 Subject: [PATCH 28/43] Support duration --- 
bottlecap/src/traces/stats_concentrator.rs | 1 + bottlecap/src/traces/trace_stats_processor.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 8a67ce110..5487972ea 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -25,6 +25,7 @@ struct Bucket { pub struct Stats { // TODO: add more fields pub hits: u32, + pub duration: u64, // in nanoseconds } pub struct StatsConcentrator { diff --git a/bottlecap/src/traces/trace_stats_processor.rs b/bottlecap/src/traces/trace_stats_processor.rs index 2d0ca4cc1..363d4f803 100644 --- a/bottlecap/src/traces/trace_stats_processor.rs +++ b/bottlecap/src/traces/trace_stats_processor.rs @@ -34,6 +34,7 @@ impl SendingTraceStatsProcessor { stats: Stats { // TODO: handle error == 1 hits: 1, + duration: span.duration.try_into().unwrap_or_default(), }, }; debug!("Sending single stats to the stats concentrator."); From 8b103eec402f10275b2a1eba0cc054739d5e032c Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Fri, 12 Sep 2025 16:50:09 -0400 Subject: [PATCH 29/43] Support errors --- bottlecap/src/traces/stats_concentrator.rs | 10 ++++++---- bottlecap/src/traces/trace_agent.rs | 1 - bottlecap/src/traces/trace_stats_processor.rs | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 5487972ea..09bd65902 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -24,8 +24,9 @@ struct Bucket { #[derive(Clone, Debug, Default, Copy)] pub struct Stats { // TODO: add more fields - pub hits: u32, - pub duration: u64, // in nanoseconds + pub hits: i32, + pub duration: i64, // in nanoseconds + pub error: i32, } pub struct StatsConcentrator { @@ -137,12 +138,13 @@ impl StatsConcentrator { http_status_code: 200, r#type: String::new(), db_type: 
String::new(), - hits: stats.hits.into(), - errors: 0, + hits: stats.hits.try_into().unwrap_or_default(), + errors: stats.error.try_into().unwrap_or_default(), duration: 0, ok_summary: vec![], error_summary: vec![], synthetics: false, + // TODO: handle top_level_hits top_level_hits: 0, span_kind: String::new(), peer_tags: vec![], diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 258345559..0a780c2c5 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -533,7 +533,6 @@ impl TraceAgent { } } - // TODO (Yiming): maybe we don't need to send stats for some traces debug!("Sending stats to the stats aggregator. Traces: {traces:?}"); if let Err(err) = stats_sender.send(&traces).await { return error_response( diff --git a/bottlecap/src/traces/trace_stats_processor.rs b/bottlecap/src/traces/trace_stats_processor.rs index 363d4f803..a42d1fc3d 100644 --- a/bottlecap/src/traces/trace_stats_processor.rs +++ b/bottlecap/src/traces/trace_stats_processor.rs @@ -32,9 +32,9 @@ impl SendingTraceStatsProcessor { resource: span.resource.clone(), }, stats: Stats { - // TODO: handle error == 1 hits: 1, - duration: span.duration.try_into().unwrap_or_default(), + error: span.error, + duration: span.duration, }, }; debug!("Sending single stats to the stats concentrator."); From ca42a2c5124fa3cd807a386d2d1916d376b77f5f Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Mon, 15 Sep 2025 15:23:06 -0400 Subject: [PATCH 30/43] Support duration --- bottlecap/src/traces/stats_concentrator.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 09bd65902..e1458df04 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -130,7 +130,7 @@ impl StatsConcentrator { image_tag: String::new(), stats: vec![pb::ClientStatsBucket { start: timestamp, - duration: 0, + 
duration: BUCKET_DURATION_NS, stats: vec![pb::ClientGroupedStats { service: self.config.service.clone().unwrap_or_default(), name: aggregation_key.name.clone(), @@ -140,7 +140,7 @@ impl StatsConcentrator { db_type: String::new(), hits: stats.hits.try_into().unwrap_or_default(), errors: stats.error.try_into().unwrap_or_default(), - duration: 0, + duration: stats.duration.try_into().unwrap_or_default(), ok_summary: vec![], error_summary: vec![], synthetics: false, From efe580ce23623c01dcf295d7ad53b4e70016b117 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Mon, 15 Sep 2025 15:26:52 -0400 Subject: [PATCH 31/43] Change http status code from 200 to 0 --- bottlecap/src/traces/stats_concentrator.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index e1458df04..4152a6112 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -135,7 +135,7 @@ impl StatsConcentrator { service: self.config.service.clone().unwrap_or_default(), name: aggregation_key.name.clone(), resource: aggregation_key.resource.clone(), - http_status_code: 200, + http_status_code: 0, r#type: String::new(), db_type: String::new(), hits: stats.hits.try_into().unwrap_or_default(), From 2687e001fd1f951c57f682fbc1bcab5f7f4d8cdb Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Mon, 15 Sep 2025 15:28:18 -0400 Subject: [PATCH 32/43] Remove unused resource param --- bottlecap/src/bin/bottlecap/main.rs | 12 ++-------- bottlecap/src/traces/stats_agent.rs | 10 ++++---- bottlecap/src/traces/stats_concentrator.rs | 23 ++++++++----------- bottlecap/src/traces/trace_stats_processor.rs | 7 ++---- 4 files changed, 19 insertions(+), 33 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index a807b990b..530c2a35e 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -93,12 +93,7 @@ 
use std::{ sync::Arc, time::{Duration, Instant}, }; -use tokio::{ - sync::Mutex as TokioMutex, - sync::RwLock, - sync::mpsc::Sender, - task::JoinHandle, -}; +use tokio::{sync::Mutex as TokioMutex, sync::RwLock, sync::mpsc::Sender, task::JoinHandle}; use tokio_util::sync::CancellationToken; use tracing::{debug, error}; use tracing_subscriber::EnvFilter; @@ -1113,10 +1108,7 @@ fn start_trace_agent( tokio_util::sync::CancellationToken, ) { // Stats - let stats_concentrator = Arc::new(TokioMutex::new(StatsConcentrator::new( - Arc::clone(config), - Arc::clone(tags_provider), - ))); + let stats_concentrator = Arc::new(TokioMutex::new(StatsConcentrator::new(Arc::clone(config)))); let stats_aggregator = Arc::new(TokioMutex::new(StatsAggregator::new_with_concentrator( stats_concentrator.clone(), ))); diff --git a/bottlecap/src/traces/stats_agent.rs b/bottlecap/src/traces/stats_agent.rs index 404838bc8..1b589f5e5 100644 --- a/bottlecap/src/traces/stats_agent.rs +++ b/bottlecap/src/traces/stats_agent.rs @@ -24,11 +24,13 @@ pub struct StatsAgent { impl StatsAgent { #[must_use] - pub fn new( - concentrator: Arc>, - ) -> StatsAgent { + pub fn new(concentrator: Arc>) -> StatsAgent { let (tx, rx) = mpsc::channel::(1000); - StatsAgent { tx, rx, concentrator } + StatsAgent { + tx, + rx, + concentrator, + } } pub async fn spin(&mut self) { diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 4152a6112..359753bab 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -1,6 +1,5 @@ use crate::config::Config; use crate::lifecycle::invocation::processor::S_TO_NS_U64; -use crate::tags::provider::Provider as TagProvider; use crate::traces::stats_agent::StatsEvent; use datadog_trace_protobuf::pb; use std::collections::HashMap; @@ -31,7 +30,6 @@ pub struct Stats { pub struct StatsConcentrator { config: Arc, - _resource: String, buckets: HashMap, } @@ -42,31 +40,23 @@ const 
BUCKET_DURATION_NS: u64 = 10 * S_TO_NS_U64; // 10 seconds // For example, if we have buckets with timestamps 10, 20, 40, the current timestamp is 45, // and NO_FLUSH_BUCKET_COUNT is 3, then we will flush bucket 10 but not bucket 20 or 40. // Note that the bucket 30 is included in the 3 latest buckets even if it has no data. -// This is to avoid flushing stats that are still being collected to save some cost. +// This is to reduce the chance of flushing stats that are still being collected to save some cost. const NO_FLUSH_BUCKET_COUNT: u64 = 2; // Aggregates stats into buckets, which are then pulled by the stats aggregator. impl StatsConcentrator { #[must_use] - pub fn new(config: Arc, tags_provider: Arc) -> Self { - // TODO: delete resource - let resource = tags_provider - .get_canonical_resource_name() - .unwrap_or(String::from("aws.lambda")); + pub fn new(config: Arc) -> Self { Self { buckets: HashMap::new(), config, - _resource: resource, } } pub fn add(&mut self, stats_event: StatsEvent) { debug!("Adding stats to the stats concentrator"); let bucket_timestamp = Self::get_bucket_timestamp(stats_event.time); - let bucket = self - .buckets - .entry(bucket_timestamp) - .or_default(); + let bucket = self.buckets.entry(bucket_timestamp).or_default(); let stats = bucket.data.entry(stats_event.aggregation_key).or_default(); @@ -113,7 +103,12 @@ impl StatsConcentrator { current_timestamp - bucket_timestamp >= BUCKET_DURATION_NS * NO_FLUSH_BUCKET_COUNT } - fn construct_stats_payload(&self, timestamp: u64, aggregation_key: &AggregationKey, stats: Stats) -> pb::ClientStatsPayload { + fn construct_stats_payload( + &self, + timestamp: u64, + aggregation_key: &AggregationKey, + stats: Stats, + ) -> pb::ClientStatsPayload { pb::ClientStatsPayload { hostname: String::new(), env: self.config.env.clone().unwrap_or_default(), diff --git a/bottlecap/src/traces/trace_stats_processor.rs b/bottlecap/src/traces/trace_stats_processor.rs index a42d1fc3d..b9a82e16d 100644 --- 
a/bottlecap/src/traces/trace_stats_processor.rs +++ b/bottlecap/src/traces/trace_stats_processor.rs @@ -18,10 +18,7 @@ impl SendingTraceStatsProcessor { Self { stats_tx } } - pub async fn send( - &self, - traces: &[Vec], - ) -> Result<(), SendError> { + pub async fn send(&self, traces: &[Vec]) -> Result<(), SendError> { debug!("Sending stats to the stats concentrator"); for trace in traces { for span in trace { @@ -43,4 +40,4 @@ impl SendingTraceStatsProcessor { } Ok(()) } -} \ No newline at end of file +} From c39a4a01eac63c3e34415b451bcd154411abb89c Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Mon, 15 Sep 2025 15:32:29 -0400 Subject: [PATCH 33/43] Get service from trace --- bottlecap/src/traces/stats_concentrator.rs | 7 +++---- bottlecap/src/traces/trace_stats_processor.rs | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 359753bab..6732ed4c5 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -9,7 +9,7 @@ use tracing::debug; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct AggregationKey { - // TODO: add more fields + pub service: String, pub name: String, pub resource: String, } @@ -22,7 +22,6 @@ struct Bucket { #[derive(Clone, Debug, Default, Copy)] pub struct Stats { - // TODO: add more fields pub hits: i32, pub duration: i64, // in nanoseconds pub error: i32, @@ -118,7 +117,7 @@ impl StatsConcentrator { runtime_id: String::new(), sequence: 0, agent_aggregation: String::new(), - service: self.config.service.clone().unwrap_or_default(), + service: aggregation_key.service.clone(), container_id: String::new(), tags: vec![], git_commit_sha: String::new(), @@ -127,7 +126,7 @@ impl StatsConcentrator { start: timestamp, duration: BUCKET_DURATION_NS, stats: vec![pb::ClientGroupedStats { - service: self.config.service.clone().unwrap_or_default(), + service: aggregation_key.service.clone(), 
name: aggregation_key.name.clone(), resource: aggregation_key.resource.clone(), http_status_code: 0, diff --git a/bottlecap/src/traces/trace_stats_processor.rs b/bottlecap/src/traces/trace_stats_processor.rs index b9a82e16d..f878dda65 100644 --- a/bottlecap/src/traces/trace_stats_processor.rs +++ b/bottlecap/src/traces/trace_stats_processor.rs @@ -25,6 +25,7 @@ impl SendingTraceStatsProcessor { let stats = StatsEvent { time: span.start.try_into().unwrap_or_default(), aggregation_key: AggregationKey { + service: span.service.clone(), name: span.name.clone(), resource: span.resource.clone(), }, From d0fa4194340b1e5fbe412bf6184dc34057dc1be2 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Mon, 15 Sep 2025 15:34:43 -0400 Subject: [PATCH 34/43] Get env from trace --- bottlecap/src/traces/stats_concentrator.rs | 3 ++- bottlecap/src/traces/trace_stats_processor.rs | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 6732ed4c5..b90fdb99b 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -9,6 +9,7 @@ use tracing::debug; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct AggregationKey { + pub env: String, pub service: String, pub name: String, pub resource: String, @@ -110,7 +111,7 @@ impl StatsConcentrator { ) -> pb::ClientStatsPayload { pb::ClientStatsPayload { hostname: String::new(), - env: self.config.env.clone().unwrap_or_default(), + env: aggregation_key.env.clone(), version: self.config.version.clone().unwrap_or_default(), lang: "rust".to_string(), tracer_version: String::new(), diff --git a/bottlecap/src/traces/trace_stats_processor.rs b/bottlecap/src/traces/trace_stats_processor.rs index f878dda65..72362834d 100644 --- a/bottlecap/src/traces/trace_stats_processor.rs +++ b/bottlecap/src/traces/trace_stats_processor.rs @@ -25,6 +25,7 @@ impl SendingTraceStatsProcessor { let stats = StatsEvent { 
time: span.start.try_into().unwrap_or_default(), aggregation_key: AggregationKey { + env: span.meta.get("env").cloned().unwrap_or_default(), service: span.service.clone(), name: span.name.clone(), resource: span.resource.clone(), From f298a2eef490d65c23590cb821d000c5954bb3c1 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Mon, 15 Sep 2025 15:40:47 -0400 Subject: [PATCH 35/43] Support r#type --- bottlecap/src/traces/stats_concentrator.rs | 3 ++- bottlecap/src/traces/trace_stats_processor.rs | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index b90fdb99b..fdc29e119 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -13,6 +13,7 @@ pub struct AggregationKey { pub service: String, pub name: String, pub resource: String, + pub r#type: String, } // Aggregated stats for a time interval across all the aggregation keys. @@ -131,7 +132,7 @@ impl StatsConcentrator { name: aggregation_key.name.clone(), resource: aggregation_key.resource.clone(), http_status_code: 0, - r#type: String::new(), + r#type: aggregation_key.r#type.clone(), db_type: String::new(), hits: stats.hits.try_into().unwrap_or_default(), errors: stats.error.try_into().unwrap_or_default(), diff --git a/bottlecap/src/traces/trace_stats_processor.rs b/bottlecap/src/traces/trace_stats_processor.rs index 72362834d..5f239db95 100644 --- a/bottlecap/src/traces/trace_stats_processor.rs +++ b/bottlecap/src/traces/trace_stats_processor.rs @@ -29,6 +29,7 @@ impl SendingTraceStatsProcessor { service: span.service.clone(), name: span.name.clone(), resource: span.resource.clone(), + r#type: span.r#type.clone(), }, stats: Stats { hits: 1, From 2793e41f780b6df52f3ed828ed2517a0300c212e Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Mon, 15 Sep 2025 15:44:16 -0400 Subject: [PATCH 36/43] Add comments --- bottlecap/src/traces/stats_concentrator.rs | 3 +++ 1 file 
changed, 3 insertions(+) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index fdc29e119..b48b5ee85 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -11,8 +11,11 @@ use tracing::debug; pub struct AggregationKey { pub env: String, pub service: String, + // e.g. "aws.lambda.load", "aws.lambda.import" pub name: String, + // e.g. "my-lambda-function-name", "datadog_lambda.handler", "urllib.request" pub resource: String, + // e.g. "aws.lambda.load", "aws.lambda.import" pub r#type: String, } From e8ca4227326101d227c42b100e559de5888a1ef1 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Mon, 15 Sep 2025 16:09:44 -0400 Subject: [PATCH 37/43] Use retain() --- bottlecap/src/traces/stats_concentrator.rs | 29 ++++++++++------------ 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index b48b5ee85..21240f0dc 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -82,22 +82,19 @@ impl StatsConcentrator { .try_into() .expect("Failed to convert timestamp to u64"); let mut ret = Vec::new(); - let mut to_remove = Vec::new(); - for (×tamp, bucket) in &self.buckets { - if !force_flush && !Self::should_flush_bucket(current_timestamp, timestamp) { - continue; + self.buckets.retain(|×tamp, bucket| { + if force_flush || Self::should_flush_bucket(current_timestamp, timestamp) { + // Flush and remove this bucket + for (aggregation_key, stats) in &bucket.data { + ret.push(Self::construct_stats_payload(&self.config, timestamp, aggregation_key, *stats)); + } + false + } else { + // Keep this bucket + true } - - for (aggregation_key, stats) in &bucket.data { - ret.push(self.construct_stats_payload(timestamp, aggregation_key, *stats)); - } - to_remove.push(timestamp); - } - - for timestamp in to_remove { - self.buckets.remove(×tamp); - 
} + }); ret } @@ -108,7 +105,7 @@ impl StatsConcentrator { } fn construct_stats_payload( - &self, + config: &Config, timestamp: u64, aggregation_key: &AggregationKey, stats: Stats, @@ -116,7 +113,7 @@ impl StatsConcentrator { pb::ClientStatsPayload { hostname: String::new(), env: aggregation_key.env.clone(), - version: self.config.version.clone().unwrap_or_default(), + version: config.version.clone().unwrap_or_default(), lang: "rust".to_string(), tracer_version: String::new(), runtime_id: String::new(), From 45c65d9c9bfc7537abcbee3b77f3c0f73b53580c Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Mon, 15 Sep 2025 16:16:16 -0400 Subject: [PATCH 38/43] Handle error when casting u128 to u64 --- bottlecap/src/traces/stats_concentrator.rs | 31 +++++++++++++++------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 21240f0dc..a5e76970b 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -5,7 +5,7 @@ use datadog_trace_protobuf::pb; use std::collections::HashMap; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; -use tracing::debug; +use tracing::{debug, error}; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct AggregationKey { @@ -75,19 +75,32 @@ impl StatsConcentrator { // buckets, which may still be getting data. 
#[must_use] pub fn get_stats(&mut self, force_flush: bool) -> Vec { - let current_timestamp: u64 = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("Failed to get current timestamp") - .as_nanos() - .try_into() - .expect("Failed to convert timestamp to u64"); - let mut ret = Vec::new(); + let current_timestamp: u64 = match SystemTime::now().duration_since(UNIX_EPOCH) { + Ok(duration) => { + if let Ok(ts) = duration.as_nanos().try_into() { + ts + } else { + error!("Timestamp overflow, skipping stats flush"); + return Vec::new(); + } + } + Err(e) => { + error!("Failed to get current timestamp: {e}, skipping stats flush"); + return Vec::new(); + } + }; + let mut ret = Vec::new(); self.buckets.retain(|×tamp, bucket| { if force_flush || Self::should_flush_bucket(current_timestamp, timestamp) { // Flush and remove this bucket for (aggregation_key, stats) in &bucket.data { - ret.push(Self::construct_stats_payload(&self.config, timestamp, aggregation_key, *stats)); + ret.push(Self::construct_stats_payload( + &self.config, + timestamp, + aggregation_key, + *stats, + )); } false } else { From 6a9a16a0eabab8919e7012f0782e9b6a40ddd52e Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Mon, 15 Sep 2025 16:58:45 -0400 Subject: [PATCH 39/43] Fix the support for error and duration --- bottlecap/src/traces/stats_concentrator.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index a5e76970b..8b090e83a 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -65,6 +65,8 @@ impl StatsConcentrator { let stats = bucket.data.entry(stats_event.aggregation_key).or_default(); stats.hits += stats_event.stats.hits; + stats.error += stats_event.stats.error; + stats.duration += stats_event.stats.duration; } fn get_bucket_timestamp(timestamp: u64) -> u64 { From cc0e252e6ef0e5c5301636fba5fec08bb78285ef Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: 
Mon, 15 Sep 2025 17:56:00 -0400 Subject: [PATCH 40/43] Support top_level_hits --- bottlecap/src/traces/stats_concentrator.rs | 7 +++++-- bottlecap/src/traces/trace_stats_processor.rs | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 8b090e83a..65f78cd98 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -30,6 +30,7 @@ pub struct Stats { pub hits: i32, pub duration: i64, // in nanoseconds pub error: i32, + pub top_level_hits: f64, } pub struct StatsConcentrator { @@ -67,6 +68,7 @@ impl StatsConcentrator { stats.hits += stats_event.stats.hits; stats.error += stats_event.stats.error; stats.duration += stats_event.stats.duration; + stats.top_level_hits += stats_event.stats.top_level_hits; } fn get_bucket_timestamp(timestamp: u64) -> u64 { @@ -119,6 +121,8 @@ impl StatsConcentrator { current_timestamp - bucket_timestamp >= BUCKET_DURATION_NS * NO_FLUSH_BUCKET_COUNT } + #[allow(clippy::cast_possible_truncation)] + #[allow(clippy::cast_sign_loss)] fn construct_stats_payload( config: &Config, timestamp: u64, @@ -155,8 +159,7 @@ impl StatsConcentrator { ok_summary: vec![], error_summary: vec![], synthetics: false, - // TODO: handle top_level_hits - top_level_hits: 0, + top_level_hits: stats.top_level_hits.round() as u64, span_kind: String::new(), peer_tags: vec![], is_trace_root: 1, diff --git a/bottlecap/src/traces/trace_stats_processor.rs b/bottlecap/src/traces/trace_stats_processor.rs index 5f239db95..d355cf2f8 100644 --- a/bottlecap/src/traces/trace_stats_processor.rs +++ b/bottlecap/src/traces/trace_stats_processor.rs @@ -35,6 +35,7 @@ impl SendingTraceStatsProcessor { hits: 1, error: span.error, duration: span.duration, + top_level_hits: span.metrics.get("_dd.top_level").map_or(0.0, |v| *v), }, }; debug!("Sending single stats to the stats concentrator."); From 7026753a21655f9618a1aa417a86ab58a254fe7f 
Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Tue, 16 Sep 2025 15:21:58 -0400 Subject: [PATCH 41/43] Add feature flag --- bottlecap/src/bin/bottlecap/main.rs | 2 ++ bottlecap/src/config/env.rs | 6 ++++++ bottlecap/src/config/mod.rs | 2 ++ bottlecap/src/config/yaml.rs | 3 +++ bottlecap/src/tags/lambda/tags.rs | 11 +++++------ bottlecap/src/traces/trace_agent.rs | 16 +++++++++++----- 6 files changed, 29 insertions(+), 11 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 530c2a35e..8e1d1a158 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -413,6 +413,8 @@ fn load_configs(start_time: Instant) -> (AwsConfig, AwsCredentials, Arc) } }; + debug!("Config.compute_trace_stats: {}", config.compute_trace_stats); + (aws_config, aws_credentials, config) } diff --git a/bottlecap/src/config/env.rs b/bottlecap/src/config/env.rs index b6d444fc5..fdcd0f400 100644 --- a/bottlecap/src/config/env.rs +++ b/bottlecap/src/config/env.rs @@ -337,6 +337,11 @@ pub struct EnvConfig { /// The maximum depth of the Lambda payload to capture. /// Default is `10`. Requires `capture_lambda_payload` to be `true`. pub capture_lambda_payload_max_depth: Option, + /// @env `DD_COMPUTE_TRACE_STATS` + /// + /// Enable computation of trace stats for AWS Lambda. + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub compute_trace_stats: Option, /// @env `DD_SERVERLESS_APPSEC_ENABLED` /// /// Enable Application and API Protection (AAP), previously known as AppSec/ASM, for AWS Lambda. 
@@ -513,6 +518,7 @@ fn merge_config(config: &mut Config, env_config: &EnvConfig) { merge_option_to_value!(config, env_config, lambda_proc_enhanced_metrics); merge_option_to_value!(config, env_config, capture_lambda_payload); merge_option_to_value!(config, env_config, capture_lambda_payload_max_depth); + merge_option_to_value!(config, env_config, compute_trace_stats); merge_option_to_value!(config, env_config, serverless_appsec_enabled); merge_option!(config, env_config, appsec_rules); merge_option_to_value!(config, env_config, appsec_waf_timeout); diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index 0dc7b734f..8fc3ae9d4 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -334,6 +334,7 @@ pub struct Config { pub lambda_proc_enhanced_metrics: bool, pub capture_lambda_payload: bool, pub capture_lambda_payload_max_depth: u32, + pub compute_trace_stats: bool, pub serverless_appsec_enabled: bool, pub appsec_rules: Option, @@ -429,6 +430,7 @@ impl Default for Config { lambda_proc_enhanced_metrics: true, capture_lambda_payload: false, capture_lambda_payload_max_depth: 10, + compute_trace_stats: false, serverless_appsec_enabled: false, appsec_rules: None, diff --git a/bottlecap/src/config/yaml.rs b/bottlecap/src/config/yaml.rs index b8dca19b3..a52521a47 100644 --- a/bottlecap/src/config/yaml.rs +++ b/bottlecap/src/config/yaml.rs @@ -94,6 +94,8 @@ pub struct YamlConfig { pub capture_lambda_payload: Option, pub capture_lambda_payload_max_depth: Option, #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub compute_trace_stats: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] pub serverless_appsec_enabled: Option, pub appsec_rules: Option, #[serde(deserialize_with = "deserialize_optional_duration_from_microseconds")] @@ -613,6 +615,7 @@ fn merge_config(config: &mut Config, yaml_config: &YamlConfig) { merge_option_to_value!(config, yaml_config, lambda_proc_enhanced_metrics); 
merge_option_to_value!(config, yaml_config, capture_lambda_payload); merge_option_to_value!(config, yaml_config, capture_lambda_payload_max_depth); + merge_option_to_value!(config, yaml_config, compute_trace_stats); merge_option_to_value!(config, yaml_config, serverless_appsec_enabled); merge_option!(config, yaml_config, appsec_rules); merge_option_to_value!(config, yaml_config, appsec_waf_timeout); diff --git a/bottlecap/src/tags/lambda/tags.rs b/bottlecap/src/tags/lambda/tags.rs index 800ad9d10..2ae4a8acd 100644 --- a/bottlecap/src/tags/lambda/tags.rs +++ b/bottlecap/src/tags/lambda/tags.rs @@ -38,8 +38,6 @@ const SERVICE_KEY: &str = "service"; // ComputeStatsKey is the tag key indicating whether trace stats should be computed const COMPUTE_STATS_KEY: &str = "_dd.compute_stats"; -// ComputeStatsValue is the tag value indicating trace stats should be computed -const COMPUTE_STATS_VALUE: &str = "0"; // FunctionTagsKey is the tag key for a function's tags to be set on the top level tracepayload const FUNCTION_TAGS_KEY: &str = "_dd.tags.function"; // TODO(astuyve) decide what to do with the version @@ -122,10 +120,11 @@ fn tags_from_env( tags_map.extend(config.tags.clone()); } - tags_map.insert( - COMPUTE_STATS_KEY.to_string(), - COMPUTE_STATS_VALUE.to_string(), - ); + // "config.compute_trace_stats == true" means computing stats on the extension side, + // so we set _dd.compute_stats to 0 so stats won't be computed on the backend side. + let compute_stats = i32::from(!config.compute_trace_stats); + tags_map.insert(COMPUTE_STATS_KEY.to_string(), compute_stats.to_string()); + tags_map } diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 0a780c2c5..050bd3b9b 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -533,11 +533,17 @@ impl TraceAgent { } } - debug!("Sending stats to the stats aggregator. 
Traces: {traces:?}"); - if let Err(err) = stats_sender.send(&traces).await { - return error_response( - StatusCode::INTERNAL_SERVER_ERROR, - format!("Error sending stats to the stats aggregator: {err}"), + if config.compute_trace_stats { + debug!("Sending stats to the stats aggregator. Traces: {traces:?}"); + if let Err(err) = stats_sender.send(&traces).await { + return error_response( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Error sending stats to the stats aggregator: {err}"), + ); + } + } else { + debug!( + "compute_trace_stats is disabled. Skipping sending stats to the stats aggregator." ); } From d0f55e86f96a03bffc2226266b5752731e1cb7c0 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Tue, 16 Sep 2025 16:56:04 -0400 Subject: [PATCH 42/43] Add stats concentrator service to avoid using mutex --- bottlecap/src/bin/bottlecap/main.rs | 14 +-- bottlecap/src/traces/mod.rs | 1 + bottlecap/src/traces/stats_agent.rs | 14 +-- bottlecap/src/traces/stats_aggregator.rs | 23 +++-- .../src/traces/stats_concentrator_service.rs | 86 +++++++++++++++++++ bottlecap/src/traces/trace_agent.rs | 4 +- 6 files changed, 118 insertions(+), 24 deletions(-) create mode 100644 bottlecap/src/traces/stats_concentrator_service.rs diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 8e1d1a158..153d815be 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -54,7 +54,7 @@ use bottlecap::{ proxy_aggregator, proxy_flusher::Flusher as ProxyFlusher, stats_aggregator::StatsAggregator, - stats_concentrator::StatsConcentrator, + stats_concentrator_service::StatsConcentratorService, stats_flusher::{self, StatsFlusher}, stats_processor, trace_agent, trace_aggregator::{self, SendDataBuilderInfo}, @@ -1110,10 +1110,12 @@ fn start_trace_agent( tokio_util::sync::CancellationToken, ) { // Stats - let stats_concentrator = Arc::new(TokioMutex::new(StatsConcentrator::new(Arc::clone(config)))); - let stats_aggregator = 
Arc::new(TokioMutex::new(StatsAggregator::new_with_concentrator( - stats_concentrator.clone(), - ))); + let (stats_concentrator_service, stats_concentrator_handle) = + StatsConcentratorService::new(Arc::clone(config)); + tokio::spawn(stats_concentrator_service.run()); + let stats_aggregator: Arc> = Arc::new(TokioMutex::new( + StatsAggregator::new_with_concentrator(stats_concentrator_handle.clone()), + )); let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher::new( api_key_factory.clone(), stats_aggregator.clone(), @@ -1161,7 +1163,7 @@ fn start_trace_agent( invocation_processor, appsec_processor, Arc::clone(tags_provider), - stats_concentrator.clone(), + stats_concentrator_handle.clone(), ); let trace_agent_channel = trace_agent.get_sender_copy(); let shutdown_token = trace_agent.shutdown_token(); diff --git a/bottlecap/src/traces/mod.rs b/bottlecap/src/traces/mod.rs index 3e790d5da..420b06207 100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -9,6 +9,7 @@ pub mod span_pointers; pub mod stats_agent; pub mod stats_aggregator; pub mod stats_concentrator; +pub mod stats_concentrator_service; pub mod stats_flusher; pub mod stats_processor; pub mod trace_agent; diff --git a/bottlecap/src/traces/stats_agent.rs b/bottlecap/src/traces/stats_agent.rs index 1b589f5e5..23f7487bc 100644 --- a/bottlecap/src/traces/stats_agent.rs +++ b/bottlecap/src/traces/stats_agent.rs @@ -1,9 +1,7 @@ use tokio::sync::mpsc::{self, Receiver, Sender}; +use tracing::error; -use super::stats_concentrator::StatsConcentrator; - -use std::sync::Arc; -use tokio::sync::Mutex; +use super::stats_concentrator_service::StatsConcentratorHandle; use super::stats_concentrator::AggregationKey; use super::stats_concentrator::Stats; @@ -19,12 +17,12 @@ pub struct StatsEvent { pub struct StatsAgent { tx: Sender, rx: Receiver, - concentrator: Arc>, + concentrator: StatsConcentratorHandle, } impl StatsAgent { #[must_use] - pub fn new(concentrator: Arc>) -> StatsAgent { + pub 
fn new(concentrator: StatsConcentratorHandle) -> StatsAgent { let (tx, rx) = mpsc::channel::(1000); StatsAgent { tx, @@ -35,7 +33,9 @@ impl StatsAgent { pub async fn spin(&mut self) { while let Some(event) = self.rx.recv().await { - self.concentrator.lock().await.add(event); + if let Err(e) = self.concentrator.add(event) { + error!("Error adding stats event to the stats concentrator: {e}"); + } } } diff --git a/bottlecap/src/traces/stats_aggregator.rs b/bottlecap/src/traces/stats_aggregator.rs index 2e37b1cb2..913d981cd 100644 --- a/bottlecap/src/traces/stats_aggregator.rs +++ b/bottlecap/src/traces/stats_aggregator.rs @@ -1,8 +1,7 @@ -use crate::traces::stats_concentrator::StatsConcentrator; +use crate::traces::stats_concentrator_service::StatsConcentratorHandle; use datadog_trace_protobuf::pb::ClientStatsPayload; use std::collections::VecDeque; -use std::sync::Arc; -use tokio::sync::Mutex; +use tracing::error; #[allow(clippy::empty_line_after_doc_comments)] /// Maximum number of entries in a stat payload. @@ -25,14 +24,14 @@ pub struct StatsAggregator { queue: VecDeque, max_content_size_bytes: usize, buffer: Vec, - concentrator: Arc>, + concentrator: StatsConcentratorHandle, } /// Takes in individual trace stats payloads and aggregates them into batches to be flushed to Datadog. impl StatsAggregator { #[allow(dead_code)] #[allow(clippy::must_use_candidate)] - fn new(max_content_size_bytes: usize, concentrator: Arc>) -> Self { + fn new(max_content_size_bytes: usize, concentrator: StatsConcentratorHandle) -> Self { StatsAggregator { queue: VecDeque::new(), max_content_size_bytes, @@ -41,7 +40,8 @@ impl StatsAggregator { } } - pub fn new_with_concentrator(concentrator: Arc>) -> Self { + #[must_use] + pub fn new_with_concentrator(concentrator: StatsConcentratorHandle) -> Self { Self::new(MAX_CONTENT_SIZE_BYTES, concentrator) } @@ -53,9 +53,14 @@ impl StatsAggregator { /// Returns a batch of trace stats payloads, subject to the max content size. 
pub async fn get_batch(&mut self, force_flush: bool) -> Vec { // Pull stats data from concentrator - let mut concentrator = self.concentrator.lock().await; - let stats = concentrator.get_stats(force_flush); - self.queue.extend(stats); + match self.concentrator.get_stats(force_flush).await { + Ok(stats) => { + self.queue.extend(stats); + } + Err(e) => { + error!("Error getting stats from the stats concentrator: {e:?}"); + } + } let mut batch_size = 0; diff --git a/bottlecap/src/traces/stats_concentrator_service.rs b/bottlecap/src/traces/stats_concentrator_service.rs new file mode 100644 index 000000000..b3f745219 --- /dev/null +++ b/bottlecap/src/traces/stats_concentrator_service.rs @@ -0,0 +1,86 @@ +use tokio::sync::{mpsc, oneshot}; + +use super::stats_agent::StatsEvent; +use super::stats_concentrator::StatsConcentrator; +use crate::config::Config; +use datadog_trace_protobuf::pb; +use std::sync::Arc; +use tracing::error; + +#[derive(Debug)] +pub enum StatsError { + SendError(mpsc::error::SendError), + RecvError(oneshot::error::RecvError), +} + +impl From> for StatsError { + fn from(err: mpsc::error::SendError) -> Self { + StatsError::SendError(err) + } +} + +impl From for StatsError { + fn from(err: oneshot::error::RecvError) -> Self { + StatsError::RecvError(err) + } +} + +pub enum ConcentratorCommand { + Add(StatsEvent), + GetStats(bool, oneshot::Sender>), +} + +#[derive(Clone)] +pub struct StatsConcentratorHandle { + tx: mpsc::UnboundedSender, +} + +impl StatsConcentratorHandle { + pub fn add( + &self, + stats_event: StatsEvent, + ) -> Result<(), mpsc::error::SendError> { + self.tx.send(ConcentratorCommand::Add(stats_event)) + } + + pub async fn get_stats( + &self, + force_flush: bool, + ) -> Result, StatsError> { + let (response_tx, response_rx) = oneshot::channel(); + self.tx + .send(ConcentratorCommand::GetStats(force_flush, response_tx))?; + let stats = response_rx.await?; + Ok(stats) + } +} + +pub struct StatsConcentratorService { + concentrator: 
StatsConcentrator, + rx: mpsc::UnboundedReceiver, +} + +impl StatsConcentratorService { + #[must_use] + pub fn new(config: Arc) -> (Self, StatsConcentratorHandle) { + let (tx, rx) = mpsc::unbounded_channel(); + let handle = StatsConcentratorHandle { tx }; + let concentrator = StatsConcentrator::new(config); + let service: StatsConcentratorService = Self { concentrator, rx }; + (service, handle) + } + + pub async fn run(mut self) { + while let Some(command) = self.rx.recv().await { + match command { + ConcentratorCommand::Add(stats_event) => self.concentrator.add(stats_event), + ConcentratorCommand::GetStats(force_flush, response_tx) => { + let stats = self.concentrator.get_stats(force_flush); + if let Err(e) = response_tx.send(stats) { + error!("Failed to return trace stats: {e:?}"); + } + } + } + } + } +} diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 050bd3b9b..515fac1e8 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -42,7 +42,7 @@ use datadog_trace_utils::trace_utils::{self}; use ddcommon::hyper_migration; use crate::traces::stats_agent::StatsAgent; -use crate::traces::stats_concentrator::StatsConcentrator; +use crate::traces::stats_concentrator_service::StatsConcentratorHandle; use crate::traces::trace_stats_processor::SendingTraceStatsProcessor; const TRACE_AGENT_PORT: usize = 8126; @@ -129,7 +129,7 @@ impl TraceAgent { invocation_processor: Arc>, appsec_processor: Option>>, tags_provider: Arc, - stats_concentrator: Arc>, + stats_concentrator: StatsConcentratorHandle, ) -> TraceAgent { // Set up a channel to send processed traces to our trace aggregator. 
tx is passed through each // endpoint_handler to the trace processor, which uses it to send de-serialized From 9f58ddb6bc4792cd023d3baf58fe1748917f14d9 Mon Sep 17 00:00:00 2001 From: Yiming Luo Date: Tue, 16 Sep 2025 17:17:47 -0400 Subject: [PATCH 43/43] Remove some debug log --- bottlecap/src/bin/bottlecap/main.rs | 2 -- bottlecap/src/traces/stats_concentrator.rs | 3 +-- bottlecap/src/traces/trace_agent.rs | 7 ------- bottlecap/src/traces/trace_stats_processor.rs | 3 +-- 4 files changed, 2 insertions(+), 13 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 153d815be..a338cf00b 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -413,8 +413,6 @@ fn load_configs(start_time: Instant) -> (AwsConfig, AwsCredentials, Arc) } }; - debug!("Config.compute_trace_stats: {}", config.compute_trace_stats); - (aws_config, aws_credentials, config) } diff --git a/bottlecap/src/traces/stats_concentrator.rs b/bottlecap/src/traces/stats_concentrator.rs index 65f78cd98..ae9a0f207 100644 --- a/bottlecap/src/traces/stats_concentrator.rs +++ b/bottlecap/src/traces/stats_concentrator.rs @@ -5,7 +5,7 @@ use datadog_trace_protobuf::pb; use std::collections::HashMap; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; -use tracing::{debug, error}; +use tracing::error; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct AggregationKey { @@ -59,7 +59,6 @@ impl StatsConcentrator { } pub fn add(&mut self, stats_event: StatsEvent) { - debug!("Adding stats to the stats concentrator"); let bucket_timestamp = Self::get_bucket_timestamp(stats_event.time); let bucket = self.buckets.entry(bucket_timestamp).or_default(); diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 515fac1e8..5acd21d0c 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -281,7 +281,6 @@ impl TraceAgent { } async fn v04_traces(State(state): 
State, request: Request) -> Response { - debug!("Received v04 traces to process"); Self::handle_traces( state.config, request, @@ -295,7 +294,6 @@ impl TraceAgent { } async fn v05_traces(State(state): State, request: Request) -> Response { - debug!("Received v05 traces to process"); Self::handle_traces( state.config, request, @@ -534,17 +532,12 @@ impl TraceAgent { } if config.compute_trace_stats { - debug!("Sending stats to the stats aggregator. Traces: {traces:?}"); if let Err(err) = stats_sender.send(&traces).await { return error_response( StatusCode::INTERNAL_SERVER_ERROR, format!("Error sending stats to the stats aggregator: {err}"), ); } - } else { - debug!( - "compute_trace_stats is disabled. Skipping sending stats to the stats aggregator." - ); } if let Err(err) = trace_sender diff --git a/bottlecap/src/traces/trace_stats_processor.rs b/bottlecap/src/traces/trace_stats_processor.rs index d355cf2f8..a96fb3c3b 100644 --- a/bottlecap/src/traces/trace_stats_processor.rs +++ b/bottlecap/src/traces/trace_stats_processor.rs @@ -19,7 +19,7 @@ impl SendingTraceStatsProcessor { } pub async fn send(&self, traces: &[Vec]) -> Result<(), SendError> { - debug!("Sending stats to the stats concentrator"); + debug!("Sending trace stats to the concentrator"); for trace in traces { for span in trace { let stats = StatsEvent { @@ -38,7 +38,6 @@ impl SendingTraceStatsProcessor { top_level_hits: span.metrics.get("_dd.top_level").map_or(0.0, |v| *v), }, }; - debug!("Sending single stats to the stats concentrator."); self.stats_tx.send(stats).await?; } }